aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/gfs2/Kconfig46
-rw-r--r--fs/gfs2/Makefile44
-rw-r--r--fs/gfs2/acl.c312
-rw-r--r--fs/gfs2/acl.h37
-rw-r--r--fs/gfs2/bits.c178
-rw-r--r--fs/gfs2/bits.h28
-rw-r--r--fs/gfs2/bmap.c1206
-rw-r--r--fs/gfs2/bmap.h39
-rw-r--r--fs/gfs2/daemon.c225
-rw-r--r--fs/gfs2/daemon.h20
-rw-r--r--fs/gfs2/dir.c2157
-rw-r--r--fs/gfs2/dir.h51
-rw-r--r--fs/gfs2/eaops.c185
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/eattr.c1620
-rw-r--r--fs/gfs2/eattr.h90
-rw-r--r--fs/gfs2/format.h21
-rw-r--r--fs/gfs2/gfs2.h62
-rw-r--r--fs/gfs2/glock.c2513
-rw-r--r--fs/gfs2/glock.h143
-rw-r--r--fs/gfs2/glops.c487
-rw-r--r--fs/gfs2/glops.h23
-rw-r--r--fs/gfs2/incore.h703
-rw-r--r--fs/gfs2/inode.c1805
-rw-r--r--fs/gfs2/inode.h74
-rw-r--r--fs/gfs2/jdata.c382
-rw-r--r--fs/gfs2/jdata.h52
-rw-r--r--fs/gfs2/lm.c235
-rw-r--r--fs/gfs2/lm.h42
-rw-r--r--fs/gfs2/lm_interface.h295
-rw-r--r--fs/gfs2/locking.c192
-rw-r--r--fs/gfs2/log.c659
-rw-r--r--fs/gfs2/log.h68
-rw-r--r--fs/gfs2/lops.c534
-rw-r--r--fs/gfs2/lops.h96
-rw-r--r--fs/gfs2/lvb.c48
-rw-r--r--fs/gfs2/lvb.h28
-rw-r--r--fs/gfs2/main.c103
-rw-r--r--fs/gfs2/meta_io.c876
-rw-r--r--fs/gfs2/meta_io.h88
-rw-r--r--fs/gfs2/mount.c211
-rw-r--r--fs/gfs2/mount.h15
-rw-r--r--fs/gfs2/ondisk.c590
-rw-r--r--fs/gfs2/ops_address.c515
-rw-r--r--fs/gfs2/ops_address.h15
-rw-r--r--fs/gfs2/ops_dentry.c117
-rw-r--r--fs/gfs2/ops_dentry.h15
-rw-r--r--fs/gfs2/ops_export.c310
-rw-r--r--fs/gfs2/ops_export.h15
-rw-r--r--fs/gfs2/ops_file.c1597
-rw-r--r--fs/gfs2/ops_file.h16
-rw-r--r--fs/gfs2/ops_fstype.c879
-rw-r--r--fs/gfs2/ops_fstype.h15
-rw-r--r--fs/gfs2/ops_inode.c1265
-rw-r--r--fs/gfs2/ops_inode.h18
-rw-r--r--fs/gfs2/ops_super.c401
-rw-r--r--fs/gfs2/ops_super.h15
-rw-r--r--fs/gfs2/ops_vm.c199
-rw-r--r--fs/gfs2/ops_vm.h16
-rw-r--r--fs/gfs2/page.c273
-rw-r--r--fs/gfs2/page.h23
-rw-r--r--fs/gfs2/quota.c1238
-rw-r--r--fs/gfs2/quota.h34
-rw-r--r--fs/gfs2/recovery.c570
-rw-r--r--fs/gfs2/recovery.h32
-rw-r--r--fs/gfs2/resize.c291
-rw-r--r--fs/gfs2/resize.h19
-rw-r--r--fs/gfs2/rgrp.c1361
-rw-r--r--fs/gfs2/rgrp.h62
-rw-r--r--fs/gfs2/super.c944
-rw-r--r--fs/gfs2/super.h55
-rw-r--r--fs/gfs2/sys.c640
-rw-r--r--fs/gfs2/sys.h24
-rw-r--r--fs/gfs2/trans.c214
-rw-r--r--fs/gfs2/trans.h40
-rw-r--r--fs/gfs2/unlinked.c453
-rw-r--r--fs/gfs2/unlinked.h25
-rw-r--r--fs/gfs2/util.c273
-rw-r--r--fs/gfs2/util.h180
-rw-r--r--include/linux/gfs2_ioctl.h32
-rw-r--r--include/linux/gfs2_ondisk.h454
81 files changed, 29258 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
new file mode 100644
index 000000000000..17cb44bea1c0
--- /dev/null
+++ b/fs/gfs2/Kconfig
@@ -0,0 +1,46 @@
1config GFS2_FS
2 tristate "GFS2 file system support"
3 default m
4 depends on EXPERIMENTAL
5 select FS_POSIX_ACL
6 select SYSFS
7 help
8 A cluster filesystem.
9
10 Allows a cluster of computers to simultaneously use a block device
11 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
12 and writes to the block device like a local filesystem, but also uses
13 a lock module to allow the computers to coordinate their I/O so
14 filesystem consistency is maintained. One of the nifty features of
15 GFS is perfect consistency -- changes made to the filesystem on one
16 machine show up immediately on all other machines in the cluster.
17
18 To use the GFS2 filesystem, you will need to enable one or more of
19 the below locking modules. Documentation and utilities for GFS2 can
20 be found here: http://sources.redhat.com/cluster/gfs/
21
22config GFS2_FS_LOCKING_NOLOCK
23 tristate "GFS2 \"nolock\" locking module"
24 depends on GFS2_FS
25 help
26 Single node locking module for GFS2.
27
28 Use this module if you want to use GFS2 on a single node without
29 its clustering features. You can still take advantage of the
30 large file support, and upgrade to running a full cluster later on
31 if required.
32
33 If you will only be using GFS2 in cluster mode, you do not need this
34 module.
35
36config GFS2_FS_LOCKING_DLM
37 tristate "GFS2 DLM locking module"
38 depends on GFS2_FS
39 select DLM
40 help
41 Multiple node locking module for GFS2
42
43 Most users of GFS2 will require this module. It provides the locking
44 interface between GFS2 and the DLM, which is required to use GFS2
45 in a cluster environment.
46
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
new file mode 100644
index 000000000000..b1bac4f199db
--- /dev/null
+++ b/fs/gfs2/Makefile
@@ -0,0 +1,44 @@
# Core GFS2 filesystem module and the objects linked into it.
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := \
	acl.o \
	bits.o \
	bmap.o \
	daemon.o \
	dir.o \
	eaops.o \
	eattr.o \
	glock.o \
	glops.o \
	inode.o \
	jdata.o \
	lm.o \
	log.o \
	lops.o \
	locking.o \
	lvb.o \
	main.o \
	meta_io.o \
	mount.o \
	ondisk.o \
	ops_address.o \
	ops_dentry.o \
	ops_export.o \
	ops_file.o \
	ops_fstype.o \
	ops_inode.o \
	ops_super.o \
	ops_vm.o \
	page.o \
	quota.o \
	resize.o \
	recovery.o \
	rgrp.o \
	super.o \
	sys.o \
	trans.o \
	unlinked.o \
	util.o

# Lock modules.  The variable names must match the Kconfig symbols
# GFS2_FS_LOCKING_NOLOCK and GFS2_FS_LOCKING_DLM, otherwise these
# directories are never descended into.
obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
44
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
new file mode 100644
index 000000000000..33c465a2ab53
--- /dev/null
+++ b/fs/gfs2/acl.c
@@ -0,0 +1,312 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "eaops.h"
22#include "eattr.h"
23#include "glock.h"
24#include "inode.h"
25#include "meta_io.h"
26#include "trans.h"
27
/* Values passed as the "access" argument of acl_get(): non-zero selects the
 * access ACL attribute, zero selects the default ACL attribute. */
28#define ACL_ACCESS 1
29#define ACL_DEFAULT 0
30
31int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
32 struct gfs2_ea_request *er,
33 int *remove, mode_t *mode)
34{
35 struct posix_acl *acl;
36 int error;
37
38 error = gfs2_acl_validate_remove(ip, access);
39 if (error)
40 return error;
41
42 if (!er->er_data)
43 return -EINVAL;
44
45 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
46 if (IS_ERR(acl))
47 return PTR_ERR(acl);
48 if (!acl) {
49 *remove = 1;
50 return 0;
51 }
52
53 error = posix_acl_valid(acl);
54 if (error)
55 goto out;
56
57 if (access) {
58 error = posix_acl_equiv_mode(acl, mode);
59 if (!error)
60 *remove = 1;
61 else if (error > 0)
62 error = 0;
63 }
64
65 out:
66 posix_acl_release(acl);
67
68 return error;
69}
70
71int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
72{
73 if (!ip->i_sbd->sd_args.ar_posix_acl)
74 return -EOPNOTSUPP;
75 if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
76 return -EPERM;
77 if (S_ISLNK(ip->i_di.di_mode))
78 return -EOPNOTSUPP;
79 if (!access && !S_ISDIR(ip->i_di.di_mode))
80 return -EACCES;
81
82 return 0;
83}
84
85static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
86 struct gfs2_ea_location *el, char **data, unsigned int *len)
87{
88 struct gfs2_ea_request er;
89 struct gfs2_ea_location el_this;
90 int error;
91
92 if (!ip->i_di.di_eattr)
93 return 0;
94
95 memset(&er, 0, sizeof(struct gfs2_ea_request));
96 if (access) {
97 er.er_name = GFS2_POSIX_ACL_ACCESS;
98 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
99 } else {
100 er.er_name = GFS2_POSIX_ACL_DEFAULT;
101 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
102 }
103 er.er_type = GFS2_EATYPE_SYS;
104
105 if (!el)
106 el = &el_this;
107
108 error = gfs2_ea_find(ip, &er, el);
109 if (error)
110 return error;
111 if (!el->el_ea)
112 return 0;
113 if (!GFS2_EA_DATA_LEN(el->el_ea))
114 goto out;
115
116 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
117 er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
118 error = -ENOMEM;
119 if (!er.er_data)
120 goto out;
121
122 error = gfs2_ea_get_copy(ip, el, er.er_data);
123 if (error)
124 goto out_kfree;
125
126 if (acl) {
127 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
128 if (IS_ERR(*acl))
129 error = PTR_ERR(*acl);
130 }
131
132 out_kfree:
133 if (error || !data)
134 kfree(er.er_data);
135 else {
136 *data = er.er_data;
137 *len = er.er_data_len;
138 }
139
140 out:
141 if (error || el == &el_this)
142 brelse(el->el_bh);
143
144 return error;
145}
146
147/**
148 * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
149 * @inode: the file we want to do something to
150 * @mask: what we want to do
151 *
152 * Returns: errno
153 */
154
155int gfs2_check_acl_locked(struct inode *inode, int mask)
156{
157 struct posix_acl *acl = NULL;
158 int error;
159
160 error = acl_get(get_v2ip(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
161 if (error)
162 return error;
163
164 if (acl) {
165 error = posix_acl_permission(inode, acl, mask);
166 posix_acl_release(acl);
167 return error;
168 }
169
170 return -EAGAIN;
171}
172
173int gfs2_check_acl(struct inode *inode, int mask)
174{
175 struct gfs2_inode *ip = get_v2ip(inode);
176 struct gfs2_holder i_gh;
177 int error;
178
179 error = gfs2_glock_nq_init(ip->i_gl,
180 LM_ST_SHARED, LM_FLAG_ANY,
181 &i_gh);
182 if (!error) {
183 error = gfs2_check_acl_locked(inode, mask);
184 gfs2_glock_dq_uninit(&i_gh);
185 }
186
187 return error;
188}
189
190static int munge_mode(struct gfs2_inode *ip, mode_t mode)
191{
192 struct gfs2_sbd *sdp = ip->i_sbd;
193 struct buffer_head *dibh;
194 int error;
195
196 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
197 if (error)
198 return error;
199
200 error = gfs2_meta_inode_buffer(ip, &dibh);
201 if (!error) {
202 gfs2_assert_withdraw(sdp,
203 (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
204 ip->i_di.di_mode = mode;
205 gfs2_trans_add_bh(ip->i_gl, dibh);
206 gfs2_dinode_out(&ip->i_di, dibh->b_data);
207 brelse(dibh);
208 }
209
210 gfs2_trans_end(sdp);
211
212 return 0;
213}
214
215int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
216{
217 struct gfs2_sbd *sdp = dip->i_sbd;
218 struct posix_acl *acl = NULL, *clone;
219 struct gfs2_ea_request er;
220 mode_t mode = ip->i_di.di_mode;
221 int error;
222
223 if (!sdp->sd_args.ar_posix_acl)
224 return 0;
225 if (S_ISLNK(ip->i_di.di_mode))
226 return 0;
227
228 memset(&er, 0, sizeof(struct gfs2_ea_request));
229 er.er_type = GFS2_EATYPE_SYS;
230
231 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
232 &er.er_data, &er.er_data_len);
233 if (error)
234 return error;
235 if (!acl) {
236 mode &= ~current->fs->umask;
237 if (mode != ip->i_di.di_mode)
238 error = munge_mode(ip, mode);
239 return error;
240 }
241
242 clone = posix_acl_clone(acl, GFP_KERNEL);
243 error = -ENOMEM;
244 if (!clone)
245 goto out;
246 posix_acl_release(acl);
247 acl = clone;
248
249 if (S_ISDIR(ip->i_di.di_mode)) {
250 er.er_name = GFS2_POSIX_ACL_DEFAULT;
251 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
252 error = gfs2_system_eaops.eo_set(ip, &er);
253 if (error)
254 goto out;
255 }
256
257 error = posix_acl_create_masq(acl, &mode);
258 if (error < 0)
259 goto out;
260 if (error > 0) {
261 er.er_name = GFS2_POSIX_ACL_ACCESS;
262 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
263 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
264 er.er_mode = mode;
265 er.er_flags = GFS2_ERF_MODE;
266 error = gfs2_system_eaops.eo_set(ip, &er);
267 if (error)
268 goto out;
269 } else
270 munge_mode(ip, mode);
271
272 out:
273 posix_acl_release(acl);
274 kfree(er.er_data);
275 return error;
276}
277
278int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
279{
280 struct posix_acl *acl = NULL, *clone;
281 struct gfs2_ea_location el;
282 char *data;
283 unsigned int len;
284 int error;
285
286 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
287 if (error)
288 return error;
289 if (!acl)
290 return gfs2_setattr_simple(ip, attr);
291
292 clone = posix_acl_clone(acl, GFP_KERNEL);
293 error = -ENOMEM;
294 if (!clone)
295 goto out;
296 posix_acl_release(acl);
297 acl = clone;
298
299 error = posix_acl_chmod_masq(acl, attr->ia_mode);
300 if (!error) {
301 posix_acl_to_xattr(acl, data, len);
302 error = gfs2_ea_acl_chmod(ip, &el, attr, data);
303 }
304
305 out:
306 posix_acl_release(acl);
307 brelse(el.el_bh);
308 kfree(data);
309
310 return error;
311}
312
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
new file mode 100644
index 000000000000..a174b4f6bcc2
--- /dev/null
+++ b/fs/gfs2/acl.h
@@ -0,0 +1,37 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __ACL_DOT_H__
11#define __ACL_DOT_H__
12
/* Names of the extended attributes that hold the access and default POSIX
 * ACLs, each with its length in bytes (terminating NUL not counted). */
13#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
14#define GFS2_POSIX_ACL_ACCESS_LEN 16
15#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
16#define GFS2_POSIX_ACL_DEFAULT_LEN 17
17
/* True when the name/len pair is exactly the access ACL attribute name. */
18#define GFS2_ACL_IS_ACCESS(name, len) \
19 ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
20 !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))
21
/* True when the name/len pair is exactly the default ACL attribute name. */
22#define GFS2_ACL_IS_DEFAULT(name, len) \
23 ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
24 !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))
25
/* Only used through pointers here, so a forward declaration is enough. */
26struct gfs2_ea_request;
27
/* Validate a request to set an ACL attribute; "access" is non-zero for the
 * access ACL and zero for the default ACL. */
28int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
29 struct gfs2_ea_request *er,
30 int *remove, mode_t *mode);
/* Check whether the current task may change/remove an ACL attribute. */
31int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
/* Permission check against the access ACL; does not take the inode glock. */
32int gfs2_check_acl_locked(struct inode *inode, int mask);
/* As gfs2_check_acl_locked(), but holds the inode glock (shared) around it. */
33int gfs2_check_acl(struct inode *inode, int mask);
/* Apply the default ACL of directory dip (or the umask) to the inode ip. */
34int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
/* Apply a mode change, updating the access ACL when one exists. */
35int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
36
37#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bits.c b/fs/gfs2/bits.c
new file mode 100644
index 000000000000..57d420a86adf
--- /dev/null
+++ b/fs/gfs2/bits.c
@@ -0,0 +1,178 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * These routines are used by the resource group routines (rgrp.c)
12 * to keep track of block allocation. Each block is represented by two
13 * bits. One bit indicates whether or not the block is used. (1=used,
14 * 0=free) The other bit indicates whether or not the block contains a
15 * dinode or not. (1=dinode, 0=not-dinode) So, each byte represents
16 * GFS2_NBBY (i.e. 4) blocks.
17 */
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/spinlock.h>
22#include <linux/completion.h>
23#include <linux/buffer_head.h>
24#include <asm/semaphore.h>
25
26#include "gfs2.h"
27#include "bits.h"
28
/*
 * Permitted block state changes, looked up by gfs2_setbit() as
 * valid_change[new_state * 4 + cur_state].  A non-zero entry means the
 * change from cur_state to new_state is allowed.
 */
static const char valid_change[16] = {
	/* cur_state:       0  1  2  3 */
	/* new_state 0 */   0, 1, 0, 1,
	/* new_state 1 */   1, 0, 0, 0,
	/* new_state 2 */   0, 0, 0, 0,
	/* new_state 3 */   1, 0, 0, 0
};
36
37/**
38 * gfs2_setbit - Set a bit in the bitmaps
39 * @buffer: the buffer that holds the bitmaps
40 * @buflen: the length (in bytes) of the buffer
41 * @block: the block to set
42 * @new_state: the new state of the block
43 *
44 */
45
46void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
47 unsigned int buflen, uint32_t block, unsigned char new_state)
48{
49 unsigned char *byte, *end, cur_state;
50 unsigned int bit;
51
52 byte = buffer + (block / GFS2_NBBY);
53 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
54 end = buffer + buflen;
55
56 gfs2_assert(rgd->rd_sbd, byte < end);
57
58 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
59
60 if (valid_change[new_state * 4 + cur_state]) {
61 *byte ^= cur_state << bit;
62 *byte |= new_state << bit;
63 } else
64 gfs2_consist_rgrpd(rgd);
65}
66
67/**
68 * gfs2_testbit - test a bit in the bitmaps
69 * @buffer: the buffer that holds the bitmaps
70 * @buflen: the length (in bytes) of the buffer
71 * @block: the block to read
72 *
73 */
74
75unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
76 unsigned int buflen, uint32_t block)
77{
78 unsigned char *byte, *end, cur_state;
79 unsigned int bit;
80
81 byte = buffer + (block / GFS2_NBBY);
82 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
83 end = buffer + buflen;
84
85 gfs2_assert(rgd->rd_sbd, byte < end);
86
87 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
88
89 return cur_state;
90}
91
92/**
93 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
94 * a block in a given allocation state.
95 * @buffer: the buffer that holds the bitmaps
96 * @buflen: the length (in bytes) of the buffer
97 * @goal: start search at this block's bit-pair (within @buffer)
98 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
99 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
100 *
101 * Scope of @goal and returned block number is only within this bitmap buffer,
102 * not entire rgrp or filesystem. @buffer will be offset from the actual
103 * beginning of a bitmap block buffer, skipping any header structures.
104 *
105 * Return: the block number (bitmap buffer scope) that was found
106 */
107
108uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
109 unsigned int buflen, uint32_t goal,
110 unsigned char old_state)
111{
112 unsigned char *byte, *end, alloc;
113 uint32_t blk = goal;
114 unsigned int bit;
115
116 byte = buffer + (goal / GFS2_NBBY);
117 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
118 end = buffer + buflen;
119 alloc = (old_state & 1) ? 0 : 0x55;
120
121 while (byte < end) {
122 if ((*byte & 0x55) == alloc) {
123 blk += (8 - bit) >> 1;
124
125 bit = 0;
126 byte++;
127
128 continue;
129 }
130
131 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
132 return blk;
133
134 bit += GFS2_BIT_SIZE;
135 if (bit >= 8) {
136 bit = 0;
137 byte++;
138 }
139
140 blk++;
141 }
142
143 return BFITNOENT;
144}
145
/**
 * gfs2_bitcount - count the number of bits in a certain state
 * @rgd: the resource group (not used by the count itself)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Returns: The number of bit-pairs in @buffer whose value equals @state
 */

uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		       unsigned int buflen, unsigned char state)
{
	unsigned char *limit = buffer + buflen;
	unsigned char *cursor;
	uint32_t total = 0;

	for (cursor = buffer; cursor < limit; cursor++) {
		unsigned int shift;

		/* Four bit-pairs per byte, at shifts 0, 2, 4 and 6. */
		for (shift = 0; shift < 8; shift += 2) {
			unsigned char want = state << shift;

			if ((*cursor & (0x03 << shift)) == want)
				total++;
		}
	}

	return total;
}
178
diff --git a/fs/gfs2/bits.h b/fs/gfs2/bits.h
new file mode 100644
index 000000000000..36ccbdcb1eef
--- /dev/null
+++ b/fs/gfs2/bits.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __BITS_DOT_H__
11#define __BITS_DOT_H__
12
/* Returned by gfs2_bitfit() when no block in the requested state is found. */
13#define BFITNOENT 0xFFFFFFFF
14
/* Change the state stored for "block"; a change that is not permitted is
 * reported through gfs2_consist_rgrpd() and the bitmap is left as it was. */
15void gfs2_setbit(struct gfs2_rgrpd *rgd,
16 unsigned char *buffer, unsigned int buflen,
17 uint32_t block, unsigned char new_state);
/* Return the state stored for "block". */
18unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
19 unsigned char *buffer, unsigned int buflen,
20 uint32_t block);
/* Find the first block at or after "goal" that is in state "old_state";
 * block numbers are relative to the start of "buffer". */
21uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd,
22 unsigned char *buffer, unsigned int buflen,
23 uint32_t goal, unsigned char old_state);
/* Count the blocks in "buffer" that are in state "state". */
24uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd,
25 unsigned char *buffer, unsigned int buflen,
26 unsigned char state);
27
28#endif /* __BITS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
new file mode 100644
index 000000000000..4b4e295b3bf5
--- /dev/null
+++ b/fs/gfs2/bmap.c
@@ -0,0 +1,1206 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
21#include "jdata.h"
22#include "meta_io.h"
23#include "page.h"
24#include "quota.h"
25#include "rgrp.h"
26#include "trans.h"
27
28/* This doesn't need to be that large as max 64 bit pointers in a 4k
29 * block is 512, so __u16 is fine for that. It saves stack space to
30 * keep it small.
 *
 * mp_list[h] is the index of the pointer to follow in the block at tree
 * height h (h = 0 is the dinode); it is filled in by find_metapath() and
 * read by metapointer().
31 */
32struct metapath {
33 __u16 mp_list[GFS2_MAX_META_HEIGHT];
34};
35
/* Callback type taken by recursive_scan() as its "bc" argument; "data" is
 * the opaque pointer given to recursive_scan().
 * NOTE(review): "top"/"bottom" presumably bound the run of block pointers
 * inside "bh" that the callback should process - confirm against the
 * recursive_scan() body. */
36typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
37 struct buffer_head *bh, uint64_t *top,
38 uint64_t *bottom, unsigned int height,
39 void *data);
40
/* NOTE(review): no user of this structure is visible in this part of the
 * file; presumably it is the "data" payload of a block_call_t callback
 * used when deallocating metadata height by height - confirm. */
41struct strip_mine {
42 int sm_first;
43 unsigned int sm_height;
44};
45
46/**
47 * @gfs2_unstuffer_sync - Synchronously unstuff a dinode
48 * @ip:
49 * @dibh:
50 * @block:
51 * @private:
52 *
53 * Cheat and use a metadata buffer instead of a data page.
54 *
55 * Returns: errno
56 */
57
58int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
59 uint64_t block, void *private)
60{
61 struct buffer_head *bh;
62 int error;
63
64 bh = gfs2_meta_new(ip->i_gl, block);
65
66 gfs2_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs2_dinode));
67
68 set_buffer_dirty(bh);
69 error = sync_dirty_buffer(bh);
70
71 brelse(bh);
72
73 return error;
74}
75
76/**
77 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
78 * @ip: The GFS2 inode to unstuff
79 * @unstuffer: the routine that handles unstuffing a non-zero length file
80 * @private: private data for the unstuffer
81 *
82 * This routine unstuffs a dinode and returns it to a "normal" state such
83 * that the height can be grown in the traditional way.
84 *
85 * Returns: errno
86 */
87
88int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
89 void *private)
90{
91 struct buffer_head *bh, *dibh;
92 uint64_t block = 0;
93 int journaled = gfs2_is_jdata(ip);
94 int error;
95
96 down_write(&ip->i_rw_mutex);
97
98 error = gfs2_meta_inode_buffer(ip, &dibh);
99 if (error)
100 goto out;
101
102 if (ip->i_di.di_size) {
103 /* Get a free block, fill it with the stuffed data,
104 and write it out to disk */
105
106 if (journaled) {
107 block = gfs2_alloc_meta(ip);
108
109 error = gfs2_jdata_get_buffer(ip, block, 1, &bh);
110 if (error)
111 goto out_brelse;
112 gfs2_buffer_copy_tail(bh,
113 sizeof(struct gfs2_meta_header),
114 dibh, sizeof(struct gfs2_dinode));
115 brelse(bh);
116 } else {
117 block = gfs2_alloc_data(ip);
118
119 error = unstuffer(ip, dibh, block, private);
120 if (error)
121 goto out_brelse;
122 }
123 }
124
125 /* Set up the pointer to the new block */
126
127 gfs2_trans_add_bh(ip->i_gl, dibh);
128
129 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
130
131 if (ip->i_di.di_size) {
132 *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_be64(block);
133 ip->i_di.di_blocks++;
134 }
135
136 ip->i_di.di_height = 1;
137
138 gfs2_dinode_out(&ip->i_di, dibh->b_data);
139
140 out_brelse:
141 brelse(dibh);
142
143 out:
144 up_write(&ip->i_rw_mutex);
145
146 return error;
147}
148
149/**
150 * calc_tree_height - Calculate the height of a metadata tree
151 * @ip: The GFS2 inode
152 * @size: The proposed size of the file
153 *
154 * Work out how tall a metadata tree needs to be in order to accommodate a
155 * file of a particular size. If size is less than the current size of
156 * the inode, then the current size of the inode is used instead of the
157 * supplied one.
158 *
159 * Returns: the height the tree should be
160 */
161
162static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
163{
164 struct gfs2_sbd *sdp = ip->i_sbd;
165 uint64_t *arr;
166 unsigned int max, height;
167
168 if (ip->i_di.di_size > size)
169 size = ip->i_di.di_size;
170
171 if (gfs2_is_jdata(ip)) {
172 arr = sdp->sd_jheightsize;
173 max = sdp->sd_max_jheight;
174 } else {
175 arr = sdp->sd_heightsize;
176 max = sdp->sd_max_height;
177 }
178
179 for (height = 0; height < max; height++)
180 if (arr[height] >= size)
181 break;
182
183 return height;
184}
185
186/**
187 * build_height - Build a metadata tree of the requested height
188 * @ip: The GFS2 inode
189 * @height: The height to build to
190 *
191 * This routine makes sure that the metadata tree is tall enough to hold
192 * "size" bytes of data.
193 *
194 * Returns: errno
195 */
196
197static int build_height(struct gfs2_inode *ip, int height)
198{
199 struct gfs2_sbd *sdp = ip->i_sbd;
200 struct buffer_head *bh, *dibh;
201 uint64_t block = 0, *bp;
202 unsigned int x;
203 int new_block;
204 int error;
205
206 while (ip->i_di.di_height < height) {
207 error = gfs2_meta_inode_buffer(ip, &dibh);
208 if (error)
209 return error;
210
211 new_block = 0;
212 bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
213 for (x = 0; x < sdp->sd_diptrs; x++, bp++)
214 if (*bp) {
215 new_block = 1;
216 break;
217 }
218
219 if (new_block) {
220 /* Get a new block, fill it with the old direct
221 pointers, and write it out */
222
223 block = gfs2_alloc_meta(ip);
224
225 bh = gfs2_meta_new(ip->i_gl, block);
226 gfs2_trans_add_bh(ip->i_gl, bh);
227 gfs2_metatype_set(bh,
228 GFS2_METATYPE_IN,
229 GFS2_FORMAT_IN);
230 gfs2_buffer_copy_tail(bh,
231 sizeof(struct gfs2_meta_header),
232 dibh, sizeof(struct gfs2_dinode));
233
234 brelse(bh);
235 }
236
237 /* Set up the new direct pointer and write it out to disk */
238
239 gfs2_trans_add_bh(ip->i_gl, dibh);
240
241 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
242
243 if (new_block) {
244 *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_be64(block);
245 ip->i_di.di_blocks++;
246 }
247
248 ip->i_di.di_height++;
249
250 gfs2_dinode_out(&ip->i_di, dibh->b_data);
251 brelse(dibh);
252 }
253
254 return 0;
255}
256
257/**
258 * find_metapath - Find path through the metadata tree
259 * @ip: The inode pointer
260 * @mp: The metapath to return the result in
261 * @block: The disk block to look up
262 *
263 * This routine returns a struct metapath structure that defines a path
264 * through the metadata of inode "ip" to get to block "block".
265 *
266 * Example:
267 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
268 * filesystem with a blocksize of 4096.
269 *
270 * find_metapath() would fill in the struct metapath as follows:
271 * mp_list[0] = 0, mp_list[1] = 48,
272 * and mp_list[2] = 165.
273 *
274 * That means that in order to get to the block containing the byte at
275 * offset 101342453, we would load the indirect block pointed to by pointer
276 * 0 in the dinode. We would then load the indirect block pointed to by
277 * pointer 48 in that indirect block. We would then load the data block
278 * pointed to by pointer 165 in that indirect block.
279 *
280 * ----------------------------------------
281 * | Dinode | |
282 * | | 4|
283 * | |0 1 2 3 4 5 9|
284 * | | 6|
285 * ----------------------------------------
286 * |
287 * |
288 * V
289 * ----------------------------------------
290 * | Indirect Block |
291 * | 5|
292 * | 4 4 4 4 4 5 5 1|
293 * |0 5 6 7 8 9 0 1 2|
294 * ----------------------------------------
295 * |
296 * |
297 * V
298 * ----------------------------------------
299 * | Indirect Block |
300 * | 1 1 1 1 1 5|
301 * | 6 6 6 6 6 1|
302 * |0 3 4 5 6 7 2|
303 * ----------------------------------------
304 * |
305 * |
306 * V
307 * ----------------------------------------
308 * | Data block containing offset |
309 * | 101342453 |
310 * | |
311 * | |
312 * ----------------------------------------
313 *
314 */
315
316static void find_metapath(struct gfs2_inode *ip, uint64_t block, struct metapath *mp)
317{
318 struct gfs2_sbd *sdp = ip->i_sbd;
319 uint64_t b = block;
320 unsigned int i;
321
322 for (i = ip->i_di.di_height; i--;)
323 mp->mp_list[i] = (__u16)do_div(b, sdp->sd_inptrs);
324
325}
326
327/**
328 * metapointer - Return pointer to start of metadata in a buffer
329 * @bh: The buffer
330 * @height: The metadata height (0 = dinode)
331 * @mp: The metapath
332 *
333 * Return a pointer to the block number of the next height of the metadata
334 * tree given a buffer containing the pointer to the current height of the
335 * metadata tree.
336 */
337
338static inline uint64_t *metapointer(struct buffer_head *bh,
339 unsigned int height, struct metapath *mp)
340{
341 unsigned int head_size = (height > 0) ?
342 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
343
344 return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height];
345}
346
347/**
348 * lookup_block - Get the next metadata block in metadata tree
349 * @ip: The GFS2 inode
350 * @bh: Buffer containing the pointers to metadata blocks
351 * @height: The height of the tree (0 = dinode)
352 * @mp: The metapath
353 * @create: Non-zero if we may create a new meatdata block
354 * @new: Used to indicate if we did create a new metadata block
355 * @block: the returned disk block number
356 *
357 * Given a metatree, complete to a particular height, checks to see if the next
358 * height of the tree exists. If not the next height of the tree is created.
359 * The block number of the next height of the metadata tree is returned.
360 *
361 */
362
363static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
364 unsigned int height, struct metapath *mp, int create,
365 int *new, uint64_t *block)
366{
367 uint64_t *ptr = metapointer(bh, height, mp);
368
369 if (*ptr) {
370 *block = be64_to_cpu(*ptr);
371 return;
372 }
373
374 *block = 0;
375
376 if (!create)
377 return;
378
379 if (height == ip->i_di.di_height - 1 &&
380 !gfs2_is_jdata(ip))
381 *block = gfs2_alloc_data(ip);
382 else
383 *block = gfs2_alloc_meta(ip);
384
385 gfs2_trans_add_bh(ip->i_gl, bh);
386
387 *ptr = cpu_to_be64(*block);
388 ip->i_di.di_blocks++;
389
390 *new = 1;
391}
392
393/**
394 * gfs2_block_map - Map a block from an inode to a disk block
395 * @ip: The GFS2 inode
396 * @lblock: The logical block number
397 * @new: Value/Result argument (1 = may create/did create new blocks)
398 * @dblock: the disk block number of the start of an extent
399 * @extlen: the size of the extent
400 *
401 * Find the block number on the current device which corresponds to an
402 * inode's block. If the block had to be created, "new" will be set.
403 *
404 * Returns: errno
405 */
406
407int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
408 uint64_t *dblock, uint32_t *extlen)
409{
410 struct gfs2_sbd *sdp = ip->i_sbd;
411 struct buffer_head *bh;
412 struct metapath mp;
413 int create = *new;
414 unsigned int bsize;
415 unsigned int height;
416 unsigned int end_of_metadata;
417 unsigned int x;
418 int error = 0;
419
420 *new = 0;
421 *dblock = 0;
422 if (extlen)
423 *extlen = 0;
424
425 if (create)
426 down_write(&ip->i_rw_mutex);
427 else
428 down_read(&ip->i_rw_mutex);
429
430 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
431 goto out;
432
433 bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
434
435 height = calc_tree_height(ip, (lblock + 1) * bsize);
436 if (ip->i_di.di_height < height) {
437 if (!create)
438 goto out;
439
440 error = build_height(ip, height);
441 if (error)
442 goto out;
443 }
444
445 find_metapath(ip, lblock, &mp);
446 end_of_metadata = ip->i_di.di_height - 1;
447
448 error = gfs2_meta_inode_buffer(ip, &bh);
449 if (error)
450 goto out;
451
452 for (x = 0; x < end_of_metadata; x++) {
453 lookup_block(ip, bh, x, &mp, create, new, dblock);
454 brelse(bh);
455 if (!*dblock)
456 goto out;
457
458 error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh);
459 if (error)
460 goto out;
461 }
462
463 lookup_block(ip, bh, end_of_metadata, &mp, create, new, dblock);
464
465 if (extlen && *dblock) {
466 *extlen = 1;
467
468 if (!*new) {
469 uint64_t tmp_dblock;
470 int tmp_new;
471 unsigned int nptrs;
472
473 nptrs = (end_of_metadata) ? sdp->sd_inptrs :
474 sdp->sd_diptrs;
475
476 while (++mp.mp_list[end_of_metadata] < nptrs) {
477 lookup_block(ip, bh, end_of_metadata, &mp,
478 0, &tmp_new, &tmp_dblock);
479
480 if (*dblock + *extlen != tmp_dblock)
481 break;
482
483 (*extlen)++;
484 }
485 }
486 }
487
488 brelse(bh);
489
490 if (*new) {
491 error = gfs2_meta_inode_buffer(ip, &bh);
492 if (!error) {
493 gfs2_trans_add_bh(ip->i_gl, bh);
494 gfs2_dinode_out(&ip->i_di, bh->b_data);
495 brelse(bh);
496 }
497 }
498
499 out:
500 if (create)
501 up_write(&ip->i_rw_mutex);
502 else
503 up_read(&ip->i_rw_mutex);
504
505 return error;
506}
507
508/**
509 * recursive_scan - recursively scan through the end of a file
510 * @ip: the inode
511 * @dibh: the dinode buffer
512 * @mp: the path through the metadata to the point to start
513 * @height: the height the recursion is at
514 * @block: the indirect block to look at
515 * @first: 1 if this is the first block
516 * @bc: the call to make for each piece of metadata
517 * @data: data opaque to this function to pass to @bc
518 *
519 * When this is first called @height and @block should be zero and
520 * @first should be 1.
521 *
522 * Returns: errno
523 */
524
525static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
526 struct metapath *mp, unsigned int height,
527 uint64_t block, int first, block_call_t bc,
528 void *data)
529{
530 struct gfs2_sbd *sdp = ip->i_sbd;
531 struct buffer_head *bh = NULL;
532 uint64_t *top, *bottom;
533 uint64_t bn;
534 int error;
535 int mh_size = sizeof(struct gfs2_meta_header);
536
537 if (!height) {
538 error = gfs2_meta_inode_buffer(ip, &bh);
539 if (error)
540 return error;
541 dibh = bh;
542
543 top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
544 mp->mp_list[0];
545 bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
546 sdp->sd_diptrs;
547 } else {
548 error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
549 if (error)
550 return error;
551
552 top = (uint64_t *)(bh->b_data + mh_size) +
553 ((first) ? mp->mp_list[height] : 0);
554
555 bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs;
556 }
557
558 error = bc(ip, dibh, bh, top, bottom, height, data);
559 if (error)
560 goto out;
561
562 if (height < ip->i_di.di_height - 1)
563 for (; top < bottom; top++, first = 0) {
564 if (!*top)
565 continue;
566
567 bn = be64_to_cpu(*top);
568
569 error = recursive_scan(ip, dibh, mp, height + 1, bn,
570 first, bc, data);
571 if (error)
572 break;
573 }
574
575 out:
576 brelse(bh);
577
578 return error;
579}
580
581/**
582 * do_strip - Look for a layer a particular layer of the file and strip it off
583 * @ip: the inode
584 * @dibh: the dinode buffer
585 * @bh: A buffer of pointers
586 * @top: The first pointer in the buffer
587 * @bottom: One more than the last pointer
588 * @height: the height this buffer is at
589 * @data: a pointer to a struct strip_mine
590 *
591 * Returns: errno
592 */
593
594static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
595 struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
596 unsigned int height, void *data)
597{
598 struct strip_mine *sm = (struct strip_mine *)data;
599 struct gfs2_sbd *sdp = ip->i_sbd;
600 struct gfs2_rgrp_list rlist;
601 uint64_t bn, bstart;
602 uint32_t blen;
603 uint64_t *p;
604 unsigned int rg_blocks = 0;
605 int metadata;
606 unsigned int revokes = 0;
607 int x;
608 int error;
609
610 if (!*top)
611 sm->sm_first = 0;
612
613 if (height != sm->sm_height)
614 return 0;
615
616 if (sm->sm_first) {
617 top++;
618 sm->sm_first = 0;
619 }
620
621 metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip);
622 if (metadata)
623 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
624
625 error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
626 if (error)
627 return error;
628
629 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
630 bstart = 0;
631 blen = 0;
632
633 for (p = top; p < bottom; p++) {
634 if (!*p)
635 continue;
636
637 bn = be64_to_cpu(*p);
638
639 if (bstart + blen == bn)
640 blen++;
641 else {
642 if (bstart)
643 gfs2_rlist_add(sdp, &rlist, bstart);
644
645 bstart = bn;
646 blen = 1;
647 }
648 }
649
650 if (bstart)
651 gfs2_rlist_add(sdp, &rlist, bstart);
652 else
653 goto out; /* Nothing to do */
654
655 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
656
657 for (x = 0; x < rlist.rl_rgrps; x++) {
658 struct gfs2_rgrpd *rgd;
659 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
660 rg_blocks += rgd->rd_ri.ri_length;
661 }
662
663 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
664 if (error)
665 goto out_rlist;
666
667 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
668 RES_INDIRECT + RES_STATFS + RES_QUOTA,
669 revokes);
670 if (error)
671 goto out_rg_gunlock;
672
673 down_write(&ip->i_rw_mutex);
674
675 gfs2_trans_add_bh(ip->i_gl, dibh);
676 gfs2_trans_add_bh(ip->i_gl, bh);
677
678 bstart = 0;
679 blen = 0;
680
681 for (p = top; p < bottom; p++) {
682 if (!*p)
683 continue;
684
685 bn = be64_to_cpu(*p);
686
687 if (bstart + blen == bn)
688 blen++;
689 else {
690 if (bstart) {
691 if (metadata)
692 gfs2_free_meta(ip, bstart, blen);
693 else
694 gfs2_free_data(ip, bstart, blen);
695 }
696
697 bstart = bn;
698 blen = 1;
699 }
700
701 *p = 0;
702 if (!ip->i_di.di_blocks)
703 gfs2_consist_inode(ip);
704 ip->i_di.di_blocks--;
705 }
706 if (bstart) {
707 if (metadata)
708 gfs2_free_meta(ip, bstart, blen);
709 else
710 gfs2_free_data(ip, bstart, blen);
711 }
712
713 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
714
715 gfs2_dinode_out(&ip->i_di, dibh->b_data);
716
717 up_write(&ip->i_rw_mutex);
718
719 gfs2_trans_end(sdp);
720
721 out_rg_gunlock:
722 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
723
724 out_rlist:
725 gfs2_rlist_free(&rlist);
726
727 out:
728 gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
729
730 return error;
731}
732
733/**
734 * do_grow - Make a file look bigger than it is
735 * @ip: the inode
736 * @size: the size to set the file to
737 *
738 * Called with an exclusive lock on @ip.
739 *
740 * Returns: errno
741 */
742
743static int do_grow(struct gfs2_inode *ip, uint64_t size)
744{
745 struct gfs2_sbd *sdp = ip->i_sbd;
746 struct gfs2_alloc *al;
747 struct buffer_head *dibh;
748 unsigned int h;
749 int error;
750
751 al = gfs2_alloc_get(ip);
752
753 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
754 if (error)
755 goto out;
756
757 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
758 if (error)
759 goto out_gunlock_q;
760
761 al->al_requested = sdp->sd_max_height + RES_DATA;
762
763 error = gfs2_inplace_reserve(ip);
764 if (error)
765 goto out_gunlock_q;
766
767 error = gfs2_trans_begin(sdp,
768 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
769 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
770 if (error)
771 goto out_ipres;
772
773 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
774 if (gfs2_is_stuffed(ip)) {
775 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
776 NULL);
777 if (error)
778 goto out_end_trans;
779 }
780
781 h = calc_tree_height(ip, size);
782 if (ip->i_di.di_height < h) {
783 down_write(&ip->i_rw_mutex);
784 error = build_height(ip, h);
785 up_write(&ip->i_rw_mutex);
786 if (error)
787 goto out_end_trans;
788 }
789 }
790
791 ip->i_di.di_size = size;
792 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
793
794 error = gfs2_meta_inode_buffer(ip, &dibh);
795 if (error)
796 goto out_end_trans;
797
798 gfs2_trans_add_bh(ip->i_gl, dibh);
799 gfs2_dinode_out(&ip->i_di, dibh->b_data);
800 brelse(dibh);
801
802 out_end_trans:
803 gfs2_trans_end(sdp);
804
805 out_ipres:
806 gfs2_inplace_release(ip);
807
808 out_gunlock_q:
809 gfs2_quota_unlock(ip);
810
811 out:
812 gfs2_alloc_put(ip);
813
814 return error;
815}
816
817static int truncator_journaled(struct gfs2_inode *ip, uint64_t size)
818{
819 uint64_t lbn, dbn;
820 uint32_t off;
821 struct buffer_head *bh;
822 int new = 0;
823 int error;
824
825 lbn = size;
826 off = do_div(lbn, ip->i_sbd->sd_jbsize);
827
828 error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
829 if (error || !dbn)
830 return error;
831
832 error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh);
833 if (error)
834 return error;
835
836 gfs2_trans_add_bh(ip->i_gl, bh);
837 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off);
838
839 brelse(bh);
840
841 return 0;
842}
843
844static int trunc_start(struct gfs2_inode *ip, uint64_t size,
845 gfs2_truncator_t truncator)
846{
847 struct gfs2_sbd *sdp = ip->i_sbd;
848 struct buffer_head *dibh;
849 int journaled = gfs2_is_jdata(ip);
850 int error;
851
852 error = gfs2_trans_begin(sdp,
853 RES_DINODE + ((journaled) ? RES_JDATA : 0), 0);
854 if (error)
855 return error;
856
857 error = gfs2_meta_inode_buffer(ip, &dibh);
858 if (error)
859 goto out;
860
861 if (gfs2_is_stuffed(ip)) {
862 ip->i_di.di_size = size;
863 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
864 gfs2_trans_add_bh(ip->i_gl, dibh);
865 gfs2_dinode_out(&ip->i_di, dibh->b_data);
866 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
867 error = 1;
868
869 } else {
870 if (journaled) {
871 uint64_t junk = size;
872 /* we're just interested in the modulus */
873 if (do_div(junk, sdp->sd_jbsize))
874 error = truncator_journaled(ip, size);
875 } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
876 error = truncator(ip, size);
877
878 if (!error) {
879 ip->i_di.di_size = size;
880 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
881 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
882 gfs2_trans_add_bh(ip->i_gl, dibh);
883 gfs2_dinode_out(&ip->i_di, dibh->b_data);
884 }
885 }
886
887 brelse(dibh);
888
889 out:
890 gfs2_trans_end(sdp);
891
892 return error;
893}
894
895static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size)
896{
897 unsigned int height = ip->i_di.di_height;
898 uint64_t lblock;
899 struct metapath mp;
900 int error;
901
902 if (!size)
903 lblock = 0;
904 else if (gfs2_is_jdata(ip)) {
905 lblock = size - 1;
906 do_div(lblock, ip->i_sbd->sd_jbsize);
907 } else
908 lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
909
910 find_metapath(ip, lblock, &mp);
911 gfs2_alloc_get(ip);
912
913 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
914 if (error)
915 goto out;
916
917 while (height--) {
918 struct strip_mine sm;
919 sm.sm_first = !!size;
920 sm.sm_height = height;
921
922 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
923 if (error)
924 break;
925 }
926
927 gfs2_quota_unhold(ip);
928
929 out:
930 gfs2_alloc_put(ip);
931 return error;
932}
933
934static int trunc_end(struct gfs2_inode *ip)
935{
936 struct gfs2_sbd *sdp = ip->i_sbd;
937 struct buffer_head *dibh;
938 int error;
939
940 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
941 if (error)
942 return error;
943
944 down_write(&ip->i_rw_mutex);
945
946 error = gfs2_meta_inode_buffer(ip, &dibh);
947 if (error)
948 goto out;
949
950 if (!ip->i_di.di_size) {
951 ip->i_di.di_height = 0;
952 ip->i_di.di_goal_meta =
953 ip->i_di.di_goal_data =
954 ip->i_num.no_addr;
955 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
956 }
957 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
958 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
959
960 gfs2_trans_add_bh(ip->i_gl, dibh);
961 gfs2_dinode_out(&ip->i_di, dibh->b_data);
962 brelse(dibh);
963
964 out:
965 up_write(&ip->i_rw_mutex);
966
967 gfs2_trans_end(sdp);
968
969 return error;
970}
971
972/**
973 * do_shrink - make a file smaller
974 * @ip: the inode
975 * @size: the size to make the file
976 * @truncator: function to truncate the last partial block
977 *
978 * Called with an exclusive lock on @ip.
979 *
980 * Returns: errno
981 */
982
983static int do_shrink(struct gfs2_inode *ip, uint64_t size,
984 gfs2_truncator_t truncator)
985{
986 int error;
987
988 error = trunc_start(ip, size, truncator);
989 if (error < 0)
990 return error;
991 if (error > 0)
992 return 0;
993
994 error = trunc_dealloc(ip, size);
995 if (!error)
996 error = trunc_end(ip);
997
998 return error;
999}
1000
1001/**
1002 * gfs2_truncatei - make a file a give size
1003 * @ip: the inode
1004 * @size: the size to make the file
1005 * @truncator: function to truncate the last partial block
1006 *
1007 * The file size can grow, shrink, or stay the same size.
1008 *
1009 * Returns: errno
1010 */
1011
1012int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size,
1013 gfs2_truncator_t truncator)
1014{
1015 int error;
1016
1017 if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode)))
1018 return -EINVAL;
1019
1020 if (size > ip->i_di.di_size)
1021 error = do_grow(ip, size);
1022 else
1023 error = do_shrink(ip, size, truncator);
1024
1025 return error;
1026}
1027
1028int gfs2_truncatei_resume(struct gfs2_inode *ip)
1029{
1030 int error;
1031 error = trunc_dealloc(ip, ip->i_di.di_size);
1032 if (!error)
1033 error = trunc_end(ip);
1034 return error;
1035}
1036
/*
 * gfs2_file_dealloc - deallocate a file's blocks by stripping its
 * metadata tree down to size zero
 */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	int error;

	error = trunc_dealloc(ip, 0);

	return error;
}
1041
1042/**
1043 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1044 * @ip: the file
1045 * @len: the number of bytes to be written to the file
1046 * @data_blocks: returns the number of data blocks required
1047 * @ind_blocks: returns the number of indirect blocks required
1048 *
1049 */
1050
1051void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1052 unsigned int *data_blocks, unsigned int *ind_blocks)
1053{
1054 struct gfs2_sbd *sdp = ip->i_sbd;
1055 unsigned int tmp;
1056
1057 if (gfs2_is_jdata(ip)) {
1058 *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
1059 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1060 } else {
1061 *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1062 *ind_blocks = 3 * (sdp->sd_max_height - 1);
1063 }
1064
1065 for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1066 tmp = DIV_RU(tmp, sdp->sd_inptrs);
1067 *ind_blocks += tmp;
1068 }
1069}
1070
1071/**
1072 * gfs2_write_alloc_required - figure out if a write will require an allocation
1073 * @ip: the file being written to
1074 * @offset: the offset to write to
1075 * @len: the number of bytes being written
1076 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1077 *
1078 * Returns: errno
1079 */
1080
1081int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
1082 unsigned int len, int *alloc_required)
1083{
1084 struct gfs2_sbd *sdp = ip->i_sbd;
1085 uint64_t lblock, lblock_stop, dblock;
1086 uint32_t extlen;
1087 int new = 0;
1088 int error = 0;
1089
1090 *alloc_required = 0;
1091
1092 if (!len)
1093 return 0;
1094
1095 if (gfs2_is_stuffed(ip)) {
1096 if (offset + len >
1097 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1098 *alloc_required = 1;
1099 return 0;
1100 }
1101
1102 if (gfs2_is_jdata(ip)) {
1103 unsigned int bsize = sdp->sd_jbsize;
1104 lblock = offset;
1105 do_div(lblock, bsize);
1106 lblock_stop = offset + len + bsize - 1;
1107 do_div(lblock_stop, bsize);
1108 } else {
1109 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1110 lblock = offset >> shift;
1111 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1112 }
1113
1114 for (; lblock < lblock_stop; lblock += extlen) {
1115 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
1116 if (error)
1117 return error;
1118
1119 if (!dblock) {
1120 *alloc_required = 1;
1121 return 0;
1122 }
1123 }
1124
1125 return 0;
1126}
1127
1128/**
1129 * do_gfm - Copy out the dinode/indirect blocks of a file
1130 * @ip: the file
1131 * @dibh: the dinode buffer
1132 * @bh: the indirect buffer we're looking at
1133 * @top: the first pointer in the block
1134 * @bottom: one more than the last pointer in the block
1135 * @height: the height the block is at
1136 * @data: a pointer to a struct gfs2_user_buffer structure
1137 *
1138 * If this is a journaled file, copy out the data too.
1139 *
1140 * Returns: errno
1141 */
1142
1143static int do_gfm(struct gfs2_inode *ip, struct buffer_head *dibh,
1144 struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
1145 unsigned int height, void *data)
1146{
1147 struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data;
1148 int error;
1149
1150 error = gfs2_add_bh_to_ub(ub, bh);
1151 if (error)
1152 return error;
1153
1154 if (!S_ISDIR(ip->i_di.di_mode) ||
1155 height + 1 != ip->i_di.di_height)
1156 return 0;
1157
1158 for (; top < bottom; top++)
1159 if (*top) {
1160 struct buffer_head *data_bh;
1161
1162 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*top),
1163 DIO_START | DIO_WAIT,
1164 &data_bh);
1165 if (error)
1166 return error;
1167
1168 error = gfs2_add_bh_to_ub(ub, data_bh);
1169
1170 brelse(data_bh);
1171
1172 if (error)
1173 return error;
1174 }
1175
1176 return 0;
1177}
1178
1179/**
1180 * gfs2_get_file_meta - return all the metadata for a file
1181 * @ip: the file
1182 * @ub: the structure representing the meta
1183 *
1184 * Returns: errno
1185 */
1186
1187int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub)
1188{
1189 int error;
1190
1191 if (gfs2_is_stuffed(ip)) {
1192 struct buffer_head *dibh;
1193 error = gfs2_meta_inode_buffer(ip, &dibh);
1194 if (!error) {
1195 error = gfs2_add_bh_to_ub(ub, dibh);
1196 brelse(dibh);
1197 }
1198 } else {
1199 struct metapath mp;
1200 find_metapath(ip, 0, &mp);
1201 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_gfm, ub);
1202 }
1203
1204 return error;
1205}
1206
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
new file mode 100644
index 000000000000..de16e44f049f
--- /dev/null
+++ b/fs/gfs2/bmap.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __BMAP_DOT_H__
11#define __BMAP_DOT_H__
12
13typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip,
14 struct buffer_head * dibh, uint64_t block,
15 void *private);
16int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
17 uint64_t block, void *private);
18int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
19 void *private);
20
21int gfs2_block_map(struct gfs2_inode *ip,
22 uint64_t lblock, int *new,
23 uint64_t *dblock, uint32_t *extlen);
24
25typedef int (*gfs2_truncator_t) (struct gfs2_inode * ip, uint64_t size);
26int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size,
27 gfs2_truncator_t truncator);
28int gfs2_truncatei_resume(struct gfs2_inode *ip);
29int gfs2_file_dealloc(struct gfs2_inode *ip);
30
31void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
32 unsigned int *data_blocks,
33 unsigned int *ind_blocks);
34int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
35 unsigned int len, int *alloc_required);
36
37int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub);
38
39#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
new file mode 100644
index 000000000000..cff8d5368d21
--- /dev/null
+++ b/fs/gfs2/daemon.c
@@ -0,0 +1,225 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/delay.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "daemon.h"
21#include "glock.h"
22#include "log.h"
23#include "quota.h"
24#include "recovery.h"
25#include "super.h"
26#include "unlinked.h"
27
28/* This uses schedule_timeout() instead of msleep() because it's good for
29 the daemons to wake up more often than the timeout when unmounting so
30 the user's unmount doesn't sit there forever.
31
32 The kthread functions used to start these daemons block and flush signals. */
33
34/**
35 * gfs2_scand - Look for cached glocks and inodes to toss from memory
36 * @sdp: Pointer to GFS2 superblock
37 *
38 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
39 * See gfs2_glockd()
40 */
41
42int gfs2_scand(void *data)
43{
44 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
45 unsigned long t;
46
47 while (!kthread_should_stop()) {
48 gfs2_scand_internal(sdp);
49 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
50 schedule_timeout_interruptible(t);
51 }
52
53 return 0;
54}
55
56/**
57 * gfs2_glockd - Reclaim unused glock structures
58 * @sdp: Pointer to GFS2 superblock
59 *
60 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
61 * Number of daemons can be set by user, with num_glockd mount option.
62 */
63
64int gfs2_glockd(void *data)
65{
66 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
67 DECLARE_WAITQUEUE(wait_chan, current);
68
69 while (!kthread_should_stop()) {
70 while (atomic_read(&sdp->sd_reclaim_count))
71 gfs2_reclaim_glock(sdp);
72
73 set_current_state(TASK_INTERRUPTIBLE);
74 add_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
75 if (!atomic_read(&sdp->sd_reclaim_count) &&
76 !kthread_should_stop())
77 schedule();
78 remove_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
79 set_current_state(TASK_RUNNING);
80 }
81
82 return 0;
83}
84
85/**
86 * gfs2_recoverd - Recover dead machine's journals
87 * @sdp: Pointer to GFS2 superblock
88 *
89 */
90
91int gfs2_recoverd(void *data)
92{
93 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
94 unsigned long t;
95
96 while (!kthread_should_stop()) {
97 gfs2_check_journals(sdp);
98 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
99 schedule_timeout_interruptible(t);
100 }
101
102 return 0;
103}
104
105/**
106 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
107 * @sdp: Pointer to GFS2 superblock
108 *
109 * Also, periodically check to make sure that we're using the most recent
110 * journal index.
111 */
112
113int gfs2_logd(void *data)
114{
115 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
116 struct gfs2_holder ji_gh;
117 unsigned long t;
118
119 while (!kthread_should_stop()) {
120 /* Advance the log tail */
121
122 t = sdp->sd_log_flush_time +
123 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
124
125 gfs2_ail1_empty(sdp, DIO_ALL);
126
127 if (time_after_eq(jiffies, t)) {
128 gfs2_log_flush(sdp);
129 sdp->sd_log_flush_time = jiffies;
130 }
131
132 /* Check for latest journal index */
133
134 t = sdp->sd_jindex_refresh_time +
135 gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
136
137 if (time_after_eq(jiffies, t)) {
138 if (!gfs2_jindex_hold(sdp, &ji_gh))
139 gfs2_glock_dq_uninit(&ji_gh);
140 sdp->sd_jindex_refresh_time = jiffies;
141 }
142
143 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
144 schedule_timeout_interruptible(t);
145 }
146
147 return 0;
148}
149
150/**
151 * gfs2_quotad - Write cached quota changes into the quota file
152 * @sdp: Pointer to GFS2 superblock
153 *
154 */
155
156int gfs2_quotad(void *data)
157{
158 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
159 unsigned long t;
160 int error;
161
162 while (!kthread_should_stop()) {
163 /* Update the master statfs file */
164
165 t = sdp->sd_statfs_sync_time +
166 gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
167
168 if (time_after_eq(jiffies, t)) {
169 error = gfs2_statfs_sync(sdp);
170 if (error &&
171 error != -EROFS &&
172 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
173 fs_err(sdp, "quotad: (1) error=%d\n", error);
174 sdp->sd_statfs_sync_time = jiffies;
175 }
176
177 /* Update quota file */
178
179 t = sdp->sd_quota_sync_time +
180 gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
181
182 if (time_after_eq(jiffies, t)) {
183 error = gfs2_quota_sync(sdp);
184 if (error &&
185 error != -EROFS &&
186 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
187 fs_err(sdp, "quotad: (2) error=%d\n", error);
188 sdp->sd_quota_sync_time = jiffies;
189 }
190
191 gfs2_quota_scan(sdp);
192
193 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
194 schedule_timeout_interruptible(t);
195 }
196
197 return 0;
198}
199
200/**
201 * gfs2_inoded - Deallocate unlinked inodes
202 * @sdp: Pointer to GFS2 superblock
203 *
204 */
205
206int gfs2_inoded(void *data)
207{
208 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
209 unsigned long t;
210 int error;
211
212 while (!kthread_should_stop()) {
213 error = gfs2_unlinked_dealloc(sdp);
214 if (error &&
215 error != -EROFS &&
216 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
217 fs_err(sdp, "inoded: error = %d\n", error);
218
219 t = gfs2_tune_get(sdp, gt_inoded_secs) * HZ;
220 schedule_timeout_interruptible(t);
221 }
222
223 return 0;
224}
225
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
new file mode 100644
index 000000000000..a27fdeda5fbb
--- /dev/null
+++ b/fs/gfs2/daemon.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__

/*
 * Thread entry points for the GFS2 daemons.  Each one casts @data to a
 * struct gfs2_sbd pointer and loops until kthread_should_stop().
 */
int gfs2_scand(void *data);	/* scans for glocks/inodes to reclaim */
int gfs2_glockd(void *data);	/* reclaims unused glocks */
int gfs2_recoverd(void *data);	/* checks journals for recovery */
int gfs2_logd(void *data);	/* flushes the log, refreshes the jindex */
int gfs2_quotad(void *data);	/* syncs statfs and quota changes */
int gfs2_inoded(void *data);	/* deallocates unlinked inodes */

#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
new file mode 100644
index 000000000000..6b1dc3dc3a2d
--- /dev/null
+++ b/fs/gfs2/dir.c
@@ -0,0 +1,2157 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11* Implements Extendible Hashing as described in:
12* "Extendible Hashing" by Fagin, et al in
13* __ACM Trans. on Database Systems__, Sept 1979.
14*
15*
16* Here's the layout of dirents which is essentially the same as that of ext2
17* within a single block. The field de_name_len is the number of bytes
18* actually required for the name (no null terminator). The field de_rec_len
19* is the number of bytes allocated to the dirent. The offset of the next
20* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21* deleted, the preceding dirent inherits its allocated space, ie
22* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23* by adding de_rec_len to the current dirent, this essentially causes the
24* deleted dirent to get jumped over when iterating through all the dirents.
25*
26* When deleting the first dirent in a block, there is no previous dirent so
27* the field de_ino is set to zero to designate it as deleted. When allocating
28* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30* dirent is allocated. Otherwise it must go through all the 'used' dirents
31* searching for one in which the amount of total space minus the amount of
32* used space will provide enough space for the new dirent.
33*
34* There are two types of blocks in which dirents reside. In a stuffed dinode,
35* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37* beginning of the leaf block. The dirents reside in leaves when
38*
39* dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40*
41* Otherwise, the dirents are "linear", within a single stuffed dinode block.
42*
43* When the dirents are in leaves, the actual contents of the directory file are
44* used as an array of 64-bit block pointers pointing to the leaf blocks. The
45* dirents are NOT in the directory file itself. There can be more than one block
46* pointer in the array that points to the same leaf. In fact, when a directory
47* is first converted from linear to exhash, all of the pointers point to the
48* same leaf.
49*
50* When a leaf is completely full, the size of the hash table can be
51* doubled unless it is already at the maximum size which is hard coded into
52* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53* but never before the maximum hash table size has been reached.
54*/
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/completion.h>
60#include <linux/buffer_head.h>
61#include <linux/sort.h>
62#include <asm/semaphore.h>
63
64#include "gfs2.h"
65#include "dir.h"
66#include "glock.h"
67#include "inode.h"
68#include "jdata.h"
69#include "meta_io.h"
70#include "quota.h"
71#include "rgrp.h"
72#include "trans.h"
73
/* Block kinds reported by dirent_first() */
#define IS_LEAF		1	/* Hashed (leaf) directory */
#define IS_DINODE	2	/* Linear (stuffed dinode block) directory */

/*
 * Conversion between a 32-bit on-disk name hash and a readdir offset.
 * The active variant drops the low bit of the hash when forming the
 * offset and shifts it back in (as zero) on the way out; the disabled
 * variant is the identity mapping.
 */
#if 1
#define gfs2_disk_hash2offset(h)	((uint64_t)(h) >> 1)
#define gfs2_dir_offset2hash(p)		((uint32_t)((uint64_t)(p) << 1))
#else
#define gfs2_disk_hash2offset(h)	((uint64_t)(h))
#define gfs2_dir_offset2hash(p)		((uint32_t)((uint64_t)(p)))
#endif

/* Callback signature taking a directory, a hash-table range and a leaf block */
typedef int (*leaf_call_t) (struct gfs2_inode *dip,
			    uint32_t index, uint32_t len, uint64_t leaf_no,
			    void *data);
88
89/**
90 * int gfs2_filecmp - Compare two filenames
91 * @file1: The first filename
92 * @file2: The second filename
93 * @len_of_file2: The length of the second file
94 *
95 * This routine compares two filenames and returns 1 if they are equal.
96 *
97 * Returns: 1 if the files are the same, otherwise 0.
98 */
99
100int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2)
101{
102 if (file1->len != len_of_file2)
103 return 0;
104 if (memcmp(file1->name, file2, file1->len))
105 return 0;
106 return 1;
107}
108
109/**
110 * dirent_first - Return the first dirent
111 * @dip: the directory
112 * @bh: The buffer
113 * @dent: Pointer to list of dirents
114 *
115 * return first dirent whether bh points to leaf or stuffed dinode
116 *
117 * Returns: IS_LEAF, IS_DINODE, or -errno
118 */
119
120static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
121 struct gfs2_dirent **dent)
122{
123 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
124
125 if (be16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
126 if (gfs2_meta_check(dip->i_sbd, bh))
127 return -EIO;
128 *dent = (struct gfs2_dirent *)(bh->b_data +
129 sizeof(struct gfs2_leaf));
130 return IS_LEAF;
131 } else {
132 if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
133 return -EIO;
134 *dent = (struct gfs2_dirent *)(bh->b_data +
135 sizeof(struct gfs2_dinode));
136 return IS_DINODE;
137 }
138}
139
140/**
141 * dirent_next - Next dirent
142 * @dip: the directory
143 * @bh: The buffer
144 * @dent: Pointer to list of dirents
145 *
146 * Returns: 0 on success, error code otherwise
147 */
148
149static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
150 struct gfs2_dirent **dent)
151{
152 struct gfs2_dirent *tmp, *cur;
153 char *bh_end;
154 uint32_t cur_rec_len;
155
156 cur = *dent;
157 bh_end = bh->b_data + bh->b_size;
158 cur_rec_len = be32_to_cpu(cur->de_rec_len);
159
160 if ((char *)cur + cur_rec_len >= bh_end) {
161 if ((char *)cur + cur_rec_len > bh_end) {
162 gfs2_consist_inode(dip);
163 return -EIO;
164 }
165 return -ENOENT;
166 }
167
168 tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
169
170 if ((char *)tmp + be32_to_cpu(tmp->de_rec_len) > bh_end) {
171 gfs2_consist_inode(dip);
172 return -EIO;
173 }
174 /* Only the first dent could ever have de_inum.no_addr == 0 */
175 if (!tmp->de_inum.no_addr) {
176 gfs2_consist_inode(dip);
177 return -EIO;
178 }
179
180 *dent = tmp;
181
182 return 0;
183}
184
185/**
186 * dirent_del - Delete a dirent
187 * @dip: The GFS2 inode
188 * @bh: The buffer
189 * @prev: The previous dirent
190 * @cur: The current dirent
191 *
192 */
193
194static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
195 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
196{
197 uint32_t cur_rec_len, prev_rec_len;
198
199 if (!cur->de_inum.no_addr) {
200 gfs2_consist_inode(dip);
201 return;
202 }
203
204 gfs2_trans_add_bh(dip->i_gl, bh);
205
206 /* If there is no prev entry, this is the first entry in the block.
207 The de_rec_len is already as big as it needs to be. Just zero
208 out the inode number and return. */
209
210 if (!prev) {
211 cur->de_inum.no_addr = 0; /* No endianess worries */
212 return;
213 }
214
215 /* Combine this dentry with the previous one. */
216
217 prev_rec_len = be32_to_cpu(prev->de_rec_len);
218 cur_rec_len = be32_to_cpu(cur->de_rec_len);
219
220 if ((char *)prev + prev_rec_len != (char *)cur)
221 gfs2_consist_inode(dip);
222 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
223 gfs2_consist_inode(dip);
224
225 prev_rec_len += cur_rec_len;
226 prev->de_rec_len = cpu_to_be32(prev_rec_len);
227}
228
229/**
230 * gfs2_dirent_alloc - Allocate a directory entry
231 * @dip: The GFS2 inode
232 * @bh: The buffer
233 * @name_len: The length of the name
234 * @dent_out: Pointer to list of dirents
235 *
236 * Returns: 0 on success, error code otherwise
237 */
238
239int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
240 int name_len, struct gfs2_dirent **dent_out)
241{
242 struct gfs2_dirent *dent, *new;
243 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
244 unsigned int entries = 0, offset = 0;
245 int type;
246
247 type = dirent_first(dip, bh, &dent);
248 if (type < 0)
249 return type;
250
251 if (type == IS_LEAF) {
252 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
253 entries = be16_to_cpu(leaf->lf_entries);
254 offset = sizeof(struct gfs2_leaf);
255 } else {
256 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
257 entries = be32_to_cpu(dinode->di_entries);
258 offset = sizeof(struct gfs2_dinode);
259 }
260
261 if (!entries) {
262 if (dent->de_inum.no_addr) {
263 gfs2_consist_inode(dip);
264 return -EIO;
265 }
266
267 gfs2_trans_add_bh(dip->i_gl, bh);
268
269 dent->de_rec_len = bh->b_size - offset;
270 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
271 dent->de_name_len = name_len;
272
273 *dent_out = dent;
274 return 0;
275 }
276
277 do {
278 uint32_t cur_rec_len, cur_name_len;
279
280 cur_rec_len = be32_to_cpu(dent->de_rec_len);
281 cur_name_len = dent->de_name_len;
282
283 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
284 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) {
285 gfs2_trans_add_bh(dip->i_gl, bh);
286
287 if (dent->de_inum.no_addr) {
288 new = (struct gfs2_dirent *)((char *)dent +
289 GFS2_DIRENT_SIZE(cur_name_len));
290 memset(new, 0, sizeof(struct gfs2_dirent));
291
292 new->de_rec_len = cur_rec_len - GFS2_DIRENT_SIZE(cur_name_len);
293 new->de_rec_len = cpu_to_be32(new->de_rec_len);
294 new->de_name_len = name_len;
295
296 dent->de_rec_len = cur_rec_len - be32_to_cpu(new->de_rec_len);
297 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
298
299 *dent_out = new;
300 return 0;
301 }
302
303 dent->de_name_len = name_len;
304
305 *dent_out = dent;
306 return 0;
307 }
308 } while (dirent_next(dip, bh, &dent) == 0);
309
310 return -ENOSPC;
311}
312
313/**
314 * dirent_fits - See if we can fit a entry in this buffer
315 * @dip: The GFS2 inode
316 * @bh: The buffer
317 * @name_len: The length of the name
318 *
319 * Returns: 1 if it can fit, 0 otherwise
320 */
321
322static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh,
323 int name_len)
324{
325 struct gfs2_dirent *dent;
326 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
327 unsigned int entries = 0;
328 int type;
329
330 type = dirent_first(dip, bh, &dent);
331 if (type < 0)
332 return type;
333
334 if (type == IS_LEAF) {
335 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
336 entries = be16_to_cpu(leaf->lf_entries);
337 } else {
338 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
339 entries = be32_to_cpu(dinode->di_entries);
340 }
341
342 if (!entries)
343 return 1;
344
345 do {
346 uint32_t cur_rec_len, cur_name_len;
347
348 cur_rec_len = be32_to_cpu(dent->de_rec_len);
349 cur_name_len = dent->de_name_len;
350
351 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
352 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len))
353 return 1;
354 } while (dirent_next(dip, bh, &dent) == 0);
355
356 return 0;
357}
358
359static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh,
360 struct qstr *filename, struct gfs2_dirent **dent_out,
361 struct gfs2_dirent **dent_prev)
362{
363 uint32_t hash;
364 struct gfs2_dirent *dent, *prev = NULL;
365 unsigned int entries = 0;
366 int type;
367
368 type = dirent_first(dip, bh, &dent);
369 if (type < 0)
370 return type;
371
372 if (type == IS_LEAF) {
373 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
374 entries = be16_to_cpu(leaf->lf_entries);
375 } else if (type == IS_DINODE) {
376 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
377 entries = be32_to_cpu(dinode->di_entries);
378 }
379
380 hash = gfs2_disk_hash(filename->name, filename->len);
381
382 do {
383 if (!dent->de_inum.no_addr) {
384 prev = dent;
385 continue;
386 }
387
388 if (be32_to_cpu(dent->de_hash) == hash &&
389 gfs2_filecmp(filename, (char *)(dent + 1),
390 dent->de_name_len)) {
391 *dent_out = dent;
392 if (dent_prev)
393 *dent_prev = prev;
394
395 return 0;
396 }
397
398 prev = dent;
399 } while (dirent_next(dip, bh, &dent) == 0);
400
401 return -ENOENT;
402}
403
404static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
405 struct buffer_head **bhp)
406{
407 int error;
408
409 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
410 if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
411 error = -EIO;
412
413 return error;
414}
415
416/**
417 * get_leaf_nr - Get a leaf number associated with the index
418 * @dip: The GFS2 inode
419 * @index:
420 * @leaf_out:
421 *
422 * Returns: 0 on success, error code otherwise
423 */
424
425static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
426 uint64_t *leaf_out)
427{
428 uint64_t leaf_no;
429 int error;
430
431 error = gfs2_jdata_read_mem(dip, (char *)&leaf_no,
432 index * sizeof(uint64_t),
433 sizeof(uint64_t));
434 if (error != sizeof(uint64_t))
435 return (error < 0) ? error : -EIO;
436
437 *leaf_out = be64_to_cpu(leaf_no);
438
439 return 0;
440}
441
/**
 * get_first_leaf - Read the leaf a hash-table slot points to
 * @dip: The GFS2 inode
 * @index: Slot in the directory's hash table
 * @bh_out: Filled with the leaf's buffer on success
 *
 * Returns: 0 on success, error code otherwise
 */

static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
			  struct buffer_head **bh_out)
{
	uint64_t leaf_no;
	int error;

	error = get_leaf_nr(dip, index, &leaf_no);
	if (error)
		return error;

	return get_leaf(dip, leaf_no, bh_out);
}
454
455static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in,
456 struct buffer_head **bh_out)
457{
458 struct gfs2_leaf *leaf;
459 int error;
460
461 leaf = (struct gfs2_leaf *)bh_in->b_data;
462
463 if (!leaf->lf_next)
464 error = -ENOENT;
465 else
466 error = get_leaf(dip, be64_to_cpu(leaf->lf_next), bh_out);
467
468 return error;
469}
470
471static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename,
472 struct gfs2_dirent **dent_out,
473 struct gfs2_dirent **dent_prev,
474 struct buffer_head **bh_out)
475{
476 struct buffer_head *bh = NULL, *bh_next;
477 uint32_t hsize, index;
478 uint32_t hash;
479 int error;
480
481 hsize = 1 << dip->i_di.di_depth;
482 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
483 gfs2_consist_inode(dip);
484 return -EIO;
485 }
486
487 /* Figure out the address of the leaf node. */
488
489 hash = gfs2_disk_hash(filename->name, filename->len);
490 index = hash >> (32 - dip->i_di.di_depth);
491
492 error = get_first_leaf(dip, index, &bh_next);
493 if (error)
494 return error;
495
496 /* Find the entry */
497
498 do {
499 brelse(bh);
500
501 bh = bh_next;
502
503 error = leaf_search(dip, bh, filename, dent_out, dent_prev);
504 switch (error) {
505 case 0:
506 *bh_out = bh;
507 return 0;
508
509 case -ENOENT:
510 break;
511
512 default:
513 brelse(bh);
514 return error;
515 }
516
517 error = get_next_leaf(dip, bh, &bh_next);
518 }
519 while (!error);
520
521 brelse(bh);
522
523 return error;
524}
525
526/**
527 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
528 * @dip: The GFS2 inode
529 *
530 * Returns: 0 on success, error code otherwise
531 */
532
533static int dir_make_exhash(struct gfs2_inode *dip)
534{
535 struct gfs2_sbd *sdp = dip->i_sbd;
536 struct gfs2_dirent *dent;
537 struct buffer_head *bh, *dibh;
538 struct gfs2_leaf *leaf;
539 int y;
540 uint32_t x;
541 uint64_t *lp, bn;
542 int error;
543
544 error = gfs2_meta_inode_buffer(dip, &dibh);
545 if (error)
546 return error;
547
548 /* Allocate a new block for the first leaf node */
549
550 bn = gfs2_alloc_meta(dip);
551
552 /* Turn over a new leaf */
553
554 bh = gfs2_meta_new(dip->i_gl, bn);
555 gfs2_trans_add_bh(dip->i_gl, bh);
556 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
557 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
558
559 /* Fill in the leaf structure */
560
561 leaf = (struct gfs2_leaf *)bh->b_data;
562
563 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
564
565 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
566 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
567
568 /* Copy dirents */
569
570 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
571 sizeof(struct gfs2_dinode));
572
573 /* Find last entry */
574
575 x = 0;
576 dirent_first(dip, bh, &dent);
577
578 do {
579 if (!dent->de_inum.no_addr)
580 continue;
581 if (++x == dip->i_di.di_entries)
582 break;
583 }
584 while (dirent_next(dip, bh, &dent) == 0);
585
586 /* Adjust the last dirent's record length
587 (Remember that dent still points to the last entry.) */
588
589 dent->de_rec_len = be32_to_cpu(dent->de_rec_len) +
590 sizeof(struct gfs2_dinode) -
591 sizeof(struct gfs2_leaf);
592 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
593
594 brelse(bh);
595
596 /* We're done with the new leaf block, now setup the new
597 hash table. */
598
599 gfs2_trans_add_bh(dip->i_gl, dibh);
600 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
601
602 lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
603
604 for (x = sdp->sd_hash_ptrs; x--; lp++)
605 *lp = cpu_to_be64(bn);
606
607 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
608 dip->i_di.di_blocks++;
609 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
610 dip->i_di.di_payload_format = 0;
611
612 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
613 dip->i_di.di_depth = y;
614
615 gfs2_dinode_out(&dip->i_di, dibh->b_data);
616
617 brelse(dibh);
618
619 return 0;
620}
621
622/**
623 * dir_split_leaf - Split a leaf block into two
624 * @dip: The GFS2 inode
625 * @index:
626 * @leaf_no:
627 *
628 * Returns: 0 on success, error code on failure
629 */
630
631static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index,
632 uint64_t leaf_no)
633{
634 struct buffer_head *nbh, *obh, *dibh;
635 struct gfs2_leaf *nleaf, *oleaf;
636 struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
637 uint32_t start, len, half_len, divider;
638 uint64_t bn, *lp;
639 uint32_t name_len;
640 int x, moved = 0;
641 int error;
642
643 /* Allocate the new leaf block */
644
645 bn = gfs2_alloc_meta(dip);
646
647 /* Get the new leaf block */
648
649 nbh = gfs2_meta_new(dip->i_gl, bn);
650 gfs2_trans_add_bh(dip->i_gl, nbh);
651 gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
652 gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header));
653
654 nleaf = (struct gfs2_leaf *)nbh->b_data;
655
656 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
657
658 /* Get the old leaf block */
659
660 error = get_leaf(dip, leaf_no, &obh);
661 if (error)
662 goto fail;
663
664 gfs2_trans_add_bh(dip->i_gl, obh);
665
666 oleaf = (struct gfs2_leaf *)obh->b_data;
667
668 /* Compute the start and len of leaf pointers in the hash table. */
669
670 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
671 half_len = len >> 1;
672 if (!half_len) {
673 gfs2_consist_inode(dip);
674 error = -EIO;
675 goto fail_brelse;
676 }
677
678 start = (index & ~(len - 1));
679
680 /* Change the pointers.
681 Don't bother distinguishing stuffed from non-stuffed.
682 This code is complicated enough already. */
683
684 lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL);
685
686 error = gfs2_jdata_read_mem(dip, (char *)lp, start * sizeof(uint64_t),
687 half_len * sizeof(uint64_t));
688 if (error != half_len * sizeof(uint64_t)) {
689 if (error >= 0)
690 error = -EIO;
691 goto fail_lpfree;
692 }
693
694 /* Change the pointers */
695
696 for (x = 0; x < half_len; x++)
697 lp[x] = cpu_to_be64(bn);
698
699 error = gfs2_jdata_write_mem(dip, (char *)lp, start * sizeof(uint64_t),
700 half_len * sizeof(uint64_t));
701 if (error != half_len * sizeof(uint64_t)) {
702 if (error >= 0)
703 error = -EIO;
704 goto fail_lpfree;
705 }
706
707 kfree(lp);
708
709 /* Compute the divider */
710
711 divider = (start + half_len) << (32 - dip->i_di.di_depth);
712
713 /* Copy the entries */
714
715 dirent_first(dip, obh, &dent);
716
717 do {
718 next = dent;
719 if (dirent_next(dip, obh, &next))
720 next = NULL;
721
722 if (dent->de_inum.no_addr &&
723 be32_to_cpu(dent->de_hash) < divider) {
724 name_len = dent->de_name_len;
725
726 gfs2_dirent_alloc(dip, nbh, name_len, &new);
727
728 new->de_inum = dent->de_inum; /* No endian worries */
729 new->de_hash = dent->de_hash; /* No endian worries */
730 new->de_type = dent->de_type; /* No endian worries */
731 memcpy((char *)(new + 1), (char *)(dent + 1),
732 name_len);
733
734 nleaf->lf_entries = be16_to_cpu(nleaf->lf_entries)+1;
735 nleaf->lf_entries = cpu_to_be16(nleaf->lf_entries);
736
737 dirent_del(dip, obh, prev, dent);
738
739 if (!oleaf->lf_entries)
740 gfs2_consist_inode(dip);
741 oleaf->lf_entries = be16_to_cpu(oleaf->lf_entries)-1;
742 oleaf->lf_entries = cpu_to_be16(oleaf->lf_entries);
743
744 if (!prev)
745 prev = dent;
746
747 moved = 1;
748 } else
749 prev = dent;
750
751 dent = next;
752 }
753 while (dent);
754
755 /* If none of the entries got moved into the new leaf,
756 artificially fill in the first entry. */
757
758 if (!moved) {
759 gfs2_dirent_alloc(dip, nbh, 0, &new);
760 new->de_inum.no_addr = 0;
761 }
762
763 oleaf->lf_depth = be16_to_cpu(oleaf->lf_depth) + 1;
764 oleaf->lf_depth = cpu_to_be16(oleaf->lf_depth);
765 nleaf->lf_depth = oleaf->lf_depth;
766
767 error = gfs2_meta_inode_buffer(dip, &dibh);
768 if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
769 dip->i_di.di_blocks++;
770 gfs2_dinode_out(&dip->i_di, dibh->b_data);
771 brelse(dibh);
772 }
773
774 brelse(obh);
775 brelse(nbh);
776
777 return error;
778
779 fail_lpfree:
780 kfree(lp);
781
782 fail_brelse:
783 brelse(obh);
784
785 fail:
786 brelse(nbh);
787 return error;
788}
789
790/**
791 * dir_double_exhash - Double size of ExHash table
792 * @dip: The GFS2 dinode
793 *
794 * Returns: 0 on success, error code on failure
795 */
796
797static int dir_double_exhash(struct gfs2_inode *dip)
798{
799 struct gfs2_sbd *sdp = dip->i_sbd;
800 struct buffer_head *dibh;
801 uint32_t hsize;
802 uint64_t *buf;
803 uint64_t *from, *to;
804 uint64_t block;
805 int x;
806 int error = 0;
807
808 hsize = 1 << dip->i_di.di_depth;
809 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
810 gfs2_consist_inode(dip);
811 return -EIO;
812 }
813
814 /* Allocate both the "from" and "to" buffers in one big chunk */
815
816 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
817
818 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
819 error = gfs2_jdata_read_mem(dip, (char *)buf,
820 block * sdp->sd_hash_bsize,
821 sdp->sd_hash_bsize);
822 if (error != sdp->sd_hash_bsize) {
823 if (error >= 0)
824 error = -EIO;
825 goto fail;
826 }
827
828 from = buf;
829 to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
830
831 for (x = sdp->sd_hash_ptrs; x--; from++) {
832 *to++ = *from; /* No endianess worries */
833 *to++ = *from;
834 }
835
836 error = gfs2_jdata_write_mem(dip,
837 (char *)buf + sdp->sd_hash_bsize,
838 block * sdp->sd_sb.sb_bsize,
839 sdp->sd_sb.sb_bsize);
840 if (error != sdp->sd_sb.sb_bsize) {
841 if (error >= 0)
842 error = -EIO;
843 goto fail;
844 }
845 }
846
847 kfree(buf);
848
849 error = gfs2_meta_inode_buffer(dip, &dibh);
850 if (!gfs2_assert_withdraw(sdp, !error)) {
851 dip->i_di.di_depth++;
852 gfs2_dinode_out(&dip->i_di, dibh->b_data);
853 brelse(dibh);
854 }
855
856 return error;
857
858 fail:
859 kfree(buf);
860
861 return error;
862}
863
864/**
865 * compare_dents - compare directory entries by hash value
866 * @a: first dent
867 * @b: second dent
868 *
869 * When comparing the hash entries of @a to @b:
870 * gt: returns 1
871 * lt: returns -1
872 * eq: returns 0
873 */
874
875static int compare_dents(const void *a, const void *b)
876{
877 struct gfs2_dirent *dent_a, *dent_b;
878 uint32_t hash_a, hash_b;
879 int ret = 0;
880
881 dent_a = *(struct gfs2_dirent **)a;
882 hash_a = dent_a->de_hash;
883 hash_a = be32_to_cpu(hash_a);
884
885 dent_b = *(struct gfs2_dirent **)b;
886 hash_b = dent_b->de_hash;
887 hash_b = be32_to_cpu(hash_b);
888
889 if (hash_a > hash_b)
890 ret = 1;
891 else if (hash_a < hash_b)
892 ret = -1;
893 else {
894 unsigned int len_a = dent_a->de_name_len;
895 unsigned int len_b = dent_b->de_name_len;
896
897 if (len_a > len_b)
898 ret = 1;
899 else if (len_a < len_b)
900 ret = -1;
901 else
902 ret = memcmp((char *)(dent_a + 1),
903 (char *)(dent_b + 1),
904 len_a);
905 }
906
907 return ret;
908}
909
910/**
911 * do_filldir_main - read out directory entries
912 * @dip: The GFS2 inode
913 * @offset: The offset in the file to read from
914 * @opaque: opaque data to pass to filldir
915 * @filldir: The function to pass entries to
916 * @darr: an array of struct gfs2_dirent pointers to read
917 * @entries: the number of entries in darr
918 * @copied: pointer to int that's non-zero if a entry has been copied out
919 *
920 * Jump through some hoops to make sure that if there are hash collsions,
921 * they are read out at the beginning of a buffer. We want to minimize
922 * the possibility that they will fall into different readdir buffers or
923 * that someone will want to seek to that location.
924 *
925 * Returns: errno, >0 on exception from filldir
926 */
927
928static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
929 void *opaque, gfs2_filldir_t filldir,
930 struct gfs2_dirent **darr, uint32_t entries,
931 int *copied)
932{
933 struct gfs2_dirent *dent, *dent_next;
934 struct gfs2_inum inum;
935 uint64_t off, off_next;
936 unsigned int x, y;
937 int run = 0;
938 int error = 0;
939
940 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
941
942 dent_next = darr[0];
943 off_next = be32_to_cpu(dent_next->de_hash);
944 off_next = gfs2_disk_hash2offset(off_next);
945
946 for (x = 0, y = 1; x < entries; x++, y++) {
947 dent = dent_next;
948 off = off_next;
949
950 if (y < entries) {
951 dent_next = darr[y];
952 off_next = be32_to_cpu(dent_next->de_hash);
953 off_next = gfs2_disk_hash2offset(off_next);
954
955 if (off < *offset)
956 continue;
957 *offset = off;
958
959 if (off_next == off) {
960 if (*copied && !run)
961 return 1;
962 run = 1;
963 } else
964 run = 0;
965 } else {
966 if (off < *offset)
967 continue;
968 *offset = off;
969 }
970
971 gfs2_inum_in(&inum, (char *)&dent->de_inum);
972
973 error = filldir(opaque, (char *)(dent + 1),
974 dent->de_name_len,
975 off, &inum,
976 dent->de_type);
977 if (error)
978 return 1;
979
980 *copied = 1;
981 }
982
983 /* Increment the *offset by one, so the next time we come into the
984 do_filldir fxn, we get the next entry instead of the last one in the
985 current leaf */
986
987 (*offset)++;
988
989 return 0;
990}
991
992/**
993 * do_filldir_single - Read directory entries out of a single block
994 * @dip: The GFS2 inode
995 * @offset: The offset in the file to read from
996 * @opaque: opaque data to pass to filldir
997 * @filldir: The function to pass entries to
998 * @bh: the block
999 * @entries: the number of entries in the block
1000 * @copied: pointer to int that's non-zero if a entry has been copied out
1001 *
1002 * Returns: errno, >0 on exception from filldir
1003 */
1004
1005static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset,
1006 void *opaque, gfs2_filldir_t filldir,
1007 struct buffer_head *bh, uint32_t entries,
1008 int *copied)
1009{
1010 struct gfs2_dirent **darr;
1011 struct gfs2_dirent *de;
1012 unsigned int e = 0;
1013 int error;
1014
1015 if (!entries)
1016 return 0;
1017
1018 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1019 if (!darr)
1020 return -ENOMEM;
1021
1022 dirent_first(dip, bh, &de);
1023 do {
1024 if (!de->de_inum.no_addr)
1025 continue;
1026 if (e >= entries) {
1027 gfs2_consist_inode(dip);
1028 error = -EIO;
1029 goto out;
1030 }
1031 darr[e++] = de;
1032 }
1033 while (dirent_next(dip, bh, &de) == 0);
1034
1035 if (e != entries) {
1036 gfs2_consist_inode(dip);
1037 error = -EIO;
1038 goto out;
1039 }
1040
1041 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1042 entries, copied);
1043
1044 out:
1045 kfree(darr);
1046
1047 return error;
1048}
1049
1050/**
1051 * do_filldir_multi - Read directory entries out of a linked leaf list
1052 * @dip: The GFS2 inode
1053 * @offset: The offset in the file to read from
1054 * @opaque: opaque data to pass to filldir
1055 * @filldir: The function to pass entries to
1056 * @bh: the first leaf in the list
1057 * @copied: pointer to int that's non-zero if a entry has been copied out
1058 *
1059 * Returns: errno, >0 on exception from filldir
1060 */
1061
1062static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
1063 void *opaque, gfs2_filldir_t filldir,
1064 struct buffer_head *bh, int *copied)
1065{
1066 struct buffer_head **larr = NULL;
1067 struct gfs2_dirent **darr;
1068 struct gfs2_leaf *leaf;
1069 struct buffer_head *tmp_bh;
1070 struct gfs2_dirent *de;
1071 unsigned int entries, e = 0;
1072 unsigned int leaves = 0, l = 0;
1073 unsigned int x;
1074 uint64_t ln;
1075 int error = 0;
1076
1077 /* Count leaves and entries */
1078
1079 leaf = (struct gfs2_leaf *)bh->b_data;
1080 entries = be16_to_cpu(leaf->lf_entries);
1081 ln = leaf->lf_next;
1082
1083 while (ln) {
1084 ln = be64_to_cpu(ln);
1085
1086 error = get_leaf(dip, ln, &tmp_bh);
1087 if (error)
1088 return error;
1089
1090 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1091 if (leaf->lf_entries) {
1092 entries += be16_to_cpu(leaf->lf_entries);
1093 leaves++;
1094 }
1095 ln = leaf->lf_next;
1096
1097 brelse(tmp_bh);
1098 }
1099
1100 if (!entries)
1101 return 0;
1102
1103 if (leaves) {
1104 larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL);
1105 if (!larr)
1106 return -ENOMEM;
1107 }
1108
1109 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1110 if (!darr) {
1111 kfree(larr);
1112 return -ENOMEM;
1113 }
1114
1115 leaf = (struct gfs2_leaf *)bh->b_data;
1116 if (leaf->lf_entries) {
1117 dirent_first(dip, bh, &de);
1118 do {
1119 if (!de->de_inum.no_addr)
1120 continue;
1121 if (e >= entries) {
1122 gfs2_consist_inode(dip);
1123 error = -EIO;
1124 goto out;
1125 }
1126 darr[e++] = de;
1127 }
1128 while (dirent_next(dip, bh, &de) == 0);
1129 }
1130 ln = leaf->lf_next;
1131
1132 while (ln) {
1133 ln = be64_to_cpu(ln);
1134
1135 error = get_leaf(dip, ln, &tmp_bh);
1136 if (error)
1137 goto out;
1138
1139 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1140 if (leaf->lf_entries) {
1141 dirent_first(dip, tmp_bh, &de);
1142 do {
1143 if (!de->de_inum.no_addr)
1144 continue;
1145 if (e >= entries) {
1146 gfs2_consist_inode(dip);
1147 error = -EIO;
1148 goto out;
1149 }
1150 darr[e++] = de;
1151 }
1152 while (dirent_next(dip, tmp_bh, &de) == 0);
1153
1154 larr[l++] = tmp_bh;
1155
1156 ln = leaf->lf_next;
1157 } else {
1158 ln = leaf->lf_next;
1159 brelse(tmp_bh);
1160 }
1161 }
1162
1163 if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) {
1164 error = -EIO;
1165 goto out;
1166 }
1167 if (e != entries) {
1168 gfs2_consist_inode(dip);
1169 error = -EIO;
1170 goto out;
1171 }
1172
1173 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1174 entries, copied);
1175
1176 out:
1177 kfree(darr);
1178 for (x = 0; x < l; x++)
1179 brelse(larr[x]);
1180 kfree(larr);
1181
1182 return error;
1183}
1184
1185/**
1186 * dir_e_search - Search exhash (leaf) dir for inode matching name
1187 * @dip: The GFS2 inode
1188 * @filename: Filename string
1189 * @inode: If non-NULL, function fills with formal inode # and block address
1190 * @type: If non-NULL, function fills with DT_... dinode type
1191 *
1192 * Returns:
1193 */
1194
1195static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename,
1196 struct gfs2_inum *inum, unsigned int *type)
1197{
1198 struct buffer_head *bh;
1199 struct gfs2_dirent *dent;
1200 int error;
1201
1202 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1203 if (error)
1204 return error;
1205
1206 if (inum)
1207 gfs2_inum_in(inum, (char *)&dent->de_inum);
1208 if (type)
1209 *type = dent->de_type;
1210
1211 brelse(bh);
1212
1213 return 0;
1214}
1215
1216static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename,
1217 struct gfs2_inum *inum, unsigned int type)
1218{
1219 struct buffer_head *bh, *nbh, *dibh;
1220 struct gfs2_leaf *leaf, *nleaf;
1221 struct gfs2_dirent *dent;
1222 uint32_t hsize, index;
1223 uint32_t hash;
1224 uint64_t leaf_no, bn;
1225 int error;
1226
1227 restart:
1228 hsize = 1 << dip->i_di.di_depth;
1229 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1230 gfs2_consist_inode(dip);
1231 return -EIO;
1232 }
1233
1234 /* Figure out the address of the leaf node. */
1235
1236 hash = gfs2_disk_hash(filename->name, filename->len);
1237 index = hash >> (32 - dip->i_di.di_depth);
1238
1239 error = get_leaf_nr(dip, index, &leaf_no);
1240 if (error)
1241 return error;
1242
1243 /* Add entry to the leaf */
1244
1245 for (;;) {
1246 error = get_leaf(dip, leaf_no, &bh);
1247 if (error)
1248 return error;
1249
1250 leaf = (struct gfs2_leaf *)bh->b_data;
1251
1252 if (gfs2_dirent_alloc(dip, bh, filename->len, &dent)) {
1253
1254 if (be16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
1255 /* Can we split the leaf? */
1256
1257 brelse(bh);
1258
1259 error = dir_split_leaf(dip, index, leaf_no);
1260 if (error)
1261 return error;
1262
1263 goto restart;
1264
1265 } else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1266 /* Can we double the hash table? */
1267
1268 brelse(bh);
1269
1270 error = dir_double_exhash(dip);
1271 if (error)
1272 return error;
1273
1274 goto restart;
1275
1276 } else if (leaf->lf_next) {
1277 /* Can we try the next leaf in the list? */
1278 leaf_no = be64_to_cpu(leaf->lf_next);
1279 brelse(bh);
1280 continue;
1281
1282 } else {
1283 /* Create a new leaf and add it to the list. */
1284
1285 bn = gfs2_alloc_meta(dip);
1286
1287 nbh = gfs2_meta_new(dip->i_gl, bn);
1288 gfs2_trans_add_bh(dip->i_gl, nbh);
1289 gfs2_metatype_set(nbh,
1290 GFS2_METATYPE_LF,
1291 GFS2_FORMAT_LF);
1292 gfs2_buffer_clear_tail(nbh,
1293 sizeof(struct gfs2_meta_header));
1294
1295 gfs2_trans_add_bh(dip->i_gl, bh);
1296 leaf->lf_next = cpu_to_be64(bn);
1297
1298 nleaf = (struct gfs2_leaf *)nbh->b_data;
1299 nleaf->lf_depth = leaf->lf_depth;
1300 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
1301
1302 gfs2_dirent_alloc(dip, nbh, filename->len,
1303 &dent);
1304
1305 dip->i_di.di_blocks++;
1306
1307 brelse(bh);
1308
1309 bh = nbh;
1310 leaf = nleaf;
1311 }
1312 }
1313
1314 /* If the gfs2_dirent_alloc() succeeded, it pinned the "bh" */
1315
1316 gfs2_inum_out(inum, (char *)&dent->de_inum);
1317 dent->de_hash = cpu_to_be32(hash);
1318 dent->de_type = type;
1319 memcpy((char *)(dent + 1), filename->name, filename->len);
1320
1321 leaf->lf_entries = be16_to_cpu(leaf->lf_entries) + 1;
1322 leaf->lf_entries = cpu_to_be16(leaf->lf_entries);
1323
1324 brelse(bh);
1325
1326 error = gfs2_meta_inode_buffer(dip, &dibh);
1327 if (error)
1328 return error;
1329
1330 dip->i_di.di_entries++;
1331 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1332
1333 gfs2_trans_add_bh(dip->i_gl, dibh);
1334 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1335 brelse(dibh);
1336
1337 return 0;
1338 }
1339
1340 return -ENOENT;
1341}
1342
1343static int dir_e_del(struct gfs2_inode *dip, struct qstr *filename)
1344{
1345 struct buffer_head *bh, *dibh;
1346 struct gfs2_dirent *dent, *prev;
1347 struct gfs2_leaf *leaf;
1348 unsigned int entries;
1349 int error;
1350
1351 error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
1352 if (error == -ENOENT) {
1353 gfs2_consist_inode(dip);
1354 return -EIO;
1355 }
1356 if (error)
1357 return error;
1358
1359 dirent_del(dip, bh, prev, dent); /* Pins bh */
1360
1361 leaf = (struct gfs2_leaf *)bh->b_data;
1362 entries = be16_to_cpu(leaf->lf_entries);
1363 if (!entries)
1364 gfs2_consist_inode(dip);
1365 entries--;
1366 leaf->lf_entries = cpu_to_be16(entries);
1367
1368 brelse(bh);
1369
1370 error = gfs2_meta_inode_buffer(dip, &dibh);
1371 if (error)
1372 return error;
1373
1374 if (!dip->i_di.di_entries)
1375 gfs2_consist_inode(dip);
1376 dip->i_di.di_entries--;
1377 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1378
1379 gfs2_trans_add_bh(dip->i_gl, dibh);
1380 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1381 brelse(dibh);
1382
1383 return 0;
1384}
1385
1386/**
1387 * dir_e_read - Reads the entries from a directory into a filldir buffer
1388 * @dip: dinode pointer
1389 * @offset: the hash of the last entry read shifted to the right once
1390 * @opaque: buffer for the filldir function to fill
1391 * @filldir: points to the filldir function to use
1392 *
1393 * Returns: errno
1394 */
1395
1396static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1397 gfs2_filldir_t filldir)
1398{
1399 struct gfs2_sbd *sdp = dip->i_sbd;
1400 struct buffer_head *bh;
1401 struct gfs2_leaf leaf;
1402 uint32_t hsize, len;
1403 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1404 uint32_t hash, index;
1405 uint64_t *lp;
1406 int copied = 0;
1407 int error = 0;
1408
1409 hsize = 1 << dip->i_di.di_depth;
1410 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1411 gfs2_consist_inode(dip);
1412 return -EIO;
1413 }
1414
1415 hash = gfs2_dir_offset2hash(*offset);
1416 index = hash >> (32 - dip->i_di.di_depth);
1417
1418 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1419 if (!lp)
1420 return -ENOMEM;
1421
1422 while (index < hsize) {
1423 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1424 ht_offset = index - lp_offset;
1425
1426 if (ht_offset_cur != ht_offset) {
1427 error = gfs2_jdata_read_mem(dip, (char *)lp,
1428 ht_offset * sizeof(uint64_t),
1429 sdp->sd_hash_bsize);
1430 if (error != sdp->sd_hash_bsize) {
1431 if (error >= 0)
1432 error = -EIO;
1433 goto out;
1434 }
1435 ht_offset_cur = ht_offset;
1436 }
1437
1438 error = get_leaf(dip, be64_to_cpu(lp[lp_offset]), &bh);
1439 if (error)
1440 goto out;
1441
1442 gfs2_leaf_in(&leaf, bh->b_data);
1443
1444 if (leaf.lf_next)
1445 error = do_filldir_multi(dip, offset, opaque, filldir,
1446 bh, &copied);
1447 else
1448 error = do_filldir_single(dip, offset, opaque, filldir,
1449 bh, leaf.lf_entries, &copied);
1450
1451 brelse(bh);
1452
1453 if (error) {
1454 if (error > 0)
1455 error = 0;
1456 goto out;
1457 }
1458
1459 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
1460 index = (index & ~(len - 1)) + len;
1461 }
1462
1463 out:
1464 kfree(lp);
1465
1466 return error;
1467}
1468
1469static int dir_e_mvino(struct gfs2_inode *dip, struct qstr *filename,
1470 struct gfs2_inum *inum, unsigned int new_type)
1471{
1472 struct buffer_head *bh, *dibh;
1473 struct gfs2_dirent *dent;
1474 int error;
1475
1476 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1477 if (error == -ENOENT) {
1478 gfs2_consist_inode(dip);
1479 return -EIO;
1480 }
1481 if (error)
1482 return error;
1483
1484 gfs2_trans_add_bh(dip->i_gl, bh);
1485
1486 gfs2_inum_out(inum, (char *)&dent->de_inum);
1487 dent->de_type = new_type;
1488
1489 brelse(bh);
1490
1491 error = gfs2_meta_inode_buffer(dip, &dibh);
1492 if (error)
1493 return error;
1494
1495 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1496
1497 gfs2_trans_add_bh(dip->i_gl, dibh);
1498 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1499 brelse(dibh);
1500
1501 return 0;
1502}
1503
1504/**
1505 * dir_l_search - Search linear (stuffed dinode) dir for inode matching name
1506 * @dip: The GFS2 inode
1507 * @filename: Filename string
1508 * @inode: If non-NULL, function fills with formal inode # and block address
1509 * @type: If non-NULL, function fills with DT_... dinode type
1510 *
1511 * Returns:
1512 */
1513
1514static int dir_l_search(struct gfs2_inode *dip, struct qstr *filename,
1515 struct gfs2_inum *inum, unsigned int *type)
1516{
1517 struct buffer_head *dibh;
1518 struct gfs2_dirent *dent;
1519 int error;
1520
1521 if (!gfs2_is_stuffed(dip)) {
1522 gfs2_consist_inode(dip);
1523 return -EIO;
1524 }
1525
1526 error = gfs2_meta_inode_buffer(dip, &dibh);
1527 if (error)
1528 return error;
1529
1530 error = leaf_search(dip, dibh, filename, &dent, NULL);
1531 if (!error) {
1532 if (inum)
1533 gfs2_inum_in(inum, (char *)&dent->de_inum);
1534 if (type)
1535 *type = dent->de_type;
1536 }
1537
1538 brelse(dibh);
1539
1540 return error;
1541}
1542
1543static int dir_l_add(struct gfs2_inode *dip, struct qstr *filename,
1544 struct gfs2_inum *inum, unsigned int type)
1545{
1546 struct buffer_head *dibh;
1547 struct gfs2_dirent *dent;
1548 int error;
1549
1550 if (!gfs2_is_stuffed(dip)) {
1551 gfs2_consist_inode(dip);
1552 return -EIO;
1553 }
1554
1555 error = gfs2_meta_inode_buffer(dip, &dibh);
1556 if (error)
1557 return error;
1558
1559 if (gfs2_dirent_alloc(dip, dibh, filename->len, &dent)) {
1560 brelse(dibh);
1561
1562 error = dir_make_exhash(dip);
1563 if (!error)
1564 error = dir_e_add(dip, filename, inum, type);
1565
1566 return error;
1567 }
1568
1569 /* gfs2_dirent_alloc() pins */
1570
1571 gfs2_inum_out(inum, (char *)&dent->de_inum);
1572 dent->de_hash = gfs2_disk_hash(filename->name, filename->len);
1573 dent->de_hash = cpu_to_be32(dent->de_hash);
1574 dent->de_type = type;
1575 memcpy((char *)(dent + 1), filename->name, filename->len);
1576
1577 dip->i_di.di_entries++;
1578 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1579
1580 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1581 brelse(dibh);
1582
1583 return 0;
1584}
1585
1586static int dir_l_del(struct gfs2_inode *dip, struct qstr *filename)
1587{
1588 struct buffer_head *dibh;
1589 struct gfs2_dirent *dent, *prev;
1590 int error;
1591
1592 if (!gfs2_is_stuffed(dip)) {
1593 gfs2_consist_inode(dip);
1594 return -EIO;
1595 }
1596
1597 error = gfs2_meta_inode_buffer(dip, &dibh);
1598 if (error)
1599 return error;
1600
1601 error = leaf_search(dip, dibh, filename, &dent, &prev);
1602 if (error == -ENOENT) {
1603 gfs2_consist_inode(dip);
1604 error = -EIO;
1605 goto out;
1606 }
1607 if (error)
1608 goto out;
1609
1610 dirent_del(dip, dibh, prev, dent);
1611
1612 /* dirent_del() pins */
1613
1614 if (!dip->i_di.di_entries)
1615 gfs2_consist_inode(dip);
1616 dip->i_di.di_entries--;
1617
1618 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1619
1620 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1621
1622 out:
1623 brelse(dibh);
1624
1625 return error;
1626}
1627
1628static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1629 gfs2_filldir_t filldir)
1630{
1631 struct buffer_head *dibh;
1632 int copied = 0;
1633 int error;
1634
1635 if (!gfs2_is_stuffed(dip)) {
1636 gfs2_consist_inode(dip);
1637 return -EIO;
1638 }
1639
1640 if (!dip->i_di.di_entries)
1641 return 0;
1642
1643 error = gfs2_meta_inode_buffer(dip, &dibh);
1644 if (error)
1645 return error;
1646
1647 error = do_filldir_single(dip, offset,
1648 opaque, filldir,
1649 dibh, dip->i_di.di_entries,
1650 &copied);
1651 if (error > 0)
1652 error = 0;
1653
1654 brelse(dibh);
1655
1656 return error;
1657}
1658
1659static int dir_l_mvino(struct gfs2_inode *dip, struct qstr *filename,
1660 struct gfs2_inum *inum, unsigned int new_type)
1661{
1662 struct buffer_head *dibh;
1663 struct gfs2_dirent *dent;
1664 int error;
1665
1666 if (!gfs2_is_stuffed(dip)) {
1667 gfs2_consist_inode(dip);
1668 return -EIO;
1669 }
1670
1671 error = gfs2_meta_inode_buffer(dip, &dibh);
1672 if (error)
1673 return error;
1674
1675 error = leaf_search(dip, dibh, filename, &dent, NULL);
1676 if (error == -ENOENT) {
1677 gfs2_consist_inode(dip);
1678 error = -EIO;
1679 goto out;
1680 }
1681 if (error)
1682 goto out;
1683
1684 gfs2_trans_add_bh(dip->i_gl, dibh);
1685
1686 gfs2_inum_out(inum, (char *)&dent->de_inum);
1687 dent->de_type = new_type;
1688
1689 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1690
1691 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1692
1693 out:
1694 brelse(dibh);
1695
1696 return error;
1697}
1698
1699/**
1700 * gfs2_dir_search - Search a directory
1701 * @dip: The GFS2 inode
1702 * @filename:
1703 * @inode:
1704 *
1705 * This routine searches a directory for a file or another directory.
1706 * Assumes a glock is held on dip.
1707 *
1708 * Returns: errno
1709 */
1710
1711int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
1712 struct gfs2_inum *inum, unsigned int *type)
1713{
1714 int error;
1715
1716 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1717 error = dir_e_search(dip, filename, inum, type);
1718 else
1719 error = dir_l_search(dip, filename, inum, type);
1720
1721 return error;
1722}
1723
1724/**
1725 * gfs2_dir_add - Add new filename into directory
1726 * @dip: The GFS2 inode
1727 * @filename: The new name
1728 * @inode: The inode number of the entry
1729 * @type: The type of the entry
1730 *
1731 * Returns: 0 on success, error code on failure
1732 */
1733
1734int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
1735 struct gfs2_inum *inum, unsigned int type)
1736{
1737 int error;
1738
1739 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1740 error = dir_e_add(dip, filename, inum, type);
1741 else
1742 error = dir_l_add(dip, filename, inum, type);
1743
1744 return error;
1745}
1746
1747/**
1748 * gfs2_dir_del - Delete a directory entry
1749 * @dip: The GFS2 inode
1750 * @filename: The filename
1751 *
1752 * Returns: 0 on success, error code on failure
1753 */
1754
1755int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename)
1756{
1757 int error;
1758
1759 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1760 error = dir_e_del(dip, filename);
1761 else
1762 error = dir_l_del(dip, filename);
1763
1764 return error;
1765}
1766
1767int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1768 gfs2_filldir_t filldir)
1769{
1770 int error;
1771
1772 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1773 error = dir_e_read(dip, offset, opaque, filldir);
1774 else
1775 error = dir_l_read(dip, offset, opaque, filldir);
1776
1777 return error;
1778}
1779
1780/**
1781 * gfs2_dir_mvino - Change inode number of directory entry
1782 * @dip: The GFS2 inode
1783 * @filename:
1784 * @new_inode:
1785 *
1786 * This routine changes the inode number of a directory entry. It's used
1787 * by rename to change ".." when a directory is moved.
1788 * Assumes a glock is held on dvp.
1789 *
1790 * Returns: errno
1791 */
1792
1793int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
1794 struct gfs2_inum *inum, unsigned int new_type)
1795{
1796 int error;
1797
1798 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1799 error = dir_e_mvino(dip, filename, inum, new_type);
1800 else
1801 error = dir_l_mvino(dip, filename, inum, new_type);
1802
1803 return error;
1804}
1805
1806/**
1807 * foreach_leaf - call a function for each leaf in a directory
1808 * @dip: the directory
1809 * @lc: the function to call for each each
1810 * @data: private data to pass to it
1811 *
1812 * Returns: errno
1813 */
1814
1815static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1816{
1817 struct gfs2_sbd *sdp = dip->i_sbd;
1818 struct buffer_head *bh;
1819 struct gfs2_leaf leaf;
1820 uint32_t hsize, len;
1821 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1822 uint32_t index = 0;
1823 uint64_t *lp;
1824 uint64_t leaf_no;
1825 int error = 0;
1826
1827 hsize = 1 << dip->i_di.di_depth;
1828 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1829 gfs2_consist_inode(dip);
1830 return -EIO;
1831 }
1832
1833 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1834 if (!lp)
1835 return -ENOMEM;
1836
1837 while (index < hsize) {
1838 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1839 ht_offset = index - lp_offset;
1840
1841 if (ht_offset_cur != ht_offset) {
1842 error = gfs2_jdata_read_mem(dip, (char *)lp,
1843 ht_offset * sizeof(uint64_t),
1844 sdp->sd_hash_bsize);
1845 if (error != sdp->sd_hash_bsize) {
1846 if (error >= 0)
1847 error = -EIO;
1848 goto out;
1849 }
1850 ht_offset_cur = ht_offset;
1851 }
1852
1853 leaf_no = be64_to_cpu(lp[lp_offset]);
1854 if (leaf_no) {
1855 error = get_leaf(dip, leaf_no, &bh);
1856 if (error)
1857 goto out;
1858 gfs2_leaf_in(&leaf, bh->b_data);
1859 brelse(bh);
1860
1861 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
1862
1863 error = lc(dip, index, len, leaf_no, data);
1864 if (error)
1865 goto out;
1866
1867 index = (index & ~(len - 1)) + len;
1868 } else
1869 index++;
1870 }
1871
1872 if (index != hsize) {
1873 gfs2_consist_inode(dip);
1874 error = -EIO;
1875 }
1876
1877 out:
1878 kfree(lp);
1879
1880 return error;
1881}
1882
1883/**
1884 * leaf_dealloc - Deallocate a directory leaf
1885 * @dip: the directory
1886 * @index: the hash table offset in the directory
1887 * @len: the number of pointers to this leaf
1888 * @leaf_no: the leaf number
1889 * @data: not used
1890 *
1891 * Returns: errno
1892 */
1893
/*
 * Frees every leaf block in the chain that starts at leaf_no (following
 * lf_next) and overwrites the len hash table pointers starting at index
 * with zeroes.  The chain is walked twice: once to collect the blocks in
 * an rgrp list and count them, and once more, inside the transaction, to
 * actually free them.
 */
1894static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
1895 uint64_t leaf_no, void *data)
1896{
1897 struct gfs2_sbd *sdp = dip->i_sbd;
1898 struct gfs2_leaf tmp_leaf;
1899 struct gfs2_rgrp_list rlist;
1900 struct buffer_head *bh, *dibh;
1901 uint64_t blk;
1902 unsigned int rg_blocks = 0, l_blocks = 0;
1903 char *ht;
/* size is the byte length of the hash table range being cleared */
1904 unsigned int x, size = len * sizeof(uint64_t);
1905 int error;
1906
1907 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1908
/* Zero-filled buffer that is later written over the hash table slots */
1909 ht = kzalloc(size, GFP_KERNEL);
1910 if (!ht)
1911 return -ENOMEM;
1912
1913 gfs2_alloc_get(dip);
1914
1915 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1916 if (error)
1917 goto out;
1918
1919 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
1920 if (error)
1921 goto out_qs;
1922
1923 /* Count the number of leaves */
1924
/* First pass: add each leaf block to rlist and count it in l_blocks */
1925 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
1926 error = get_leaf(dip, blk, &bh);
1927 if (error)
1928 goto out_rlist;
1929 gfs2_leaf_in(&tmp_leaf, (bh)->b_data);
1930 brelse(bh);
1931
1932 gfs2_rlist_add(sdp, &rlist, blk);
1933 l_blocks++;
1934 }
1935
1936 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1937
/* Sum ri_length over all rgrps in the list; used to size the transaction */
1938 for (x = 0; x < rlist.rl_rgrps; x++) {
1939 struct gfs2_rgrpd *rgd;
1940 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
1941 rg_blocks += rgd->rd_ri.ri_length;
1942 }
1943
1944 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1945 if (error)
1946 goto out_rlist;
1947
1948 error = gfs2_trans_begin(sdp,
1949 rg_blocks + (DIV_RU(size, sdp->sd_jbsize) + 1) +
1950 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1951 if (error)
1952 goto out_rg_gunlock;
1953
/* Second pass: free each leaf block and drop it from the block count */
1954 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
1955 error = get_leaf(dip, blk, &bh);
1956 if (error)
1957 goto out_end_trans;
1958 gfs2_leaf_in(&tmp_leaf, bh->b_data);
1959 brelse(bh);
1960
1961 gfs2_free_meta(dip, blk, 1);
1962
1963 if (!dip->i_di.di_blocks)
1964 gfs2_consist_inode(dip);
1965 dip->i_di.di_blocks--;
1966 }
1967
/* Zero the hash table slots; anything other than a full write is an error */
1968 error = gfs2_jdata_write_mem(dip, ht, index * sizeof(uint64_t), size);
1969 if (error != size) {
1970 if (error >= 0)
1971 error = -EIO;
1972 goto out_end_trans;
1973 }
1974
1975 error = gfs2_meta_inode_buffer(dip, &dibh);
1976 if (error)
1977 goto out_end_trans;
1978
1979 gfs2_trans_add_bh(dip->i_gl, dibh);
1980 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1981 brelse(dibh);
1982
/* The labels below undo the setup steps in reverse order of acquisition */
1983 out_end_trans:
1984 gfs2_trans_end(sdp);
1985
1986 out_rg_gunlock:
1987 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1988
1989 out_rlist:
1990 gfs2_rlist_free(&rlist);
1991 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
1992
1993 out_qs:
1994 gfs2_quota_unhold(dip);
1995
1996 out:
1997 gfs2_alloc_put(dip);
1998 kfree(ht);
1999
2000 return error;
2001}
2002
2003/**
2004 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
2005 * @dip: the directory
2006 *
2007 * Dealloc all on-disk directory leaves to FREEMETA state
2008 * Change on-disk inode type to "regular file"
2009 *
2010 * Returns: errno
2011 */
2012
2013int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
2014{
2015 struct gfs2_sbd *sdp = dip->i_sbd;
2016 struct buffer_head *bh;
2017 int error;
2018
2019 /* Dealloc on-disk leaves to FREEMETA state */
2020 error = foreach_leaf(dip, leaf_dealloc, NULL);
2021 if (error)
2022 return error;
2023
2024 /* Make this a regular file in case we crash.
2025 (We don't want to free these blocks a second time.) */
2026
2027 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2028 if (error)
2029 return error;
2030
2031 error = gfs2_meta_inode_buffer(dip, &bh);
2032 if (!error) {
2033 gfs2_trans_add_bh(dip->i_gl, bh);
2034 ((struct gfs2_dinode *)bh->b_data)->di_mode = cpu_to_be32(S_IFREG);
2035 brelse(bh);
2036 }
2037
2038 gfs2_trans_end(sdp);
2039
2040 return error;
2041}
2042
2043/**
2044 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
2045 * @ip: the file being written to
2046 * @filname: the filename that's going to be added
2047 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
2048 *
2049 * Returns: errno
2050 */
2051
2052int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
2053 int *alloc_required)
2054{
2055 struct buffer_head *bh = NULL, *bh_next;
2056 uint32_t hsize, hash, index;
2057 int error = 0;
2058
2059 *alloc_required = 0;
2060
2061 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
2062 hsize = 1 << dip->i_di.di_depth;
2063 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
2064 gfs2_consist_inode(dip);
2065 return -EIO;
2066 }
2067
2068 hash = gfs2_disk_hash(filename->name, filename->len);
2069 index = hash >> (32 - dip->i_di.di_depth);
2070
2071 error = get_first_leaf(dip, index, &bh_next);
2072 if (error)
2073 return error;
2074
2075 do {
2076 brelse(bh);
2077
2078 bh = bh_next;
2079
2080 if (dirent_fits(dip, bh, filename->len))
2081 break;
2082
2083 error = get_next_leaf(dip, bh, &bh_next);
2084 if (error == -ENOENT) {
2085 *alloc_required = 1;
2086 error = 0;
2087 break;
2088 }
2089 }
2090 while (!error);
2091
2092 brelse(bh);
2093 } else {
2094 error = gfs2_meta_inode_buffer(dip, &bh);
2095 if (error)
2096 return error;
2097
2098 if (!dirent_fits(dip, bh, filename->len))
2099 *alloc_required = 1;
2100
2101 brelse(bh);
2102 }
2103
2104 return error;
2105}
2106
2107/**
2108 * do_gdm - copy out one leaf (or list of leaves)
2109 * @dip: the directory
2110 * @index: the hash table offset in the directory
2111 * @len: the number of pointers to this leaf
2112 * @leaf_no: the leaf number
2113 * @data: a pointer to a struct gfs2_user_buffer structure
2114 *
2115 * Returns: errno
2116 */
2117
2118static int do_gdm(struct gfs2_inode *dip, uint32_t index, uint32_t len,
2119 uint64_t leaf_no, void *data)
2120{
2121 struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data;
2122 struct gfs2_leaf leaf;
2123 struct buffer_head *bh;
2124 uint64_t blk;
2125 int error = 0;
2126
2127 for (blk = leaf_no; blk; blk = leaf.lf_next) {
2128 error = get_leaf(dip, blk, &bh);
2129 if (error)
2130 break;
2131
2132 gfs2_leaf_in(&leaf, bh->b_data);
2133
2134 error = gfs2_add_bh_to_ub(ub, bh);
2135
2136 brelse(bh);
2137
2138 if (error)
2139 break;
2140 }
2141
2142 return error;
2143}
2144
2145/**
2146 * gfs2_get_dir_meta - return all the leaf blocks of a directory
2147 * @dip: the directory
2148 * @ub: the structure representing the meta
2149 *
2150 * Returns: errno
2151 */
2152
2153int gfs2_get_dir_meta(struct gfs2_inode *dip, struct gfs2_user_buffer *ub)
2154{
2155 return foreach_leaf(dip, do_gdm, ub);
2156}
2157
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
new file mode 100644
index 000000000000..79f77aab4264
--- /dev/null
+++ b/fs/gfs2/dir.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIR_DOT_H__
11#define __DIR_DOT_H__
12
13/**
14 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
15 * @opaque: opaque data used by the function
16 * @name: the name of the directory entry
17 * @length: the length of the name
18 * @offset: the entry's offset in the directory
19 * @inum: the inode number the entry points to
20 * @type: the type of inode the entry points to
21 *
22 * Returns: 0 on success, 1 if buffer full
23 */
24
25typedef int (*gfs2_filldir_t) (void *opaque,
26 const char *name, unsigned int length,
27 uint64_t offset,
28 struct gfs2_inum *inum, unsigned int type);
29
30int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2);
31int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
32 int name_len, struct gfs2_dirent **dent_out);
33
34int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
35 struct gfs2_inum *inum, unsigned int *type);
36int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
37 struct gfs2_inum *inum, unsigned int type);
38int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename);
39int gfs2_dir_read(struct gfs2_inode *dip, uint64_t * offset, void *opaque,
40 gfs2_filldir_t filldir);
41int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
42 struct gfs2_inum *new_inum, unsigned int new_type);
43
44int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
45
46int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
47 int *alloc_required);
48
49int gfs2_get_dir_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub);
50
51#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
new file mode 100644
index 000000000000..2914731250c5
--- /dev/null
+++ b/fs/gfs2/eaops.c
@@ -0,0 +1,185 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <asm/semaphore.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "eaops.h"
22#include "eattr.h"
23
24/**
25 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
26 * @namep: ea name, possibly with type appended
27 *
28 * Returns: GFS2_EATYPE_XXX
29 */
30
31unsigned int gfs2_ea_name2type(const char *name, char **truncated_name)
32{
33 unsigned int type;
34
35 if (strncmp(name, "system.", 7) == 0) {
36 type = GFS2_EATYPE_SYS;
37 if (truncated_name)
38 *truncated_name = strchr(name, '.') + 1;
39 } else if (strncmp(name, "user.", 5) == 0) {
40 type = GFS2_EATYPE_USR;
41 if (truncated_name)
42 *truncated_name = strchr(name, '.') + 1;
43 } else {
44 type = GFS2_EATYPE_UNUSED;
45 if (truncated_name)
46 *truncated_name = NULL;
47 }
48
49 return type;
50}
51
52static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
53{
54 struct inode *inode = ip->i_vnode;
55 int error = permission(inode, MAY_READ, NULL);
56 if (error)
57 return error;
58
59 return gfs2_ea_get_i(ip, er);
60}
61
62static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
63{
64 struct inode *inode = ip->i_vnode;
65
66 if (S_ISREG(inode->i_mode) ||
67 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
68 int error = permission(inode, MAY_WRITE, NULL);
69 if (error)
70 return error;
71 } else
72 return -EPERM;
73
74 return gfs2_ea_set_i(ip, er);
75}
76
77static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
78{
79 struct inode *inode = ip->i_vnode;
80
81 if (S_ISREG(inode->i_mode) ||
82 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
83 int error = permission(inode, MAY_WRITE, NULL);
84 if (error)
85 return error;
86 } else
87 return -EPERM;
88
89 return gfs2_ea_remove_i(ip, er);
90}
91
92static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
93{
94 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
95 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
96 !capable(CAP_SYS_ADMIN))
97 return -EPERM;
98
99 if (ip->i_sbd->sd_args.ar_posix_acl == 0 &&
100 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
101 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
102 return -EOPNOTSUPP;
103
104
105
106 return gfs2_ea_get_i(ip, er);
107}
108
109static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
110{
111 int remove = 0;
112 int error;
113
114 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
115 if (!(er->er_flags & GFS2_ERF_MODE)) {
116 er->er_mode = ip->i_di.di_mode;
117 er->er_flags |= GFS2_ERF_MODE;
118 }
119 error = gfs2_acl_validate_set(ip, 1, er,
120 &remove, &er->er_mode);
121 if (error)
122 return error;
123 error = gfs2_ea_set_i(ip, er);
124 if (error)
125 return error;
126 if (remove)
127 gfs2_ea_remove_i(ip, er);
128 return 0;
129
130 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
131 error = gfs2_acl_validate_set(ip, 0, er,
132 &remove, NULL);
133 if (error)
134 return error;
135 if (!remove)
136 error = gfs2_ea_set_i(ip, er);
137 else {
138 error = gfs2_ea_remove_i(ip, er);
139 if (error == -ENODATA)
140 error = 0;
141 }
142 return error;
143 }
144
145 return -EPERM;
146}
147
148static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
149{
150 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
151 int error = gfs2_acl_validate_remove(ip, 1);
152 if (error)
153 return error;
154
155 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
156 int error = gfs2_acl_validate_remove(ip, 0);
157 if (error)
158 return error;
159
160 } else
161 return -EPERM;
162
163 return gfs2_ea_remove_i(ip, er);
164}
165
/* Handlers for attributes in the "user" namespace */
166struct gfs2_eattr_operations gfs2_user_eaops = {
167 .eo_get = user_eo_get,
168 .eo_set = user_eo_set,
169 .eo_remove = user_eo_remove,
170 .eo_name = "user",
171};
172
/* Handlers for attributes in the "system" namespace */
173struct gfs2_eattr_operations gfs2_system_eaops = {
174 .eo_get = system_eo_get,
175 .eo_set = system_eo_set,
176 .eo_remove = system_eo_remove,
177 .eo_name = "system",
178};
179
/*
 * Table of the handler sets above; slot 0 has no handlers.
 * NOTE(review): presumably indexed by the GFS2_EATYPE_* value returned by
 * gfs2_ea_name2type() - confirm against the definitions of those constants.
 */
180struct gfs2_eattr_operations *gfs2_ea_ops[] = {
181 NULL,
182 &gfs2_user_eaops,
183 &gfs2_system_eaops,
184};
185
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
new file mode 100644
index 000000000000..f83c497eddca
--- /dev/null
+++ b/fs/gfs2/eaops.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14
/*
 * Per-namespace extended attribute handlers.  Each callback receives the
 * inode and the request and returns an int status.
 */
15struct gfs2_eattr_operations {
/* fetch the attribute described by the request */
16 int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
/* create or replace the attribute described by the request */
17 int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
/* delete the attribute described by the request */
18 int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
/* namespace name, without the trailing dot */
19 char *eo_name;
20};
22unsigned int gfs2_ea_name2type(const char *name, char **truncated_name);
23
24extern struct gfs2_eattr_operations gfs2_user_eaops;
25extern struct gfs2_eattr_operations gfs2_system_eaops;
26
27extern struct gfs2_eattr_operations *gfs2_ea_ops[];
28
29#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
new file mode 100644
index 000000000000..63a5cf1e2472
--- /dev/null
+++ b/fs/gfs2/eattr.c
@@ -0,0 +1,1620 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <asm/semaphore.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "eaops.h"
22#include "eattr.h"
23#include "glock.h"
24#include "inode.h"
25#include "meta_io.h"
26#include "quota.h"
27#include "rgrp.h"
28#include "trans.h"
29
30/**
31 * ea_calc_size - returns the actual number of bytes the request will take up
32 * (not counting any unstuffed data blocks)
33 * @sdp:
34 * @er:
35 * @size:
36 *
37 * Returns: 1 if the EA should be stuffed
38 */
39
40static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
41 unsigned int *size)
42{
43 *size = GFS2_EAREQ_SIZE_STUFFED(er);
44 if (*size <= sdp->sd_jbsize)
45 return 1;
46
47 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
48
49 return 0;
50}
51
52static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
53{
54 unsigned int size;
55
56 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
57 return -ERANGE;
58
59 ea_calc_size(sdp, er, &size);
60
61 /* This can only happen with 512 byte blocks */
62 if (size > sdp->sd_jbsize)
63 return -ERANGE;
64
65 return 0;
66}
67
/*
 * Callback invoked by ea_foreach_i() for every record in an EA block.
 * @bh is the block being walked, @ea the current record, @prev the record
 * visited just before it (NULL for the first one) and @private the opaque
 * pointer handed to the walker.  A non-zero return stops the walk and is
 * passed back to the caller.
 */
68typedef int (*ea_call_t) (struct gfs2_inode *ip,
69 struct buffer_head *bh,
70 struct gfs2_ea_header *ea,
71 struct gfs2_ea_header *prev,
72 void *private);
73
74static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
75 ea_call_t ea_call, void *data)
76{
77 struct gfs2_ea_header *ea, *prev = NULL;
78 int error = 0;
79
80 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_EA))
81 return -EIO;
82
83 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
84 if (!GFS2_EA_REC_LEN(ea))
85 goto fail;
86 if (!(bh->b_data <= (char *)ea &&
87 (char *)GFS2_EA2NEXT(ea) <=
88 bh->b_data + bh->b_size))
89 goto fail;
90 if (!GFS2_EATYPE_VALID(ea->ea_type))
91 goto fail;
92
93 error = ea_call(ip, bh, ea, prev, data);
94 if (error)
95 return error;
96
97 if (GFS2_EA_IS_LAST(ea)) {
98 if ((char *)GFS2_EA2NEXT(ea) !=
99 bh->b_data + bh->b_size)
100 goto fail;
101 break;
102 }
103 }
104
105 return error;
106
107 fail:
108 gfs2_consist_inode(ip);
109 return -EIO;
110}
111
112static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
113{
114 struct buffer_head *bh, *eabh;
115 uint64_t *eablk, *end;
116 int error;
117
118 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
119 DIO_START | DIO_WAIT, &bh);
120 if (error)
121 return error;
122
123 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
124 error = ea_foreach_i(ip, bh, ea_call, data);
125 goto out;
126 }
127
128 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
129 error = -EIO;
130 goto out;
131 }
132
133 eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs2_meta_header));
134 end = eablk + ip->i_sbd->sd_inptrs;
135
136 for (; eablk < end; eablk++) {
137 uint64_t bn;
138
139 if (!*eablk)
140 break;
141 bn = be64_to_cpu(*eablk);
142
143 error = gfs2_meta_read(ip->i_gl, bn, DIO_START | DIO_WAIT,
144 &eabh);
145 if (error)
146 break;
147 error = ea_foreach_i(ip, eabh, ea_call, data);
148 brelse(eabh);
149 if (error)
150 break;
151 }
152 out:
153 brelse(bh);
154
155 return error;
156}
157
/* Private state passed through ea_foreach() to ea_find_i(). */
158struct ea_find {
/* request whose type and name are being searched for */
159 struct gfs2_ea_request *ef_er;
/* filled in with buffer, record and predecessor on a match */
160 struct gfs2_ea_location *ef_el;
161};
162
163static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
164 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
165 void *private)
166{
167 struct ea_find *ef = private;
168 struct gfs2_ea_request *er = ef->ef_er;
169
170 if (ea->ea_type == GFS2_EATYPE_UNUSED)
171 return 0;
172
173 if (ea->ea_type == er->er_type) {
174 if (ea->ea_name_len == er->er_name_len &&
175 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
176 struct gfs2_ea_location *el = ef->ef_el;
177 get_bh(bh);
178 el->el_bh = bh;
179 el->el_ea = ea;
180 el->el_prev = prev;
181 return 1;
182 }
183 }
184
185#if 0
186 else if ((ip->i_di.di_flags & GFS2_DIF_EA_PACKED) &&
187 er->er_type == GFS2_EATYPE_SYS)
188 return 1;
189#endif
190
191 return 0;
192}
193
194int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
195 struct gfs2_ea_location *el)
196{
197 struct ea_find ef;
198 int error;
199
200 ef.ef_er = er;
201 ef.ef_el = el;
202
203 memset(el, 0, sizeof(struct gfs2_ea_location));
204
205 error = ea_foreach(ip, ea_find_i, &ef);
206 if (error > 0)
207 return 0;
208
209 return error;
210}
211
212/**
213 * ea_dealloc_unstuffed -
214 * @ip:
215 * @bh:
216 * @ea:
217 * @prev:
218 * @private:
219 *
220 * Take advantage of the fact that all unstuffed blocks are
221 * allocated from the same RG. But watch, this may not always
222 * be true.
223 *
224 * Returns: errno
225 */
226
227static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
228 struct gfs2_ea_header *ea,
229 struct gfs2_ea_header *prev, void *private)
230{
231 int *leave = private;
232 struct gfs2_sbd *sdp = ip->i_sbd;
233 struct gfs2_rgrpd *rgd;
234 struct gfs2_holder rg_gh;
235 struct buffer_head *dibh;
236 uint64_t *dataptrs, bn = 0;
237 uint64_t bstart = 0;
238 unsigned int blen = 0;
239 unsigned int blks = 0;
240 unsigned int x;
241 int error;
242
243 if (GFS2_EA_IS_STUFFED(ea))
244 return 0;
245
246 dataptrs = GFS2_EA2DATAPTRS(ea);
247 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++)
248 if (*dataptrs) {
249 blks++;
250 bn = be64_to_cpu(*dataptrs);
251 }
252 if (!blks)
253 return 0;
254
255 rgd = gfs2_blk2rgrpd(sdp, bn);
256 if (!rgd) {
257 gfs2_consist_inode(ip);
258 return -EIO;
259 }
260
261 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
262 if (error)
263 return error;
264
265 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length +
266 RES_DINODE + RES_EATTR + RES_STATFS +
267 RES_QUOTA, blks);
268 if (error)
269 goto out_gunlock;
270
271 gfs2_trans_add_bh(ip->i_gl, bh);
272
273 dataptrs = GFS2_EA2DATAPTRS(ea);
274 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
275 if (!*dataptrs)
276 break;
277 bn = be64_to_cpu(*dataptrs);
278
279 if (bstart + blen == bn)
280 blen++;
281 else {
282 if (bstart)
283 gfs2_free_meta(ip, bstart, blen);
284 bstart = bn;
285 blen = 1;
286 }
287
288 *dataptrs = 0;
289 if (!ip->i_di.di_blocks)
290 gfs2_consist_inode(ip);
291 ip->i_di.di_blocks--;
292 }
293 if (bstart)
294 gfs2_free_meta(ip, bstart, blen);
295
296 if (prev && !leave) {
297 uint32_t len;
298
299 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
300 prev->ea_rec_len = cpu_to_be32(len);
301
302 if (GFS2_EA_IS_LAST(ea))
303 prev->ea_flags |= GFS2_EAFLAG_LAST;
304 } else {
305 ea->ea_type = GFS2_EATYPE_UNUSED;
306 ea->ea_num_ptrs = 0;
307 }
308
309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) {
311 ip->i_di.di_ctime = get_seconds();
312 gfs2_trans_add_bh(ip->i_gl, dibh);
313 gfs2_dinode_out(&ip->i_di, dibh->b_data);
314 brelse(dibh);
315 }
316
317 gfs2_trans_end(sdp);
318
319 out_gunlock:
320 gfs2_glock_dq_uninit(&rg_gh);
321
322 return error;
323}
324
325static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
326 struct gfs2_ea_header *ea,
327 struct gfs2_ea_header *prev, int leave)
328{
329 struct gfs2_alloc *al;
330 int error;
331
332 al = gfs2_alloc_get(ip);
333
334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
335 if (error)
336 goto out_alloc;
337
338 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
339 if (error)
340 goto out_quota;
341
342 error = ea_dealloc_unstuffed(ip,
343 bh, ea, prev,
344 (leave) ? &error : NULL);
345
346 gfs2_glock_dq_uninit(&al->al_ri_gh);
347
348 out_quota:
349 gfs2_quota_unhold(ip);
350
351 out_alloc:
352 gfs2_alloc_put(ip);
353
354 return error;
355}
356
357/******************************************************************************/
358
359static int gfs2_ea_repack_i(struct gfs2_inode *ip)
360{
361 return -EOPNOTSUPP;
362}
363
364int gfs2_ea_repack(struct gfs2_inode *ip)
365{
366 struct gfs2_holder gh;
367 int error;
368
369 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
370 if (error)
371 return error;
372
373 /* Some sort of permissions checking would be nice */
374
375 error = gfs2_ea_repack_i(ip);
376
377 gfs2_glock_dq_uninit(&gh);
378
379 return error;
380}
381
/* Private state passed through ea_foreach() to ea_list_i(). */
382struct ea_list {
/* request whose er_data buffer (if any) receives the names */
383 struct gfs2_ea_request *ei_er;
/* running total of bytes needed/written so far */
384 unsigned int ei_size;
385};
386
387static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
388 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
389 void *private)
390{
391 struct ea_list *ei = private;
392 struct gfs2_ea_request *er = ei->ei_er;
393 unsigned int ea_size = GFS2_EA_STRLEN(ea);
394
395 if (ea->ea_type == GFS2_EATYPE_UNUSED)
396 return 0;
397
398 if (er->er_data_len) {
399 char *prefix;
400 unsigned int l;
401 char c = 0;
402
403 if (ei->ei_size + ea_size > er->er_data_len)
404 return -ERANGE;
405
406 if (ea->ea_type == GFS2_EATYPE_USR) {
407 prefix = "user.";
408 l = 5;
409 } else {
410 prefix = "system.";
411 l = 7;
412 }
413
414 memcpy(er->er_data + ei->ei_size,
415 prefix, l);
416 memcpy(er->er_data + ei->ei_size + l,
417 GFS2_EA2NAME(ea),
418 ea->ea_name_len);
419 memcpy(er->er_data + ei->ei_size +
420 ea_size - 1,
421 &c, 1);
422 }
423
424 ei->ei_size += ea_size;
425
426 return 0;
427}
428
429/**
430 * gfs2_ea_list -
431 * @ip:
432 * @er:
433 *
434 * Returns: actual size of data on success, -errno on error
435 */
436
437int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
438{
439 struct gfs2_holder i_gh;
440 int error;
441
442 if (!er->er_data || !er->er_data_len) {
443 er->er_data = NULL;
444 er->er_data_len = 0;
445 }
446
447 error = gfs2_glock_nq_init(ip->i_gl,
448 LM_ST_SHARED, LM_FLAG_ANY,
449 &i_gh);
450 if (error)
451 return error;
452
453 if (ip->i_di.di_eattr) {
454 struct ea_list ei = { .ei_er = er, .ei_size = 0 };
455
456 error = ea_foreach(ip, ea_list_i, &ei);
457 if (!error)
458 error = ei.ei_size;
459 }
460
461 gfs2_glock_dq_uninit(&i_gh);
462
463 return error;
464}
465
466/**
467 * ea_get_unstuffed - actually copies the unstuffed data into the
468 * request buffer
469 * @ip:
470 * @ea:
471 * @data:
472 *
473 * Returns: errno
474 */
475
476static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
477 char *data)
478{
479 struct gfs2_sbd *sdp = ip->i_sbd;
480 struct buffer_head **bh;
481 unsigned int amount = GFS2_EA_DATA_LEN(ea);
482 unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
483 uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
484 unsigned int x;
485 int error = 0;
486
487 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
488 if (!bh)
489 return -ENOMEM;
490
491 for (x = 0; x < nptrs; x++) {
492 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
493 DIO_START, bh + x);
494 if (error) {
495 while (x--)
496 brelse(bh[x]);
497 goto out;
498 }
499 dataptrs++;
500 }
501
502 for (x = 0; x < nptrs; x++) {
503 error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
504 if (error) {
505 for (; x < nptrs; x++)
506 brelse(bh[x]);
507 goto out;
508 }
509 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
510 for (; x < nptrs; x++)
511 brelse(bh[x]);
512 error = -EIO;
513 goto out;
514 }
515
516 memcpy(data,
517 bh[x]->b_data + sizeof(struct gfs2_meta_header),
518 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
519
520 amount -= sdp->sd_jbsize;
521 data += sdp->sd_jbsize;
522
523 brelse(bh[x]);
524 }
525
526 out:
527 kfree(bh);
528
529 return error;
530}
531
532int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
533 char *data)
534{
535 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
536 memcpy(data,
537 GFS2_EA2DATA(el->el_ea),
538 GFS2_EA_DATA_LEN(el->el_ea));
539 return 0;
540 } else
541 return ea_get_unstuffed(ip, el->el_ea, data);
542}
543
544/**
545 * gfs2_ea_get_i -
546 * @ip:
547 * @er:
548 *
549 * Returns: actual size of data on success, -errno on error
550 */
551
552int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
553{
554 struct gfs2_ea_location el;
555 int error;
556
557 if (!ip->i_di.di_eattr)
558 return -ENODATA;
559
560 error = gfs2_ea_find(ip, er, &el);
561 if (error)
562 return error;
563 if (!el.el_ea)
564 return -ENODATA;
565
566 if (er->er_data_len) {
567 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
568 error = -ERANGE;
569 else
570 error = gfs2_ea_get_copy(ip, &el, er->er_data);
571 }
572 if (!error)
573 error = GFS2_EA_DATA_LEN(el.el_ea);
574
575 brelse(el.el_bh);
576
577 return error;
578}
579
580/**
581 * gfs2_ea_get -
582 * @ip:
583 * @er:
584 *
585 * Returns: actual size of data on success, -errno on error
586 */
587
588int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
589{
590 struct gfs2_holder i_gh;
591 int error;
592
593 if (!er->er_name_len ||
594 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
595 return -EINVAL;
596 if (!er->er_data || !er->er_data_len) {
597 er->er_data = NULL;
598 er->er_data_len = 0;
599 }
600
601 error = gfs2_glock_nq_init(ip->i_gl,
602 LM_ST_SHARED, LM_FLAG_ANY,
603 &i_gh);
604 if (error)
605 return error;
606
607 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
608
609 gfs2_glock_dq_uninit(&i_gh);
610
611 return error;
612}
613
614/**
615 * ea_alloc_blk - allocates a new block for extended attributes.
616 * @ip: A pointer to the inode that's getting extended attributes
617 * @bhp:
618 *
619 * Returns: errno
620 */
621
622static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
623{
624 struct gfs2_sbd *sdp = ip->i_sbd;
625 struct gfs2_ea_header *ea;
626 uint64_t block;
627
628 block = gfs2_alloc_meta(ip);
629
630 *bhp = gfs2_meta_new(ip->i_gl, block);
631 gfs2_trans_add_bh(ip->i_gl, *bhp);
632 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
633 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
634
635 ea = GFS2_EA_BH2FIRST(*bhp);
636 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
637 ea->ea_type = GFS2_EATYPE_UNUSED;
638 ea->ea_flags = GFS2_EAFLAG_LAST;
639 ea->ea_num_ptrs = 0;
640
641 ip->i_di.di_blocks++;
642
643 return 0;
644}
645
646/**
647 * ea_write - writes the request info to an ea, creating new blocks if
648 * necessary
649 * @ip: inode that is being modified
650 * @ea: the location of the new ea in a block
651 * @er: the write request
652 *
653 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bit of ea_flags
654 *
655 * returns : errno
656 */
657
658static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
659 struct gfs2_ea_request *er)
660{
661 struct gfs2_sbd *sdp = ip->i_sbd;
662
663 ea->ea_data_len = cpu_to_be32(er->er_data_len);
664 ea->ea_name_len = er->er_name_len;
665 ea->ea_type = er->er_type;
666 ea->__pad = 0;
667
668 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
669
670 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
671 ea->ea_num_ptrs = 0;
672 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
673 } else {
674 uint64_t *dataptr = GFS2_EA2DATAPTRS(ea);
675 const char *data = er->er_data;
676 unsigned int data_len = er->er_data_len;
677 unsigned int copy;
678 unsigned int x;
679
680 ea->ea_num_ptrs = DIV_RU(er->er_data_len, sdp->sd_jbsize);
681 for (x = 0; x < ea->ea_num_ptrs; x++) {
682 struct buffer_head *bh;
683 uint64_t block;
684 int mh_size = sizeof(struct gfs2_meta_header);
685
686 block = gfs2_alloc_meta(ip);
687
688 bh = gfs2_meta_new(ip->i_gl, block);
689 gfs2_trans_add_bh(ip->i_gl, bh);
690 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
691
692 ip->i_di.di_blocks++;
693
694 copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize :
695 data_len;
696 memcpy(bh->b_data + mh_size, data, copy);
697 if (copy < sdp->sd_jbsize)
698 memset(bh->b_data + mh_size + copy, 0,
699 sdp->sd_jbsize - copy);
700
701 *dataptr++ = cpu_to_be64((uint64_t)bh->b_blocknr);
702 data += copy;
703 data_len -= copy;
704
705 brelse(bh);
706 }
707
708 gfs2_assert_withdraw(sdp, !data_len);
709 }
710
711 return 0;
712}
713
/*
 * Worker run by ea_alloc_skeleton() once quota, block reservation and
 * transaction are in place.  @private is the opaque pointer handed to
 * ea_alloc_skeleton().  A non-zero return skips the dinode update and is
 * returned to the caller.
 */
714typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
715 struct gfs2_ea_request *er,
716 void *private);
717
718static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
719 unsigned int blks,
720 ea_skeleton_call_t skeleton_call,
721 void *private)
722{
723 struct gfs2_alloc *al;
724 struct buffer_head *dibh;
725 int error;
726
727 al = gfs2_alloc_get(ip);
728
729 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
730 if (error)
731 goto out;
732
733 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
734 if (error)
735 goto out_gunlock_q;
736
737 al->al_requested = blks;
738
739 error = gfs2_inplace_reserve(ip);
740 if (error)
741 goto out_gunlock_q;
742
743 error = gfs2_trans_begin(ip->i_sbd,
744 blks + al->al_rgd->rd_ri.ri_length +
745 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
746 if (error)
747 goto out_ipres;
748
749 error = skeleton_call(ip, er, private);
750 if (error)
751 goto out_end_trans;
752
753 error = gfs2_meta_inode_buffer(ip, &dibh);
754 if (!error) {
755 if (er->er_flags & GFS2_ERF_MODE) {
756 gfs2_assert_withdraw(ip->i_sbd,
757 (ip->i_di.di_mode & S_IFMT) ==
758 (er->er_mode & S_IFMT));
759 ip->i_di.di_mode = er->er_mode;
760 }
761 ip->i_di.di_ctime = get_seconds();
762 gfs2_trans_add_bh(ip->i_gl, dibh);
763 gfs2_dinode_out(&ip->i_di, dibh->b_data);
764 brelse(dibh);
765 }
766
767 out_end_trans:
768 gfs2_trans_end(ip->i_sbd);
769
770 out_ipres:
771 gfs2_inplace_release(ip);
772
773 out_gunlock_q:
774 gfs2_quota_unlock(ip);
775
776 out:
777 gfs2_alloc_put(ip);
778
779 return error;
780}
781
782static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
783 void *private)
784{
785 struct buffer_head *bh;
786 int error;
787
788 error = ea_alloc_blk(ip, &bh);
789 if (error)
790 return error;
791
792 ip->i_di.di_eattr = bh->b_blocknr;
793 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
794
795 brelse(bh);
796
797 return error;
798}
799
800/**
801 * ea_init - initializes a new eattr block
802 * @ip:
803 * @er:
804 *
805 * Returns: errno
806 */
807
808static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
809{
810 unsigned int jbsize = ip->i_sbd->sd_jbsize;
811 unsigned int blks = 1;
812
813 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
814 blks += DIV_RU(er->er_data_len, jbsize);
815
816 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
817}
818
819static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
820{
821 uint32_t ea_size = GFS2_EA_SIZE(ea);
822 struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea + ea_size);
823 uint32_t new_size = GFS2_EA_REC_LEN(ea) - ea_size;
824 int last = ea->ea_flags & GFS2_EAFLAG_LAST;
825
826 ea->ea_rec_len = cpu_to_be32(ea_size);
827 ea->ea_flags ^= last;
828
829 new->ea_rec_len = cpu_to_be32(new_size);
830 new->ea_flags = last;
831
832 return new;
833}
834
835static void ea_set_remove_stuffed(struct gfs2_inode *ip,
836 struct gfs2_ea_location *el)
837{
838 struct gfs2_ea_header *ea = el->el_ea;
839 struct gfs2_ea_header *prev = el->el_prev;
840 uint32_t len;
841
842 gfs2_trans_add_bh(ip->i_gl, el->el_bh);
843
844 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
845 ea->ea_type = GFS2_EATYPE_UNUSED;
846 return;
847 } else if (GFS2_EA2NEXT(prev) != ea) {
848 prev = GFS2_EA2NEXT(prev);
849 gfs2_assert_withdraw(ip->i_sbd, GFS2_EA2NEXT(prev) == ea);
850 }
851
852 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
853 prev->ea_rec_len = cpu_to_be32(len);
854
855 if (GFS2_EA_IS_LAST(ea))
856 prev->ea_flags |= GFS2_EAFLAG_LAST;
857}
858
/* Private state passed through ea_foreach() to ea_set_simple(). */
859struct ea_set {
/* 1 if the new EA goes into slack split off an in-use record, 0 if it
 * reuses an unused record as is */
860 int ea_split;
861
/* the set request being carried out */
862 struct gfs2_ea_request *es_er;
/* location of the old EA to remove afterwards, or NULL */
863 struct gfs2_ea_location *es_el;
864
/* buffer and record chosen for the write (used by the allocating path) */
865 struct buffer_head *es_bh;
866 struct gfs2_ea_header *es_ea;
867};
868
869static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
870 struct gfs2_ea_header *ea, struct ea_set *es)
871{
872 struct gfs2_ea_request *er = es->es_er;
873 struct buffer_head *dibh;
874 int error;
875
876 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + 2 * RES_EATTR, 0);
877 if (error)
878 return error;
879
880 gfs2_trans_add_bh(ip->i_gl, bh);
881
882 if (es->ea_split)
883 ea = ea_split_ea(ea);
884
885 ea_write(ip, ea, er);
886
887 if (es->es_el)
888 ea_set_remove_stuffed(ip, es->es_el);
889
890 error = gfs2_meta_inode_buffer(ip, &dibh);
891 if (error)
892 goto out;
893
894 if (er->er_flags & GFS2_ERF_MODE) {
895 gfs2_assert_withdraw(ip->i_sbd,
896 (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
897 ip->i_di.di_mode = er->er_mode;
898 }
899 ip->i_di.di_ctime = get_seconds();
900 gfs2_trans_add_bh(ip->i_gl, dibh);
901 gfs2_dinode_out(&ip->i_di, dibh->b_data);
902 brelse(dibh);
903 out:
904 gfs2_trans_end(ip->i_sbd);
905
906 return error;
907}
908
909static int ea_set_simple_alloc(struct gfs2_inode *ip,
910 struct gfs2_ea_request *er, void *private)
911{
912 struct ea_set *es = private;
913 struct gfs2_ea_header *ea = es->es_ea;
914 int error;
915
916 gfs2_trans_add_bh(ip->i_gl, es->es_bh);
917
918 if (es->ea_split)
919 ea = ea_split_ea(ea);
920
921 error = ea_write(ip, ea, er);
922 if (error)
923 return error;
924
925 if (es->es_el)
926 ea_set_remove_stuffed(ip, es->es_el);
927
928 return 0;
929}
930
931static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
932 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
933 void *private)
934{
935 struct ea_set *es = private;
936 unsigned int size;
937 int stuffed;
938 int error;
939
940 stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size);
941
942 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
943 if (GFS2_EA_REC_LEN(ea) < size)
944 return 0;
945 if (!GFS2_EA_IS_STUFFED(ea)) {
946 error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
947 if (error)
948 return error;
949 }
950 es->ea_split = 0;
951 } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
952 es->ea_split = 1;
953 else
954 return 0;
955
956 if (stuffed) {
957 error = ea_set_simple_noalloc(ip, bh, ea, es);
958 if (error)
959 return error;
960 } else {
961 unsigned int blks;
962
963 es->es_bh = bh;
964 es->es_ea = ea;
965 blks = 2 + DIV_RU(es->es_er->er_data_len, ip->i_sbd->sd_jbsize);
966
967 error = ea_alloc_skeleton(ip, es->es_er, blks,
968 ea_set_simple_alloc, es);
969 if (error)
970 return error;
971 }
972
973 return 1;
974}
975
976static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
977 void *private)
978{
979 struct gfs2_sbd *sdp = ip->i_sbd;
980 struct buffer_head *indbh, *newbh;
981 uint64_t *eablk;
982 int error;
983 int mh_size = sizeof(struct gfs2_meta_header);
984
985 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
986 uint64_t *end;
987
988 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
989 DIO_START | DIO_WAIT, &indbh);
990 if (error)
991 return error;
992
993 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
994 error = -EIO;
995 goto out;
996 }
997
998 eablk = (uint64_t *)(indbh->b_data + mh_size);
999 end = eablk + sdp->sd_inptrs;
1000
1001 for (; eablk < end; eablk++)
1002 if (!*eablk)
1003 break;
1004
1005 if (eablk == end) {
1006 error = -ENOSPC;
1007 goto out;
1008 }
1009
1010 gfs2_trans_add_bh(ip->i_gl, indbh);
1011 } else {
1012 uint64_t blk;
1013
1014 blk = gfs2_alloc_meta(ip);
1015
1016 indbh = gfs2_meta_new(ip->i_gl, blk);
1017 gfs2_trans_add_bh(ip->i_gl, indbh);
1018 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1019 gfs2_buffer_clear_tail(indbh, mh_size);
1020
1021 eablk = (uint64_t *)(indbh->b_data + mh_size);
1022 *eablk = cpu_to_be64(ip->i_di.di_eattr);
1023 ip->i_di.di_eattr = blk;
1024 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
1025 ip->i_di.di_blocks++;
1026
1027 eablk++;
1028 }
1029
1030 error = ea_alloc_blk(ip, &newbh);
1031 if (error)
1032 goto out;
1033
1034 *eablk = cpu_to_be64((uint64_t)newbh->b_blocknr);
1035 error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
1036 brelse(newbh);
1037 if (error)
1038 goto out;
1039
1040 if (private)
1041 ea_set_remove_stuffed(ip, (struct gfs2_ea_location *)private);
1042
1043 out:
1044 brelse(indbh);
1045
1046 return error;
1047}
1048
1049static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1050 struct gfs2_ea_location *el)
1051{
1052 struct ea_set es;
1053 unsigned int blks = 2;
1054 int error;
1055
1056 memset(&es, 0, sizeof(struct ea_set));
1057 es.es_er = er;
1058 es.es_el = el;
1059
1060 error = ea_foreach(ip, ea_set_simple, &es);
1061 if (error > 0)
1062 return 0;
1063 if (error)
1064 return error;
1065
1066 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
1067 blks++;
1068 if (GFS2_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize)
1069 blks += DIV_RU(er->er_data_len, ip->i_sbd->sd_jbsize);
1070
1071 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
1072}
1073
1074static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1075 struct gfs2_ea_location *el)
1076{
1077 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1078 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1079 gfs2_assert_withdraw(ip->i_sbd,
1080 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1081 }
1082
1083 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
1084}
1085
1086int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1087{
1088 struct gfs2_ea_location el;
1089 int error;
1090
1091 if (!ip->i_di.di_eattr) {
1092 if (er->er_flags & XATTR_REPLACE)
1093 return -ENODATA;
1094 return ea_init(ip, er);
1095 }
1096
1097 error = gfs2_ea_find(ip, er, &el);
1098 if (error)
1099 return error;
1100
1101 if (el.el_ea) {
1102 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
1103 brelse(el.el_bh);
1104 return -EPERM;
1105 }
1106
1107 error = -EEXIST;
1108 if (!(er->er_flags & XATTR_CREATE)) {
1109 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1110 error = ea_set_i(ip, er, &el);
1111 if (!error && unstuffed)
1112 ea_set_remove_unstuffed(ip, &el);
1113 }
1114
1115 brelse(el.el_bh);
1116 } else {
1117 error = -ENODATA;
1118 if (!(er->er_flags & XATTR_REPLACE))
1119 error = ea_set_i(ip, er, NULL);
1120 }
1121
1122 return error;
1123}
1124
1125int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1126{
1127 struct gfs2_holder i_gh;
1128 int error;
1129
1130 if (!er->er_name_len ||
1131 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1132 return -EINVAL;
1133 if (!er->er_data || !er->er_data_len) {
1134 er->er_data = NULL;
1135 er->er_data_len = 0;
1136 }
1137 error = ea_check_size(ip->i_sbd, er);
1138 if (error)
1139 return error;
1140
1141 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1142 if (error)
1143 return error;
1144
1145 if (IS_IMMUTABLE(ip->i_vnode))
1146 error = -EPERM;
1147 else
1148 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1149
1150 gfs2_glock_dq_uninit(&i_gh);
1151
1152 return error;
1153}
1154
1155static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1156{
1157 struct gfs2_ea_header *ea = el->el_ea;
1158 struct gfs2_ea_header *prev = el->el_prev;
1159 struct buffer_head *dibh;
1160 int error;
1161
1162 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
1163 if (error)
1164 return error;
1165
1166 gfs2_trans_add_bh(ip->i_gl, el->el_bh);
1167
1168 if (prev) {
1169 uint32_t len;
1170
1171 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
1172 prev->ea_rec_len = cpu_to_be32(len);
1173
1174 if (GFS2_EA_IS_LAST(ea))
1175 prev->ea_flags |= GFS2_EAFLAG_LAST;
1176 } else
1177 ea->ea_type = GFS2_EATYPE_UNUSED;
1178
1179 error = gfs2_meta_inode_buffer(ip, &dibh);
1180 if (!error) {
1181 ip->i_di.di_ctime = get_seconds();
1182 gfs2_trans_add_bh(ip->i_gl, dibh);
1183 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1184 brelse(dibh);
1185 }
1186
1187 gfs2_trans_end(ip->i_sbd);
1188
1189 return error;
1190}
1191
1192int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1193{
1194 struct gfs2_ea_location el;
1195 int error;
1196
1197 if (!ip->i_di.di_eattr)
1198 return -ENODATA;
1199
1200 error = gfs2_ea_find(ip, er, &el);
1201 if (error)
1202 return error;
1203 if (!el.el_ea)
1204 return -ENODATA;
1205
1206 if (GFS2_EA_IS_STUFFED(el.el_ea))
1207 error = ea_remove_stuffed(ip, &el);
1208 else
1209 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
1210 0);
1211
1212 brelse(el.el_bh);
1213
1214 return error;
1215}
1216
1217/**
1218 * gfs2_ea_remove - removes an extended attribute
1219 * @ip: pointer to the inode of the target file
1220 * @er: request information
1221 *
1222 * Returns: errno
1223 */
1224
1225int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1226{
1227 struct gfs2_holder i_gh;
1228 int error;
1229
1230 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1231 return -EINVAL;
1232
1233 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1234 if (error)
1235 return error;
1236
1237 if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1238 error = -EPERM;
1239 else
1240 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
1241
1242 gfs2_glock_dq_uninit(&i_gh);
1243
1244 return error;
1245}
1246
1247static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1248 struct gfs2_ea_header *ea, char *data)
1249{
1250 struct gfs2_sbd *sdp = ip->i_sbd;
1251 struct buffer_head **bh;
1252 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1253 unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
1254 uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
1255 unsigned int x;
1256 int error;
1257
1258 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
1259 if (!bh)
1260 return -ENOMEM;
1261
1262 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1263 if (error)
1264 goto out;
1265
1266 for (x = 0; x < nptrs; x++) {
1267 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
1268 DIO_START, bh + x);
1269 if (error) {
1270 while (x--)
1271 brelse(bh[x]);
1272 goto fail;
1273 }
1274 dataptrs++;
1275 }
1276
1277 for (x = 0; x < nptrs; x++) {
1278 error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
1279 if (error) {
1280 for (; x < nptrs; x++)
1281 brelse(bh[x]);
1282 goto fail;
1283 }
1284 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1285 for (; x < nptrs; x++)
1286 brelse(bh[x]);
1287 error = -EIO;
1288 goto fail;
1289 }
1290
1291 gfs2_trans_add_bh(ip->i_gl, bh[x]);
1292
1293 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header),
1294 data,
1295 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1296
1297 amount -= sdp->sd_jbsize;
1298 data += sdp->sd_jbsize;
1299
1300 brelse(bh[x]);
1301 }
1302
1303 out:
1304 kfree(bh);
1305
1306 return error;
1307
1308 fail:
1309 gfs2_trans_end(sdp);
1310 kfree(bh);
1311
1312 return error;
1313}
1314
1315int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
1316 struct iattr *attr, char *data)
1317{
1318 struct buffer_head *dibh;
1319 int error;
1320
1321 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
1322 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
1323 if (error)
1324 return error;
1325
1326 gfs2_trans_add_bh(ip->i_gl, el->el_bh);
1327 memcpy(GFS2_EA2DATA(el->el_ea),
1328 data,
1329 GFS2_EA_DATA_LEN(el->el_ea));
1330 } else
1331 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
1332
1333 if (error)
1334 return error;
1335
1336 error = gfs2_meta_inode_buffer(ip, &dibh);
1337 if (!error) {
1338 error = inode_setattr(ip->i_vnode, attr);
1339 gfs2_assert_warn(ip->i_sbd, !error);
1340 gfs2_inode_attr_out(ip);
1341 gfs2_trans_add_bh(ip->i_gl, dibh);
1342 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1343 brelse(dibh);
1344 }
1345
1346 gfs2_trans_end(ip->i_sbd);
1347
1348 return error;
1349}
1350
1351static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352{
1353 struct gfs2_sbd *sdp = ip->i_sbd;
1354 struct gfs2_rgrp_list rlist;
1355 struct buffer_head *indbh, *dibh;
1356 uint64_t *eablk, *end;
1357 unsigned int rg_blocks = 0;
1358 uint64_t bstart = 0;
1359 unsigned int blen = 0;
1360 unsigned int blks = 0;
1361 unsigned int x;
1362 int error;
1363
1364 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1365
1366 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
1367 DIO_START | DIO_WAIT, &indbh);
1368 if (error)
1369 return error;
1370
1371 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
1372 error = -EIO;
1373 goto out;
1374 }
1375
1376 eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1377 end = eablk + sdp->sd_inptrs;
1378
1379 for (; eablk < end; eablk++) {
1380 uint64_t bn;
1381
1382 if (!*eablk)
1383 break;
1384 bn = be64_to_cpu(*eablk);
1385
1386 if (bstart + blen == bn)
1387 blen++;
1388 else {
1389 if (bstart)
1390 gfs2_rlist_add(sdp, &rlist, bstart);
1391 bstart = bn;
1392 blen = 1;
1393 }
1394 blks++;
1395 }
1396 if (bstart)
1397 gfs2_rlist_add(sdp, &rlist, bstart);
1398 else
1399 goto out;
1400
1401 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1402
1403 for (x = 0; x < rlist.rl_rgrps; x++) {
1404 struct gfs2_rgrpd *rgd;
1405 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
1406 rg_blocks += rgd->rd_ri.ri_length;
1407 }
1408
1409 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1410 if (error)
1411 goto out_rlist_free;
1412
1413 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
1414 RES_INDIRECT + RES_STATFS +
1415 RES_QUOTA, blks);
1416 if (error)
1417 goto out_gunlock;
1418
1419 gfs2_trans_add_bh(ip->i_gl, indbh);
1420
1421 eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1422 bstart = 0;
1423 blen = 0;
1424
1425 for (; eablk < end; eablk++) {
1426 uint64_t bn;
1427
1428 if (!*eablk)
1429 break;
1430 bn = be64_to_cpu(*eablk);
1431
1432 if (bstart + blen == bn)
1433 blen++;
1434 else {
1435 if (bstart)
1436 gfs2_free_meta(ip, bstart, blen);
1437 bstart = bn;
1438 blen = 1;
1439 }
1440
1441 *eablk = 0;
1442 if (!ip->i_di.di_blocks)
1443 gfs2_consist_inode(ip);
1444 ip->i_di.di_blocks--;
1445 }
1446 if (bstart)
1447 gfs2_free_meta(ip, bstart, blen);
1448
1449 ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
1450
1451 error = gfs2_meta_inode_buffer(ip, &dibh);
1452 if (!error) {
1453 gfs2_trans_add_bh(ip->i_gl, dibh);
1454 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1455 brelse(dibh);
1456 }
1457
1458 gfs2_trans_end(sdp);
1459
1460 out_gunlock:
1461 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1462
1463 out_rlist_free:
1464 gfs2_rlist_free(&rlist);
1465
1466 out:
1467 brelse(indbh);
1468
1469 return error;
1470}
1471
1472static int ea_dealloc_block(struct gfs2_inode *ip)
1473{
1474 struct gfs2_sbd *sdp = ip->i_sbd;
1475 struct gfs2_alloc *al = &ip->i_alloc;
1476 struct gfs2_rgrpd *rgd;
1477 struct buffer_head *dibh;
1478 int error;
1479
1480 rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
1481 if (!rgd) {
1482 gfs2_consist_inode(ip);
1483 return -EIO;
1484 }
1485
1486 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1487 &al->al_rgd_gh);
1488 if (error)
1489 return error;
1490
1491 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE +
1492 RES_STATFS + RES_QUOTA, 1);
1493 if (error)
1494 goto out_gunlock;
1495
1496 gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
1497
1498 ip->i_di.di_eattr = 0;
1499 if (!ip->i_di.di_blocks)
1500 gfs2_consist_inode(ip);
1501 ip->i_di.di_blocks--;
1502
1503 error = gfs2_meta_inode_buffer(ip, &dibh);
1504 if (!error) {
1505 gfs2_trans_add_bh(ip->i_gl, dibh);
1506 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1507 brelse(dibh);
1508 }
1509
1510 gfs2_trans_end(sdp);
1511
1512 out_gunlock:
1513 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1514
1515 return error;
1516}
1517
1518/**
1519 * gfs2_ea_dealloc - deallocate the extended attribute fork
1520 * @ip: the inode
1521 *
1522 * Returns: errno
1523 */
1524
1525int gfs2_ea_dealloc(struct gfs2_inode *ip)
1526{
1527 struct gfs2_alloc *al;
1528 int error;
1529
1530 al = gfs2_alloc_get(ip);
1531
1532 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1533 if (error)
1534 goto out_alloc;
1535
1536 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
1537 if (error)
1538 goto out_quota;
1539
1540 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
1541 if (error)
1542 goto out_rindex;
1543
1544 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
1545 error = ea_dealloc_indirect(ip);
1546 if (error)
1547 goto out_rindex;
1548 }
1549
1550 error = ea_dealloc_block(ip);
1551
1552 out_rindex:
1553 gfs2_glock_dq_uninit(&al->al_ri_gh);
1554
1555 out_quota:
1556 gfs2_quota_unhold(ip);
1557
1558 out_alloc:
1559 gfs2_alloc_put(ip);
1560
1561 return error;
1562}
1563
1564/**
1565 * gfs2_get_eattr_meta - return all the eattr blocks of a file
1566 * @dip: the directory
1567 * @ub: the structure representing the user buffer to copy to
1568 *
1569 * Returns: errno
1570 */
1571
1572int gfs2_get_eattr_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub)
1573{
1574 struct buffer_head *bh;
1575 int error;
1576
1577 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
1578 DIO_START | DIO_WAIT, &bh);
1579 if (error)
1580 return error;
1581
1582 gfs2_add_bh_to_ub(ub, bh);
1583
1584 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
1585 struct buffer_head *eabh;
1586 uint64_t *eablk, *end;
1587
1588 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
1589 error = -EIO;
1590 goto out;
1591 }
1592
1593 eablk = (uint64_t *)(bh->b_data +
1594 sizeof(struct gfs2_meta_header));
1595 end = eablk + ip->i_sbd->sd_inptrs;
1596
1597 for (; eablk < end; eablk++) {
1598 uint64_t bn;
1599
1600 if (!*eablk)
1601 break;
1602 bn = be64_to_cpu(*eablk);
1603
1604 error = gfs2_meta_read(ip->i_gl, bn,
1605 DIO_START | DIO_WAIT, &eabh);
1606 if (error)
1607 break;
1608 gfs2_add_bh_to_ub(ub, eabh);
1609 brelse(eabh);
1610 if (error)
1611 break;
1612 }
1613 }
1614
1615 out:
1616 brelse(bh);
1617
1618 return error;
1619}
1620
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
new file mode 100644
index 000000000000..a64039295759
--- /dev/null
+++ b/fs/gfs2/eattr.h
@@ -0,0 +1,90 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EATTR_DOT_H__
11#define __EATTR_DOT_H__
12
/* On-disk header fields are big-endian; these return CPU-order values. */
#define GFS2_EA_REC_LEN(ea)	be32_to_cpu((ea)->ea_rec_len)
#define GFS2_EA_DATA_LEN(ea)	be32_to_cpu((ea)->ea_data_len)

/*
 * Space used by an attribute record, rounded up to 8 bytes: header,
 * name, then either the inline data (stuffed) or the array of 64-bit
 * block pointers (unstuffed).
 */
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
      ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
				  (sizeof(uint64_t) * (ea)->ea_num_ptrs)), 8)

/*
 * 5 or 7 bytes depending on the type, plus the name, plus one more byte.
 * NOTE(review): presumably the "user."/"system" style prefix and a
 * terminator -- confirm against the listing code.
 */
#define GFS2_EA_STRLEN(ea) \
((((ea)->ea_type == GFS2_EATYPE_USR) ? 5 : 7) + (ea)->ea_name_len + 1)

/* An attribute with no block pointers keeps its data inline. */
#define GFS2_EA_IS_STUFFED(ea)	(!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea)	((ea)->ea_flags & GFS2_EAFLAG_LAST)

/* Record sizes needed to store a request, inline or via pointers. */
#define GFS2_EAREQ_SIZE_STUFFED(er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)

#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
      sizeof(uint64_t) * DIV_RU((er)->er_data_len, (sdp)->sd_jbsize), 8)

/* The name follows the header; inline data follows the name. */
#define GFS2_EA2NAME(ea)	((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea)	(GFS2_EA2NAME(ea) + (ea)->ea_name_len)

/* Block pointers start at the next 8-byte boundary after the name. */
#define GFS2_EA2DATAPTRS(ea) \
((uint64_t *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))

/* Step to the following record using this record's length. */
#define GFS2_EA2NEXT(ea) \
((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))

/* First record in a buffer, located just past the metadata header. */
#define GFS2_EA_BH2FIRST(bh) \
((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))

#define GFS2_ERF_MODE 0x80000000
47
/* Description of one extended attribute operation. */
struct gfs2_ea_request {
	char *er_name;			/* attribute name */
	char *er_data;			/* attribute data */
	unsigned int er_name_len;	/* length of er_name */
	unsigned int er_data_len;	/* length of er_data */
	unsigned int er_type;		/* GFS2_EATYPE_... */
	int er_flags;
	mode_t er_mode;
};
57
/* Where an extended attribute was found. */
struct gfs2_ea_location {
	struct buffer_head *el_bh;	/* buffer holding the record */
	struct gfs2_ea_header *el_ea;	/* the record's header */
	struct gfs2_ea_header *el_prev;	/* NOTE(review): presumably the
					   preceding record -- confirm */
};
63
int gfs2_ea_repack(struct gfs2_inode *ip);

/* "_i" variants of get/set/remove. */
int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);

int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);

/* Deallocate the whole extended attribute fork of an inode. */
int gfs2_ea_dealloc(struct gfs2_inode *ip);

/* Copy every extended attribute block of an inode to a user buffer. */
int gfs2_get_eattr_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub);

/* Exported to acl.c */

int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
		 struct gfs2_ea_location *el);
int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		     char *data);
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		      struct iattr *attr, char *data);
89
90#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/format.h b/fs/gfs2/format.h
new file mode 100644
index 000000000000..c7bf32ce3eca
--- /dev/null
+++ b/fs/gfs2/format.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __FORMAT_DOT_H__
11#define __FORMAT_DOT_H__
12
/* Each table currently holds a single 0 entry. */
static const uint32_t gfs2_old_fs_formats[] = { 0 };

static const uint32_t gfs2_old_multihost_formats[] = { 0 };
20
21#endif /* __FORMAT_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
new file mode 100644
index 000000000000..a5d118238466
--- /dev/null
+++ b/fs/gfs2/gfs2.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GFS2_DOT_H__
11#define __GFS2_DOT_H__
12
13#include <linux/gfs2_ondisk.h>
14
15#include "lm_interface.h"
16#include "lvb.h"
17#include "incore.h"
18#include "util.h"
19
/* Named values for boolean-style arguments. */
enum { NO_CREATE = 0, CREATE = 1 };
enum { NO_WAIT = 0, WAIT = 1 };
enum { NO_FORCE = 0, FORCE = 1 };

/* Divide num by den. Round up if there is a remainder. */
#define DIV_RU(num, den) (((num) + (den) - 1) / (den))

#define GFS2_FAST_NAME_SIZE 8
39
/* Typed get/set accessors for the private pointer of a VFS object. */
#define get_v2sdp(sb)		((struct gfs2_sbd *)(sb)->s_fs_info)
#define set_v2sdp(sb, sdp)	(sb)->s_fs_info = (sdp)
#define get_v2ip(inode)		((struct gfs2_inode *)(inode)->u.generic_ip)
#define set_v2ip(inode, ip)	(inode)->u.generic_ip = (ip)
#define get_v2fp(file)		((struct gfs2_file *)(file)->private_data)
#define set_v2fp(file, fp)	(file)->private_data = (fp)
#define get_v2bd(bh)		((struct gfs2_bufdata *)(bh)->b_private)
#define set_v2bd(bh, bd)	(bh)->b_private = (bd)
#define get_v2db(bh)		((struct gfs2_databuf *)(bh)->b_private)
#define set_v2db(bh, db)	(bh)->b_private = (db)

/* The current task's journal_info slot carries the active transaction. */
#define get_transaction		((struct gfs2_trans *)(current->journal_info))
#define set_transaction(tr)	(current->journal_info) = (tr)

/* Typed views of a glock's gl_object pointer. */
#define get_gl2ip(gl)		((struct gfs2_inode *)(gl)->gl_object)
#define set_gl2ip(gl, ip)	(gl)->gl_object = (ip)
#define get_gl2rgd(gl)		((struct gfs2_rgrpd *)(gl)->gl_object)
#define set_gl2rgd(gl, rgd)	(gl)->gl_object = (rgd)
#define get_gl2gl(gl)		((struct gfs2_glock *)(gl)->gl_object)
#define set_gl2gl(gl, gl2)	(gl)->gl_object = (gl2)
60
61#endif /* __GFS2_DOT_H__ */
62
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
new file mode 100644
index 000000000000..321945fde12d
--- /dev/null
+++ b/fs/gfs2/glock.c
@@ -0,0 +1,2513 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/sort.h>
17#include <linux/jhash.h>
18#include <linux/kref.h>
19#include <asm/semaphore.h>
20#include <asm/uaccess.h>
21
22#include "gfs2.h"
23#include "glock.h"
24#include "glops.h"
25#include "inode.h"
26#include "lm.h"
27#include "lops.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "super.h"
31
32/* Must be kept in sync with the beginning of struct gfs2_glock */
33struct glock_plug {
34 struct list_head gl_list;
35 unsigned long gl_flags;
36};
37
38struct greedy {
39 struct gfs2_holder gr_gh;
40 struct work_struct gr_work;
41};
42
43typedef void (*glock_examiner) (struct gfs2_glock * gl);
44
45/**
46 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
47 * @actual: the current state of the lock
48 * @requested: the lock state that was requested by the caller
49 * @flags: the modifier flags passed in by the caller
50 *
51 * Returns: 1 if the locks are compatible, 0 otherwise
52 */
53
54static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
55 int flags)
56{
57 if (actual == requested)
58 return 1;
59
60 if (flags & GL_EXACT)
61 return 0;
62
63 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
64 return 1;
65
66 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
67 return 1;
68
69 return 0;
70}
71
72/**
73 * gl_hash() - Turn glock number into hash bucket number
74 * @lock: The glock number
75 *
76 * Returns: The number of the corresponding hash bucket
77 */
78
79static unsigned int gl_hash(struct lm_lockname *name)
80{
81 unsigned int h;
82
83 h = jhash(&name->ln_number, sizeof(uint64_t), 0);
84 h = jhash(&name->ln_type, sizeof(unsigned int), h);
85 h &= GFS2_GL_HASH_MASK;
86
87 return h;
88}
89
90/**
91 * glock_free() - Perform a few checks and then release struct gfs2_glock
92 * @gl: The glock to release
93 *
94 * Also calls lock module to release its internal structure for this glock.
95 *
96 */
97
98static void glock_free(struct gfs2_glock *gl)
99{
100 struct gfs2_sbd *sdp = gl->gl_sbd;
101 struct inode *aspace = gl->gl_aspace;
102
103 gfs2_lm_put_lock(sdp, gl->gl_lock);
104
105 if (aspace)
106 gfs2_aspace_put(aspace);
107
108 kmem_cache_free(gfs2_glock_cachep, gl);
109
110 atomic_dec(&sdp->sd_glock_count);
111}
112
113/**
114 * gfs2_glock_hold() - increment reference count on glock
115 * @gl: The glock to hold
116 *
117 */
118
119void gfs2_glock_hold(struct gfs2_glock *gl)
120{
121 kref_get(&gl->gl_ref);
122}
123
124/* All work is done after the return from kref_put() so we
125 can release the write_lock before the free. */
126
127static void kill_glock(struct kref *kref)
128{
129 struct gfs2_glock *gl = container_of(kref, struct gfs2_glock, gl_ref);
130 struct gfs2_sbd *sdp = gl->gl_sbd;
131
132 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
133 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
134 gfs2_assert(sdp, list_empty(&gl->gl_holders));
135 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
136 gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
137 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
138}
139
140/**
141 * gfs2_glock_put() - Decrement reference count on glock
142 * @gl: The glock to put
143 *
144 */
145
146int gfs2_glock_put(struct gfs2_glock *gl)
147{
148 struct gfs2_sbd *sdp = gl->gl_sbd;
149 struct gfs2_gl_hash_bucket *bucket = gl->gl_bucket;
150 int rv = 0;
151
152 down(&sdp->sd_invalidate_inodes_mutex);
153
154 write_lock(&bucket->hb_lock);
155 if (kref_put(&gl->gl_ref, kill_glock)) {
156 list_del_init(&gl->gl_list);
157 write_unlock(&bucket->hb_lock);
158 glock_free(gl);
159 rv = 1;
160 goto out;
161 }
162 write_unlock(&bucket->hb_lock);
163 out:
164 up(&sdp->sd_invalidate_inodes_mutex);
165 return rv;
166}
167
168/**
169 * queue_empty - check to see if a glock's queue is empty
170 * @gl: the glock
171 * @head: the head of the queue to check
172 *
173 * This function protects the list in the event that a process already
174 * has a holder on the list and is adding a second holder for itself.
175 * The glmutex lock is what generally prevents processes from working
176 * on the same glock at once, but the special case of adding a second
177 * holder for yourself ("recursive" locking) doesn't involve locking
178 * glmutex, making the spin lock necessary.
179 *
180 * Returns: 1 if the queue is empty
181 */
182
183static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
184{
185 int empty;
186 spin_lock(&gl->gl_spin);
187 empty = list_empty(head);
188 spin_unlock(&gl->gl_spin);
189 return empty;
190}
191
192/**
193 * search_bucket() - Find struct gfs2_glock by lock number
194 * @bucket: the bucket to search
195 * @name: The lock name
196 *
197 * Returns: NULL, or the struct gfs2_glock with the requested number
198 */
199
200static struct gfs2_glock *search_bucket(struct gfs2_gl_hash_bucket *bucket,
201 struct lm_lockname *name)
202{
203 struct gfs2_glock *gl;
204
205 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
206 if (test_bit(GLF_PLUG, &gl->gl_flags))
207 continue;
208 if (!lm_name_equal(&gl->gl_name, name))
209 continue;
210
211 kref_get(&gl->gl_ref);
212
213 return gl;
214 }
215
216 return NULL;
217}
218
219/**
220 * gfs2_glock_find() - Find glock by lock number
221 * @sdp: The GFS2 superblock
222 * @name: The lock name
223 *
224 * Returns: NULL, or the struct gfs2_glock with the requested number
225 */
226
227struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
228 struct lm_lockname *name)
229{
230 struct gfs2_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
231 struct gfs2_glock *gl;
232
233 read_lock(&bucket->hb_lock);
234 gl = search_bucket(bucket, name);
235 read_unlock(&bucket->hb_lock);
236
237 return gl;
238}
239
240/**
241 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
242 * @sdp: The GFS2 superblock
243 * @number: the lock number
244 * @glops: The glock_operations to use
245 * @create: If 0, don't create the glock if it doesn't exist
246 * @glp: the glock is returned here
247 *
248 * This does not lock a glock, just finds/creates structures for one.
249 *
250 * Returns: errno
251 */
252
253int gfs2_glock_get(struct gfs2_sbd *sdp, uint64_t number,
254 struct gfs2_glock_operations *glops, int create,
255 struct gfs2_glock **glp)
256{
257 struct lm_lockname name;
258 struct gfs2_glock *gl, *tmp;
259 struct gfs2_gl_hash_bucket *bucket;
260 int error;
261
262 name.ln_number = number;
263 name.ln_type = glops->go_type;
264 bucket = &sdp->sd_gl_hash[gl_hash(&name)];
265
266 read_lock(&bucket->hb_lock);
267 gl = search_bucket(bucket, &name);
268 read_unlock(&bucket->hb_lock);
269
270 if (gl || !create) {
271 *glp = gl;
272 return 0;
273 }
274
275 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
276 if (!gl)
277 return -ENOMEM;
278
279 memset(gl, 0, sizeof(struct gfs2_glock));
280
281 INIT_LIST_HEAD(&gl->gl_list);
282 gl->gl_name = name;
283 kref_init(&gl->gl_ref);
284
285 spin_lock_init(&gl->gl_spin);
286
287 gl->gl_state = LM_ST_UNLOCKED;
288 INIT_LIST_HEAD(&gl->gl_holders);
289 INIT_LIST_HEAD(&gl->gl_waiters1);
290 INIT_LIST_HEAD(&gl->gl_waiters2);
291 INIT_LIST_HEAD(&gl->gl_waiters3);
292
293 gl->gl_ops = glops;
294
295 gl->gl_bucket = bucket;
296 INIT_LIST_HEAD(&gl->gl_reclaim);
297
298 gl->gl_sbd = sdp;
299
300 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
301 INIT_LIST_HEAD(&gl->gl_ail_list);
302
303 /* If this glock protects actual on-disk data or metadata blocks,
304 create a VFS inode to manage the pages/buffers holding them. */
305 if (glops == &gfs2_inode_glops ||
306 glops == &gfs2_rgrp_glops ||
307 glops == &gfs2_meta_glops) {
308 gl->gl_aspace = gfs2_aspace_get(sdp);
309 if (!gl->gl_aspace) {
310 error = -ENOMEM;
311 goto fail;
312 }
313 }
314
315 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
316 if (error)
317 goto fail_aspace;
318
319 atomic_inc(&sdp->sd_glock_count);
320
321 write_lock(&bucket->hb_lock);
322 tmp = search_bucket(bucket, &name);
323 if (tmp) {
324 write_unlock(&bucket->hb_lock);
325 glock_free(gl);
326 gl = tmp;
327 } else {
328 list_add_tail(&gl->gl_list, &bucket->hb_list);
329 write_unlock(&bucket->hb_lock);
330 }
331
332 *glp = gl;
333
334 return 0;
335
336 fail_aspace:
337 if (gl->gl_aspace)
338 gfs2_aspace_put(gl->gl_aspace);
339
340 fail:
341 kmem_cache_free(gfs2_glock_cachep, gl);
342
343 return error;
344}
345
346/**
347 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
348 * @gl: the glock
349 * @state: the state we're requesting
350 * @flags: the modifier flags
351 * @gh: the holder structure
352 *
353 */
354
355void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
356 struct gfs2_holder *gh)
357{
358 INIT_LIST_HEAD(&gh->gh_list);
359 gh->gh_gl = gl;
360 gh->gh_owner = (flags & GL_NEVER_RECURSE) ? NULL : current;
361 gh->gh_state = state;
362 gh->gh_flags = flags;
363 gh->gh_error = 0;
364 gh->gh_iflags = 0;
365 init_completion(&gh->gh_wait);
366
367 if (gh->gh_state == LM_ST_EXCLUSIVE)
368 gh->gh_flags |= GL_LOCAL_EXCL;
369
370 gfs2_glock_hold(gl);
371}
372
373/**
374 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
375 * @state: the state we're requesting
376 * @flags: the modifier flags
377 * @gh: the holder structure
378 *
379 * Don't mess with the glock.
380 *
381 */
382
383void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh)
384{
385 gh->gh_state = state;
386 gh->gh_flags = flags;
387 if (gh->gh_state == LM_ST_EXCLUSIVE)
388 gh->gh_flags |= GL_LOCAL_EXCL;
389
390 gh->gh_iflags &= 1 << HIF_ALLOCED;
391}
392
393/**
394 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
395 * @gh: the holder structure
396 *
397 */
398
399void gfs2_holder_uninit(struct gfs2_holder *gh)
400{
401 gfs2_glock_put(gh->gh_gl);
402 gh->gh_gl = NULL;
403}
404
405/**
406 * gfs2_holder_get - get a struct gfs2_holder structure
407 * @gl: the glock
408 * @state: the state we're requesting
409 * @flags: the modifier flags
410 * @gfp_flags: __GFP_NOFAIL
411 *
412 * Figure out how big an impact this function has. Either:
413 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
414 * 2) Leave it like it is
415 *
416 * Returns: the holder structure, NULL on ENOMEM
417 */
418
419struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
420 int flags, gfp_t gfp_flags)
421{
422 struct gfs2_holder *gh;
423
424 gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
425 if (!gh)
426 return NULL;
427
428 gfs2_holder_init(gl, state, flags, gh);
429 set_bit(HIF_ALLOCED, &gh->gh_iflags);
430
431 return gh;
432}
433
/**
 * gfs2_holder_put - uninitialize and free a struct gfs2_holder
 * @gh: the holder structure
 *
 * Drops the glock reference via gfs2_holder_uninit() and then frees @gh
 * with kfree().
 *
 */

void gfs2_holder_put(struct gfs2_holder *gh)
{
	gfs2_holder_uninit(gh);
	kfree(gh);
}
445
446/**
447 * handle_recurse - put other holder structures (marked recursive)
448 * into the holders list
449 * @gh: the holder structure
450 *
451 */
452
453static void handle_recurse(struct gfs2_holder *gh)
454{
455 struct gfs2_glock *gl = gh->gh_gl;
456 struct gfs2_sbd *sdp = gl->gl_sbd;
457 struct gfs2_holder *tmp_gh, *safe;
458 int found = 0;
459
460 if (gfs2_assert_warn(sdp, gh->gh_owner))
461 return;
462
463 list_for_each_entry_safe(tmp_gh, safe, &gl->gl_waiters3, gh_list) {
464 if (tmp_gh->gh_owner != gh->gh_owner)
465 continue;
466
467 gfs2_assert_warn(sdp,
468 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));
469
470 list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
471 tmp_gh->gh_error = 0;
472 set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);
473
474 complete(&tmp_gh->gh_wait);
475
476 found = 1;
477 }
478
479 gfs2_assert_warn(sdp, found);
480}
481
482/**
483 * do_unrecurse - a recursive holder was just dropped of the waiters3 list
484 * @gh: the holder
485 *
486 * If there is only one other recursive holder, clear its HIF_RECURSE bit.
487 * If there is more than one, leave them alone.
488 *
489 */
490
491static void do_unrecurse(struct gfs2_holder *gh)
492{
493 struct gfs2_glock *gl = gh->gh_gl;
494 struct gfs2_sbd *sdp = gl->gl_sbd;
495 struct gfs2_holder *tmp_gh, *last_gh = NULL;
496 int found = 0;
497
498 if (gfs2_assert_warn(sdp, gh->gh_owner))
499 return;
500
501 list_for_each_entry(tmp_gh, &gl->gl_waiters3, gh_list) {
502 if (tmp_gh->gh_owner != gh->gh_owner)
503 continue;
504
505 gfs2_assert_warn(sdp,
506 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));
507
508 if (found)
509 return;
510
511 found = 1;
512 last_gh = tmp_gh;
513 }
514
515 if (!gfs2_assert_warn(sdp, found))
516 clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
517}
518
519/**
520 * rq_mutex - process a mutex request in the queue
521 * @gh: the glock holder
522 *
523 * Returns: 1 if the queue is blocked
524 */
525
526static int rq_mutex(struct gfs2_holder *gh)
527{
528 struct gfs2_glock *gl = gh->gh_gl;
529
530 list_del_init(&gh->gh_list);
531 /* gh->gh_error never examined. */
532 set_bit(GLF_LOCK, &gl->gl_flags);
533 complete(&gh->gh_wait);
534
535 return 1;
536}
537
538/**
539 * rq_promote - process a promote request in the queue
540 * @gh: the glock holder
541 *
542 * Acquire a new inter-node lock, or change a lock state to more restrictive.
543 *
544 * Returns: 1 if the queue is blocked
545 */
546
547static int rq_promote(struct gfs2_holder *gh)
548{
549 struct gfs2_glock *gl = gh->gh_gl;
550 struct gfs2_sbd *sdp = gl->gl_sbd;
551 struct gfs2_glock_operations *glops = gl->gl_ops;
552 int recurse;
553
554 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
555 if (list_empty(&gl->gl_holders)) {
556 gl->gl_req_gh = gh;
557 set_bit(GLF_LOCK, &gl->gl_flags);
558 spin_unlock(&gl->gl_spin);
559
560 if (atomic_read(&sdp->sd_reclaim_count) >
561 gfs2_tune_get(sdp, gt_reclaim_limit) &&
562 !(gh->gh_flags & LM_FLAG_PRIORITY)) {
563 gfs2_reclaim_glock(sdp);
564 gfs2_reclaim_glock(sdp);
565 }
566
567 glops->go_xmote_th(gl, gh->gh_state,
568 gh->gh_flags);
569
570 spin_lock(&gl->gl_spin);
571 }
572 return 1;
573 }
574
575 if (list_empty(&gl->gl_holders)) {
576 set_bit(HIF_FIRST, &gh->gh_iflags);
577 set_bit(GLF_LOCK, &gl->gl_flags);
578 recurse = 0;
579 } else {
580 struct gfs2_holder *next_gh;
581 if (gh->gh_flags & GL_LOCAL_EXCL)
582 return 1;
583 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
584 gh_list);
585 if (next_gh->gh_flags & GL_LOCAL_EXCL)
586 return 1;
587 recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
588 }
589
590 list_move_tail(&gh->gh_list, &gl->gl_holders);
591 gh->gh_error = 0;
592 set_bit(HIF_HOLDER, &gh->gh_iflags);
593
594 if (recurse)
595 handle_recurse(gh);
596
597 complete(&gh->gh_wait);
598
599 return 0;
600}
601
602/**
603 * rq_demote - process a demote request in the queue
604 * @gh: the glock holder
605 *
606 * Returns: 1 if the queue is blocked
607 */
608
609static int rq_demote(struct gfs2_holder *gh)
610{
611 struct gfs2_glock *gl = gh->gh_gl;
612 struct gfs2_glock_operations *glops = gl->gl_ops;
613
614 if (!list_empty(&gl->gl_holders))
615 return 1;
616
617 if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
618 list_del_init(&gh->gh_list);
619 gh->gh_error = 0;
620 spin_unlock(&gl->gl_spin);
621 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
622 gfs2_holder_put(gh);
623 else
624 complete(&gh->gh_wait);
625 spin_lock(&gl->gl_spin);
626 } else {
627 gl->gl_req_gh = gh;
628 set_bit(GLF_LOCK, &gl->gl_flags);
629 spin_unlock(&gl->gl_spin);
630
631 if (gh->gh_state == LM_ST_UNLOCKED ||
632 gl->gl_state != LM_ST_EXCLUSIVE)
633 glops->go_drop_th(gl);
634 else
635 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
636
637 spin_lock(&gl->gl_spin);
638 }
639
640 return 0;
641}
642
643/**
644 * rq_greedy - process a queued request to drop greedy status
645 * @gh: the glock holder
646 *
647 * Returns: 1 if the queue is blocked
648 */
649
650static int rq_greedy(struct gfs2_holder *gh)
651{
652 struct gfs2_glock *gl = gh->gh_gl;
653
654 list_del_init(&gh->gh_list);
655 /* gh->gh_error never examined. */
656 clear_bit(GLF_GREEDY, &gl->gl_flags);
657 spin_unlock(&gl->gl_spin);
658
659 gfs2_holder_uninit(gh);
660 kfree(container_of(gh, struct greedy, gr_gh));
661
662 spin_lock(&gl->gl_spin);
663
664 return 0;
665}
666
667/**
668 * run_queue - process holder structures on a glock
669 * @gl: the glock
670 *
671 */
672
673static void run_queue(struct gfs2_glock *gl)
674{
675 struct gfs2_holder *gh;
676 int blocked = 1;
677
678 for (;;) {
679 if (test_bit(GLF_LOCK, &gl->gl_flags))
680 break;
681
682 if (!list_empty(&gl->gl_waiters1)) {
683 gh = list_entry(gl->gl_waiters1.next,
684 struct gfs2_holder, gh_list);
685
686 if (test_bit(HIF_MUTEX, &gh->gh_iflags))
687 blocked = rq_mutex(gh);
688 else
689 gfs2_assert_warn(gl->gl_sbd, 0);
690
691 } else if (!list_empty(&gl->gl_waiters2) &&
692 !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
693 gh = list_entry(gl->gl_waiters2.next,
694 struct gfs2_holder, gh_list);
695
696 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
697 blocked = rq_demote(gh);
698 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
699 blocked = rq_greedy(gh);
700 else
701 gfs2_assert_warn(gl->gl_sbd, 0);
702
703 } else if (!list_empty(&gl->gl_waiters3)) {
704 gh = list_entry(gl->gl_waiters3.next,
705 struct gfs2_holder, gh_list);
706
707 if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
708 blocked = rq_promote(gh);
709 else
710 gfs2_assert_warn(gl->gl_sbd, 0);
711
712 } else
713 break;
714
715 if (blocked)
716 break;
717 }
718}
719
720/**
721 * gfs2_glmutex_lock - acquire a local lock on a glock
722 * @gl: the glock
723 *
724 * Gives caller exclusive access to manipulate a glock structure.
725 */
726
727void gfs2_glmutex_lock(struct gfs2_glock *gl)
728{
729 struct gfs2_holder gh;
730
731 gfs2_holder_init(gl, 0, 0, &gh);
732 set_bit(HIF_MUTEX, &gh.gh_iflags);
733
734 spin_lock(&gl->gl_spin);
735 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
736 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
737 else
738 complete(&gh.gh_wait);
739 spin_unlock(&gl->gl_spin);
740
741 wait_for_completion(&gh.gh_wait);
742 gfs2_holder_uninit(&gh);
743}
744
745/**
746 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
747 * @gl: the glock
748 *
749 * Returns: 1 if the glock is acquired
750 */
751
752int gfs2_glmutex_trylock(struct gfs2_glock *gl)
753{
754 int acquired = 1;
755
756 spin_lock(&gl->gl_spin);
757 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
758 acquired = 0;
759 spin_unlock(&gl->gl_spin);
760
761 return acquired;
762}
763
764/**
765 * gfs2_glmutex_unlock - release a local lock on a glock
766 * @gl: the glock
767 *
768 */
769
770void gfs2_glmutex_unlock(struct gfs2_glock *gl)
771{
772 spin_lock(&gl->gl_spin);
773 clear_bit(GLF_LOCK, &gl->gl_flags);
774 run_queue(gl);
775 spin_unlock(&gl->gl_spin);
776}
777
778/**
779 * handle_callback - add a demote request to a lock's queue
780 * @gl: the glock
781 * @state: the state the caller wants us to change to
782 *
783 */
784
785static void handle_callback(struct gfs2_glock *gl, unsigned int state)
786{
787 struct gfs2_holder *gh, *new_gh = NULL;
788
789 restart:
790 spin_lock(&gl->gl_spin);
791
792 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
793 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
794 gl->gl_req_gh != gh) {
795 if (gh->gh_state != state)
796 gh->gh_state = LM_ST_UNLOCKED;
797 goto out;
798 }
799 }
800
801 if (new_gh) {
802 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
803 new_gh = NULL;
804 } else {
805 spin_unlock(&gl->gl_spin);
806
807 new_gh = gfs2_holder_get(gl, state,
808 LM_FLAG_TRY | GL_NEVER_RECURSE,
809 GFP_KERNEL | __GFP_NOFAIL),
810 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
811 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
812
813 goto restart;
814 }
815
816 out:
817 spin_unlock(&gl->gl_spin);
818
819 if (new_gh)
820 gfs2_holder_put(new_gh);
821}
822
823/**
824 * state_change - record that the glock is now in a different state
825 * @gl: the glock
826 * @new_state the new state
827 *
828 */
829
830static void state_change(struct gfs2_glock *gl, unsigned int new_state)
831{
832 struct gfs2_sbd *sdp = gl->gl_sbd;
833 int held1, held2;
834
835 held1 = (gl->gl_state != LM_ST_UNLOCKED);
836 held2 = (new_state != LM_ST_UNLOCKED);
837
838 if (held1 != held2) {
839 if (held2) {
840 atomic_inc(&sdp->sd_glock_held_count);
841 gfs2_glock_hold(gl);
842 } else {
843 atomic_dec(&sdp->sd_glock_held_count);
844 gfs2_glock_put(gl);
845 }
846 }
847
848 gl->gl_state = new_state;
849}
850
851/**
852 * xmote_bh - Called after the lock module is done acquiring a lock
853 * @gl: The glock in question
854 * @ret: the int returned from the lock module
855 *
856 */
857
858static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
859{
860 struct gfs2_sbd *sdp = gl->gl_sbd;
861 struct gfs2_glock_operations *glops = gl->gl_ops;
862 struct gfs2_holder *gh = gl->gl_req_gh;
863 int prev_state = gl->gl_state;
864 int op_done = 1;
865
866 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
867 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
868 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
869
870 state_change(gl, ret & LM_OUT_ST_MASK);
871
872 if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
873 if (glops->go_inval)
874 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
875 } else if (gl->gl_state == LM_ST_DEFERRED) {
876 /* We might not want to do this here.
877 Look at moving to the inode glops. */
878 if (glops->go_inval)
879 glops->go_inval(gl, DIO_DATA);
880 }
881
882 /* Deal with each possible exit condition */
883
884 if (!gh)
885 gl->gl_stamp = jiffies;
886
887 else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
888 spin_lock(&gl->gl_spin);
889 list_del_init(&gh->gh_list);
890 gh->gh_error = -EIO;
891 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
892 do_unrecurse(gh);
893 spin_unlock(&gl->gl_spin);
894
895 } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
896 spin_lock(&gl->gl_spin);
897 list_del_init(&gh->gh_list);
898 if (gl->gl_state == gh->gh_state ||
899 gl->gl_state == LM_ST_UNLOCKED)
900 gh->gh_error = 0;
901 else {
902 if (gfs2_assert_warn(sdp, gh->gh_flags &
903 (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
904 fs_warn(sdp, "ret = 0x%.8X\n", ret);
905 gh->gh_error = GLR_TRYFAILED;
906 }
907 spin_unlock(&gl->gl_spin);
908
909 if (ret & LM_OUT_CANCELED)
910 handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
911
912 } else if (ret & LM_OUT_CANCELED) {
913 spin_lock(&gl->gl_spin);
914 list_del_init(&gh->gh_list);
915 gh->gh_error = GLR_CANCELED;
916 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
917 do_unrecurse(gh);
918 spin_unlock(&gl->gl_spin);
919
920 } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
921 spin_lock(&gl->gl_spin);
922 list_move_tail(&gh->gh_list, &gl->gl_holders);
923 gh->gh_error = 0;
924 set_bit(HIF_HOLDER, &gh->gh_iflags);
925 spin_unlock(&gl->gl_spin);
926
927 set_bit(HIF_FIRST, &gh->gh_iflags);
928
929 op_done = 0;
930
931 } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
932 spin_lock(&gl->gl_spin);
933 list_del_init(&gh->gh_list);
934 gh->gh_error = GLR_TRYFAILED;
935 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
936 do_unrecurse(gh);
937 spin_unlock(&gl->gl_spin);
938
939 } else {
940 if (gfs2_assert_withdraw(sdp, 0) == -1)
941 fs_err(sdp, "ret = 0x%.8X\n", ret);
942 }
943
944 if (glops->go_xmote_bh)
945 glops->go_xmote_bh(gl);
946
947 if (op_done) {
948 spin_lock(&gl->gl_spin);
949 gl->gl_req_gh = NULL;
950 gl->gl_req_bh = NULL;
951 clear_bit(GLF_LOCK, &gl->gl_flags);
952 run_queue(gl);
953 spin_unlock(&gl->gl_spin);
954 }
955
956 gfs2_glock_put(gl);
957
958 if (gh) {
959 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
960 gfs2_holder_put(gh);
961 else
962 complete(&gh->gh_wait);
963 }
964}
965
966/**
967 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
968 * @gl: The glock in question
969 * @state: the requested state
970 * @flags: modifier flags to the lock call
971 *
972 */
973
974void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
975{
976 struct gfs2_sbd *sdp = gl->gl_sbd;
977 struct gfs2_glock_operations *glops = gl->gl_ops;
978 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
979 LM_FLAG_NOEXP | LM_FLAG_ANY |
980 LM_FLAG_PRIORITY);
981 unsigned int lck_ret;
982
983 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
984 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
985 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
986 gfs2_assert_warn(sdp, state != gl->gl_state);
987
988 if (gl->gl_state == LM_ST_EXCLUSIVE) {
989 if (glops->go_sync)
990 glops->go_sync(gl,
991 DIO_METADATA | DIO_DATA | DIO_RELEASE);
992 }
993
994 gfs2_glock_hold(gl);
995 gl->gl_req_bh = xmote_bh;
996
997 atomic_inc(&sdp->sd_lm_lock_calls);
998
999 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state,
1000 lck_flags);
1001
1002 if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
1003 return;
1004
1005 if (lck_ret & LM_OUT_ASYNC)
1006 gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
1007 else
1008 xmote_bh(gl, lck_ret);
1009}
1010
1011/**
1012 * drop_bh - Called after a lock module unlock completes
1013 * @gl: the glock
1014 * @ret: the return status
1015 *
1016 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
1017 * Doesn't drop the reference on the glock the top half took out
1018 *
1019 */
1020
1021static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
1022{
1023 struct gfs2_sbd *sdp = gl->gl_sbd;
1024 struct gfs2_glock_operations *glops = gl->gl_ops;
1025 struct gfs2_holder *gh = gl->gl_req_gh;
1026
1027 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1028
1029 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1030 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1031 gfs2_assert_warn(sdp, !ret);
1032
1033 state_change(gl, LM_ST_UNLOCKED);
1034
1035 if (glops->go_inval)
1036 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
1037
1038 if (gh) {
1039 spin_lock(&gl->gl_spin);
1040 list_del_init(&gh->gh_list);
1041 gh->gh_error = 0;
1042 spin_unlock(&gl->gl_spin);
1043 }
1044
1045 if (glops->go_drop_bh)
1046 glops->go_drop_bh(gl);
1047
1048 spin_lock(&gl->gl_spin);
1049 gl->gl_req_gh = NULL;
1050 gl->gl_req_bh = NULL;
1051 clear_bit(GLF_LOCK, &gl->gl_flags);
1052 run_queue(gl);
1053 spin_unlock(&gl->gl_spin);
1054
1055 gfs2_glock_put(gl);
1056
1057 if (gh) {
1058 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
1059 gfs2_holder_put(gh);
1060 else
1061 complete(&gh->gh_wait);
1062 }
1063}
1064
1065/**
1066 * gfs2_glock_drop_th - call into the lock module to unlock a lock
1067 * @gl: the glock
1068 *
1069 */
1070
1071void gfs2_glock_drop_th(struct gfs2_glock *gl)
1072{
1073 struct gfs2_sbd *sdp = gl->gl_sbd;
1074 struct gfs2_glock_operations *glops = gl->gl_ops;
1075 unsigned int ret;
1076
1077 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1078 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1079 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1080
1081 if (gl->gl_state == LM_ST_EXCLUSIVE) {
1082 if (glops->go_sync)
1083 glops->go_sync(gl,
1084 DIO_METADATA | DIO_DATA | DIO_RELEASE);
1085 }
1086
1087 gfs2_glock_hold(gl);
1088 gl->gl_req_bh = drop_bh;
1089
1090 atomic_inc(&sdp->sd_lm_unlock_calls);
1091
1092 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
1093
1094 if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
1095 return;
1096
1097 if (!ret)
1098 drop_bh(gl, ret);
1099 else
1100 gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
1101}
1102
1103/**
1104 * do_cancels - cancel requests for locks stuck waiting on an expire flag
1105 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
1106 *
1107 * Don't cancel GL_NOCANCEL requests.
1108 */
1109
1110static void do_cancels(struct gfs2_holder *gh)
1111{
1112 struct gfs2_glock *gl = gh->gh_gl;
1113
1114 spin_lock(&gl->gl_spin);
1115
1116 while (gl->gl_req_gh != gh &&
1117 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1118 !list_empty(&gh->gh_list)) {
1119 if (gl->gl_req_bh &&
1120 !(gl->gl_req_gh &&
1121 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
1122 spin_unlock(&gl->gl_spin);
1123 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
1124 msleep(100);
1125 spin_lock(&gl->gl_spin);
1126 } else {
1127 spin_unlock(&gl->gl_spin);
1128 msleep(100);
1129 spin_lock(&gl->gl_spin);
1130 }
1131 }
1132
1133 spin_unlock(&gl->gl_spin);
1134}
1135
1136/**
1137 * glock_wait_internal - wait on a glock acquisition
1138 * @gh: the glock holder
1139 *
1140 * Returns: 0 on success
1141 */
1142
1143static int glock_wait_internal(struct gfs2_holder *gh)
1144{
1145 struct gfs2_glock *gl = gh->gh_gl;
1146 struct gfs2_sbd *sdp = gl->gl_sbd;
1147 struct gfs2_glock_operations *glops = gl->gl_ops;
1148
1149 if (test_bit(HIF_ABORTED, &gh->gh_iflags))
1150 return -EIO;
1151
1152 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1153 spin_lock(&gl->gl_spin);
1154 if (gl->gl_req_gh != gh &&
1155 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1156 !list_empty(&gh->gh_list)) {
1157 list_del_init(&gh->gh_list);
1158 gh->gh_error = GLR_TRYFAILED;
1159 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
1160 do_unrecurse(gh);
1161 run_queue(gl);
1162 spin_unlock(&gl->gl_spin);
1163 return gh->gh_error;
1164 }
1165 spin_unlock(&gl->gl_spin);
1166 }
1167
1168 if (gh->gh_flags & LM_FLAG_PRIORITY)
1169 do_cancels(gh);
1170
1171 wait_for_completion(&gh->gh_wait);
1172
1173 if (gh->gh_error)
1174 return gh->gh_error;
1175
1176 gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
1177 gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state,
1178 gh->gh_state,
1179 gh->gh_flags));
1180
1181 if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
1182 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1183
1184 if (glops->go_lock) {
1185 gh->gh_error = glops->go_lock(gh);
1186 if (gh->gh_error) {
1187 spin_lock(&gl->gl_spin);
1188 list_del_init(&gh->gh_list);
1189 if (test_and_clear_bit(HIF_RECURSE,
1190 &gh->gh_iflags))
1191 do_unrecurse(gh);
1192 spin_unlock(&gl->gl_spin);
1193 }
1194 }
1195
1196 spin_lock(&gl->gl_spin);
1197 gl->gl_req_gh = NULL;
1198 gl->gl_req_bh = NULL;
1199 clear_bit(GLF_LOCK, &gl->gl_flags);
1200 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
1201 handle_recurse(gh);
1202 run_queue(gl);
1203 spin_unlock(&gl->gl_spin);
1204 }
1205
1206 return gh->gh_error;
1207}
1208
1209static inline struct gfs2_holder *
1210find_holder_by_owner(struct list_head *head, struct task_struct *owner)
1211{
1212 struct gfs2_holder *gh;
1213
1214 list_for_each_entry(gh, head, gh_list) {
1215 if (gh->gh_owner == owner)
1216 return gh;
1217 }
1218
1219 return NULL;
1220}
1221
1222/**
1223 * recurse_check -
1224 *
1225 * Make sure the new holder is compatible with the pre-existing one.
1226 *
1227 */
1228
1229static int recurse_check(struct gfs2_holder *existing, struct gfs2_holder *new,
1230 unsigned int state)
1231{
1232 struct gfs2_sbd *sdp = existing->gh_gl->gl_sbd;
1233
1234 if (gfs2_assert_warn(sdp, (new->gh_flags & LM_FLAG_ANY) ||
1235 !(existing->gh_flags & LM_FLAG_ANY)))
1236 goto fail;
1237
1238 if (gfs2_assert_warn(sdp, (existing->gh_flags & GL_LOCAL_EXCL) ||
1239 !(new->gh_flags & GL_LOCAL_EXCL)))
1240 goto fail;
1241
1242 if (gfs2_assert_warn(sdp, relaxed_state_ok(state, new->gh_state,
1243 new->gh_flags)))
1244 goto fail;
1245
1246 return 0;
1247
1248 fail:
1249 set_bit(HIF_ABORTED, &new->gh_iflags);
1250 return -EINVAL;
1251}
1252
1253/**
1254 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1255 * @gh: the holder structure to add
1256 *
1257 */
1258
1259static void add_to_queue(struct gfs2_holder *gh)
1260{
1261 struct gfs2_glock *gl = gh->gh_gl;
1262 struct gfs2_holder *existing;
1263
1264 if (!gh->gh_owner)
1265 goto out;
1266
1267 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1268 if (existing) {
1269 if (recurse_check(existing, gh, gl->gl_state))
1270 return;
1271
1272 list_add_tail(&gh->gh_list, &gl->gl_holders);
1273 set_bit(HIF_HOLDER, &gh->gh_iflags);
1274
1275 gh->gh_error = 0;
1276 complete(&gh->gh_wait);
1277
1278 return;
1279 }
1280
1281 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
1282 if (existing) {
1283 if (recurse_check(existing, gh, existing->gh_state))
1284 return;
1285
1286 set_bit(HIF_RECURSE, &gh->gh_iflags);
1287 set_bit(HIF_RECURSE, &existing->gh_iflags);
1288
1289 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1290
1291 return;
1292 }
1293
1294 out:
1295 if (gh->gh_flags & LM_FLAG_PRIORITY)
1296 list_add(&gh->gh_list, &gl->gl_waiters3);
1297 else
1298 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1299}
1300
1301/**
1302 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1303 * @gh: the holder structure
1304 *
1305 * if (gh->gh_flags & GL_ASYNC), this never returns an error
1306 *
1307 * Returns: 0, GLR_TRYFAILED, or errno on failure
1308 */
1309
1310int gfs2_glock_nq(struct gfs2_holder *gh)
1311{
1312 struct gfs2_glock *gl = gh->gh_gl;
1313 struct gfs2_sbd *sdp = gl->gl_sbd;
1314 int error = 0;
1315
1316 atomic_inc(&sdp->sd_glock_nq_calls);
1317
1318 restart:
1319 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1320 set_bit(HIF_ABORTED, &gh->gh_iflags);
1321 return -EIO;
1322 }
1323
1324 set_bit(HIF_PROMOTE, &gh->gh_iflags);
1325
1326 spin_lock(&gl->gl_spin);
1327 add_to_queue(gh);
1328 run_queue(gl);
1329 spin_unlock(&gl->gl_spin);
1330
1331 if (!(gh->gh_flags & GL_ASYNC)) {
1332 error = glock_wait_internal(gh);
1333 if (error == GLR_CANCELED) {
1334 msleep(1000);
1335 goto restart;
1336 }
1337 }
1338
1339 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1340
1341 return error;
1342}
1343
1344/**
1345 * gfs2_glock_poll - poll to see if an async request has been completed
1346 * @gh: the holder
1347 *
1348 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1349 */
1350
1351int gfs2_glock_poll(struct gfs2_holder *gh)
1352{
1353 struct gfs2_glock *gl = gh->gh_gl;
1354 int ready = 0;
1355
1356 spin_lock(&gl->gl_spin);
1357
1358 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1359 ready = 1;
1360 else if (list_empty(&gh->gh_list)) {
1361 if (gh->gh_error == GLR_CANCELED) {
1362 spin_unlock(&gl->gl_spin);
1363 msleep(1000);
1364 if (gfs2_glock_nq(gh))
1365 return 1;
1366 return 0;
1367 } else
1368 ready = 1;
1369 }
1370
1371 spin_unlock(&gl->gl_spin);
1372
1373 return ready;
1374}
1375
1376/**
1377 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1378 * @gh: the holder structure
1379 *
1380 * Returns: 0, GLR_TRYFAILED, or errno on failure
1381 */
1382
1383int gfs2_glock_wait(struct gfs2_holder *gh)
1384{
1385 int error;
1386
1387 error = glock_wait_internal(gh);
1388 if (error == GLR_CANCELED) {
1389 msleep(1000);
1390 gh->gh_flags &= ~GL_ASYNC;
1391 error = gfs2_glock_nq(gh);
1392 }
1393
1394 return error;
1395}
1396
1397/**
1398 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1399 * @gh: the glock holder
1400 *
1401 */
1402
1403void gfs2_glock_dq(struct gfs2_holder *gh)
1404{
1405 struct gfs2_glock *gl = gh->gh_gl;
1406 struct gfs2_sbd *sdp = gl->gl_sbd;
1407 struct gfs2_glock_operations *glops = gl->gl_ops;
1408
1409 atomic_inc(&sdp->sd_glock_dq_calls);
1410
1411 if (gh->gh_flags & GL_SYNC)
1412 set_bit(GLF_SYNC, &gl->gl_flags);
1413
1414 if (gh->gh_flags & GL_NOCACHE)
1415 handle_callback(gl, LM_ST_UNLOCKED);
1416
1417 gfs2_glmutex_lock(gl);
1418
1419 spin_lock(&gl->gl_spin);
1420 list_del_init(&gh->gh_list);
1421
1422 if (list_empty(&gl->gl_holders)) {
1423 spin_unlock(&gl->gl_spin);
1424
1425 if (glops->go_unlock)
1426 glops->go_unlock(gh);
1427
1428 if (test_bit(GLF_SYNC, &gl->gl_flags)) {
1429 if (glops->go_sync)
1430 glops->go_sync(gl, DIO_METADATA | DIO_DATA);
1431 }
1432
1433 gl->gl_stamp = jiffies;
1434
1435 spin_lock(&gl->gl_spin);
1436 }
1437
1438 clear_bit(GLF_LOCK, &gl->gl_flags);
1439 run_queue(gl);
1440 spin_unlock(&gl->gl_spin);
1441}
1442
1443/**
1444 * gfs2_glock_prefetch - Try to prefetch a glock
1445 * @gl: the glock
1446 * @state: the state to prefetch in
1447 * @flags: flags passed to go_xmote_th()
1448 *
1449 */
1450
1451void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags)
1452{
1453 struct gfs2_glock_operations *glops = gl->gl_ops;
1454
1455 spin_lock(&gl->gl_spin);
1456
1457 if (test_bit(GLF_LOCK, &gl->gl_flags) ||
1458 !list_empty(&gl->gl_holders) ||
1459 !list_empty(&gl->gl_waiters1) ||
1460 !list_empty(&gl->gl_waiters2) ||
1461 !list_empty(&gl->gl_waiters3) ||
1462 relaxed_state_ok(gl->gl_state, state, flags)) {
1463 spin_unlock(&gl->gl_spin);
1464 return;
1465 }
1466
1467 set_bit(GLF_PREFETCH, &gl->gl_flags);
1468 set_bit(GLF_LOCK, &gl->gl_flags);
1469 spin_unlock(&gl->gl_spin);
1470
1471 glops->go_xmote_th(gl, state, flags);
1472
1473 atomic_inc(&gl->gl_sbd->sd_glock_prefetch_calls);
1474}
1475
1476/**
1477 * gfs2_glock_force_drop - Force a glock to be uncached
1478 * @gl: the glock
1479 *
1480 */
1481
1482void gfs2_glock_force_drop(struct gfs2_glock *gl)
1483{
1484 struct gfs2_holder gh;
1485
1486 gfs2_holder_init(gl, LM_ST_UNLOCKED, GL_NEVER_RECURSE, &gh);
1487 set_bit(HIF_DEMOTE, &gh.gh_iflags);
1488
1489 spin_lock(&gl->gl_spin);
1490 list_add_tail(&gh.gh_list, &gl->gl_waiters2);
1491 run_queue(gl);
1492 spin_unlock(&gl->gl_spin);
1493
1494 wait_for_completion(&gh.gh_wait);
1495 gfs2_holder_uninit(&gh);
1496}
1497
1498static void greedy_work(void *data)
1499{
1500 struct greedy *gr = (struct greedy *)data;
1501 struct gfs2_holder *gh = &gr->gr_gh;
1502 struct gfs2_glock *gl = gh->gh_gl;
1503 struct gfs2_glock_operations *glops = gl->gl_ops;
1504
1505 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1506
1507 if (glops->go_greedy)
1508 glops->go_greedy(gl);
1509
1510 spin_lock(&gl->gl_spin);
1511
1512 if (list_empty(&gl->gl_waiters2)) {
1513 clear_bit(GLF_GREEDY, &gl->gl_flags);
1514 spin_unlock(&gl->gl_spin);
1515 gfs2_holder_uninit(gh);
1516 kfree(gr);
1517 } else {
1518 gfs2_glock_hold(gl);
1519 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1520 run_queue(gl);
1521 spin_unlock(&gl->gl_spin);
1522 gfs2_glock_put(gl);
1523 }
1524}
1525
1526/**
1527 * gfs2_glock_be_greedy -
1528 * @gl:
1529 * @time:
1530 *
1531 * Returns: 0 if go_greedy will be called, 1 otherwise
1532 */
1533
1534int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1535{
1536 struct greedy *gr;
1537 struct gfs2_holder *gh;
1538
1539 if (!time ||
1540 gl->gl_sbd->sd_args.ar_localcaching ||
1541 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1542 return 1;
1543
1544 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1545 if (!gr) {
1546 clear_bit(GLF_GREEDY, &gl->gl_flags);
1547 return 1;
1548 }
1549 gh = &gr->gr_gh;
1550
1551 gfs2_holder_init(gl, 0, GL_NEVER_RECURSE, gh);
1552 set_bit(HIF_GREEDY, &gh->gh_iflags);
1553 INIT_WORK(&gr->gr_work, greedy_work, gr);
1554
1555 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1556 schedule_delayed_work(&gr->gr_work, time);
1557
1558 return 0;
1559}
1560
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * On failure the holder is uninitialized again before returning.
 *
 * Returns: 0, GLR_*, or errno
 */

int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, int flags,
		       struct gfs2_holder *gh)
{
	int ret;

	gfs2_holder_init(gl, state, flags, gh);

	ret = gfs2_glock_nq(gh);
	if (ret)
		gfs2_holder_uninit(gh);

	return ret;
}
1584
/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
 * @gh: the holder structure
 *
 * Shorthand for gfs2_glock_dq() followed by gfs2_holder_uninit().
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);

	gfs2_holder_uninit(gh);
}
1596
1597/**
1598 * gfs2_glock_nq_num - acquire a glock based on lock number
1599 * @sdp: the filesystem
1600 * @number: the lock number
1601 * @glops: the glock operations for the type of glock
1602 * @state: the state to acquire the glock in
1603 * @flags: modifier flags for the aquisition
1604 * @gh: the struct gfs2_holder
1605 *
1606 * Returns: errno
1607 */
1608
1609int gfs2_glock_nq_num(struct gfs2_sbd *sdp, uint64_t number,
1610 struct gfs2_glock_operations *glops, unsigned int state,
1611 int flags, struct gfs2_holder *gh)
1612{
1613 struct gfs2_glock *gl;
1614 int error;
1615
1616 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1617 if (!error) {
1618 error = gfs2_glock_nq_init(gl, state, flags, gh);
1619 gfs2_glock_put(gl);
1620 }
1621
1622 return error;
1623}
1624
1625/**
1626 * glock_compare - Compare two struct gfs2_glock structures for sorting
1627 * @arg_a: the first structure
1628 * @arg_b: the second structure
1629 *
1630 */
1631
1632static int glock_compare(const void *arg_a, const void *arg_b)
1633{
1634 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1635 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1636 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1637 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1638 int ret = 0;
1639
1640 if (a->ln_number > b->ln_number)
1641 ret = 1;
1642 else if (a->ln_number < b->ln_number)
1643 ret = -1;
1644 else {
1645 if (gh_a->gh_state == LM_ST_SHARED &&
1646 gh_b->gh_state == LM_ST_EXCLUSIVE)
1647 ret = 1;
1648 else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
1649 (gh_b->gh_flags & GL_LOCAL_EXCL))
1650 ret = 1;
1651 }
1652
1653 return ret;
1654}
1655
1656/**
1657 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
1658 * @num_gh: the number of structures
1659 * @ghs: an array of struct gfs2_holder structures
1660 *
1661 * Returns: 0 on success (all glocks acquired),
1662 * errno on failure (no glocks acquired)
1663 */
1664
1665static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1666 struct gfs2_holder **p)
1667{
1668 unsigned int x;
1669 int error = 0;
1670
1671 for (x = 0; x < num_gh; x++)
1672 p[x] = &ghs[x];
1673
1674 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1675
1676 for (x = 0; x < num_gh; x++) {
1677 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1678
1679 error = gfs2_glock_nq(p[x]);
1680 if (error) {
1681 while (x--)
1682 gfs2_glock_dq(p[x]);
1683 break;
1684 }
1685 }
1686
1687 return error;
1688}
1689
1690/**
1691 * gfs2_glock_nq_m - acquire multiple glocks
1692 * @num_gh: the number of structures
1693 * @ghs: an array of struct gfs2_holder structures
1694 *
1695 * Figure out how big an impact this function has. Either:
1696 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1697 * 2) Forget async stuff and just call nq_m_sync()
1698 * 3) Leave it like it is
1699 *
1700 * Returns: 0 on success (all glocks acquired),
1701 * errno on failure (no glocks acquired)
1702 */
1703
1704int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1705{
1706 int *e;
1707 unsigned int x;
1708 int borked = 0, serious = 0;
1709 int error = 0;
1710
1711 if (!num_gh)
1712 return 0;
1713
1714 if (num_gh == 1) {
1715 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1716 return gfs2_glock_nq(ghs);
1717 }
1718
1719 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1720 if (!e)
1721 return -ENOMEM;
1722
1723 for (x = 0; x < num_gh; x++) {
1724 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1725 error = gfs2_glock_nq(&ghs[x]);
1726 if (error) {
1727 borked = 1;
1728 serious = error;
1729 num_gh = x;
1730 break;
1731 }
1732 }
1733
1734 for (x = 0; x < num_gh; x++) {
1735 error = e[x] = glock_wait_internal(&ghs[x]);
1736 if (error) {
1737 borked = 1;
1738 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1739 serious = error;
1740 }
1741 }
1742
1743 if (!borked) {
1744 kfree(e);
1745 return 0;
1746 }
1747
1748 for (x = 0; x < num_gh; x++)
1749 if (!e[x])
1750 gfs2_glock_dq(&ghs[x]);
1751
1752 if (serious)
1753 error = serious;
1754 else {
1755 for (x = 0; x < num_gh; x++)
1756 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1757 &ghs[x]);
1758 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1759 }
1760
1761 kfree(e);
1762
1763 return error;
1764}
1765
1766/**
1767 * gfs2_glock_dq_m - release multiple glocks
1768 * @num_gh: the number of structures
1769 * @ghs: an array of struct gfs2_holder structures
1770 *
1771 */
1772
1773void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1774{
1775 unsigned int x;
1776
1777 for (x = 0; x < num_gh; x++)
1778 gfs2_glock_dq(&ghs[x]);
1779}
1780
1781/**
1782 * gfs2_glock_dq_uninit_m - release multiple glocks
1783 * @num_gh: the number of structures
1784 * @ghs: an array of struct gfs2_holder structures
1785 *
1786 */
1787
1788void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1789{
1790 unsigned int x;
1791
1792 for (x = 0; x < num_gh; x++)
1793 gfs2_glock_dq_uninit(&ghs[x]);
1794}
1795
1796/**
1797 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1798 * @sdp: the filesystem
1799 * @number: the lock number
1800 * @glops: the glock operations for the type of glock
1801 * @state: the state to acquire the glock in
1802 * @flags: modifier flags for the aquisition
1803 *
1804 * Returns: errno
1805 */
1806
1807void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
1808 struct gfs2_glock_operations *glops,
1809 unsigned int state, int flags)
1810{
1811 struct gfs2_glock *gl;
1812 int error;
1813
1814 if (atomic_read(&sdp->sd_reclaim_count) <
1815 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1816 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1817 if (!error) {
1818 gfs2_glock_prefetch(gl, state, flags);
1819 gfs2_glock_put(gl);
1820 }
1821 }
1822}
1823
1824/**
1825 * gfs2_lvb_hold - attach a LVB from a glock
1826 * @gl: The glock in question
1827 *
1828 */
1829
1830int gfs2_lvb_hold(struct gfs2_glock *gl)
1831{
1832 int error;
1833
1834 gfs2_glmutex_lock(gl);
1835
1836 if (!atomic_read(&gl->gl_lvb_count)) {
1837 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1838 if (error) {
1839 gfs2_glmutex_unlock(gl);
1840 return error;
1841 }
1842 gfs2_glock_hold(gl);
1843 }
1844 atomic_inc(&gl->gl_lvb_count);
1845
1846 gfs2_glmutex_unlock(gl);
1847
1848 return 0;
1849}
1850
1851/**
1852 * gfs2_lvb_unhold - detach a LVB from a glock
1853 * @gl: The glock in question
1854 *
1855 */
1856
1857void gfs2_lvb_unhold(struct gfs2_glock *gl)
1858{
1859 gfs2_glock_hold(gl);
1860 gfs2_glmutex_lock(gl);
1861
1862 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1863 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1864 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1865 gl->gl_lvb = NULL;
1866 gfs2_glock_put(gl);
1867 }
1868
1869 gfs2_glmutex_unlock(gl);
1870 gfs2_glock_put(gl);
1871}
1872
1873void gfs2_lvb_sync(struct gfs2_glock *gl)
1874{
1875 gfs2_glmutex_lock(gl);
1876
1877 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count));
1878 if (!gfs2_assert_warn(gl->gl_sbd, gfs2_glock_is_held_excl(gl)))
1879 gfs2_lm_sync_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1880
1881 gfs2_glmutex_unlock(gl);
1882}
1883
1884static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1885 unsigned int state)
1886{
1887 struct gfs2_glock *gl;
1888
1889 gl = gfs2_glock_find(sdp, name);
1890 if (!gl)
1891 return;
1892
1893 if (gl->gl_ops->go_callback)
1894 gl->gl_ops->go_callback(gl, state);
1895 handle_callback(gl, state);
1896
1897 spin_lock(&gl->gl_spin);
1898 run_queue(gl);
1899 spin_unlock(&gl->gl_spin);
1900
1901 gfs2_glock_put(gl);
1902}
1903
1904/**
1905 * gfs2_glock_cb - Callback used by locking module
1906 * @fsdata: Pointer to the superblock
1907 * @type: Type of callback
1908 * @data: Type dependent data pointer
1909 *
1910 * Called by the locking module when it wants to tell us something.
1911 * Either we need to drop a lock, one of our ASYNC requests completed, or
1912 * a journal from another client needs to be recovered.
1913 */
1914
1915void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data)
1916{
1917 struct gfs2_sbd *sdp = (struct gfs2_sbd *)fsdata;
1918
1919 atomic_inc(&sdp->sd_lm_callbacks);
1920
1921 switch (type) {
1922 case LM_CB_NEED_E:
1923 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_UNLOCKED);
1924 return;
1925
1926 case LM_CB_NEED_D:
1927 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_DEFERRED);
1928 return;
1929
1930 case LM_CB_NEED_S:
1931 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_SHARED);
1932 return;
1933
1934 case LM_CB_ASYNC: {
1935 struct lm_async_cb *async = (struct lm_async_cb *)data;
1936 struct gfs2_glock *gl;
1937
1938 gl = gfs2_glock_find(sdp, &async->lc_name);
1939 if (gfs2_assert_warn(sdp, gl))
1940 return;
1941 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1942 gl->gl_req_bh(gl, async->lc_ret);
1943 gfs2_glock_put(gl);
1944
1945 return;
1946 }
1947
1948 case LM_CB_NEED_RECOVERY:
1949 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1950 if (sdp->sd_recoverd_process)
1951 wake_up_process(sdp->sd_recoverd_process);
1952 return;
1953
1954 case LM_CB_DROPLOCKS:
1955 gfs2_gl_hash_clear(sdp, NO_WAIT);
1956 gfs2_quota_scan(sdp);
1957 return;
1958
1959 default:
1960 gfs2_assert_warn(sdp, 0);
1961 return;
1962 }
1963}
1964
1965/**
1966 * gfs2_try_toss_inode - try to remove a particular inode struct from cache
1967 * sdp: the filesystem
1968 * inum: the inode number
1969 *
1970 */
1971
1972void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum)
1973{
1974 struct gfs2_glock *gl;
1975 struct gfs2_inode *ip;
1976 int error;
1977
1978 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops,
1979 NO_CREATE, &gl);
1980 if (error || !gl)
1981 return;
1982
1983 if (!gfs2_glmutex_trylock(gl))
1984 goto out;
1985
1986 ip = get_gl2ip(gl);
1987 if (!ip)
1988 goto out_unlock;
1989
1990 if (atomic_read(&ip->i_count))
1991 goto out_unlock;
1992
1993 gfs2_inode_destroy(ip);
1994
1995 out_unlock:
1996 gfs2_glmutex_unlock(gl);
1997
1998 out:
1999 gfs2_glock_put(gl);
2000}
2001
2002/**
2003 * gfs2_iopen_go_callback - Try to kick the inode/vnode associated with an
2004 * iopen glock from memory
2005 * @io_gl: the iopen glock
2006 * @state: the state into which the glock should be put
2007 *
2008 */
2009
2010void gfs2_iopen_go_callback(struct gfs2_glock *io_gl, unsigned int state)
2011{
2012 struct gfs2_glock *i_gl;
2013
2014 if (state != LM_ST_UNLOCKED)
2015 return;
2016
2017 spin_lock(&io_gl->gl_spin);
2018 i_gl = get_gl2gl(io_gl);
2019 if (i_gl) {
2020 gfs2_glock_hold(i_gl);
2021 spin_unlock(&io_gl->gl_spin);
2022 } else {
2023 spin_unlock(&io_gl->gl_spin);
2024 return;
2025 }
2026
2027 if (gfs2_glmutex_trylock(i_gl)) {
2028 struct gfs2_inode *ip = get_gl2ip(i_gl);
2029 if (ip) {
2030 gfs2_try_toss_vnode(ip);
2031 gfs2_glmutex_unlock(i_gl);
2032 gfs2_glock_schedule_for_reclaim(i_gl);
2033 goto out;
2034 }
2035 gfs2_glmutex_unlock(i_gl);
2036 }
2037
2038 out:
2039 gfs2_glock_put(i_gl);
2040}
2041
2042/**
2043 * demote_ok - Check to see if it's ok to unlock a glock
2044 * @gl: the glock
2045 *
2046 * Returns: 1 if it's ok
2047 */
2048
2049static int demote_ok(struct gfs2_glock *gl)
2050{
2051 struct gfs2_sbd *sdp = gl->gl_sbd;
2052 struct gfs2_glock_operations *glops = gl->gl_ops;
2053 int demote = 1;
2054
2055 if (test_bit(GLF_STICKY, &gl->gl_flags))
2056 demote = 0;
2057 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
2058 demote = time_after_eq(jiffies,
2059 gl->gl_stamp +
2060 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
2061 else if (glops->go_demote_ok)
2062 demote = glops->go_demote_ok(gl);
2063
2064 return demote;
2065}
2066
2067/**
2068 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
2069 * @gl: the glock
2070 *
2071 */
2072
2073void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
2074{
2075 struct gfs2_sbd *sdp = gl->gl_sbd;
2076
2077 spin_lock(&sdp->sd_reclaim_lock);
2078 if (list_empty(&gl->gl_reclaim)) {
2079 gfs2_glock_hold(gl);
2080 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
2081 atomic_inc(&sdp->sd_reclaim_count);
2082 }
2083 spin_unlock(&sdp->sd_reclaim_lock);
2084
2085 wake_up(&sdp->sd_reclaim_wq);
2086}
2087
2088/**
2089 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
2090 * @sdp: the filesystem
2091 *
2092 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
2093 * different glock and we notice that there are a lot of glocks in the
2094 * reclaim list.
2095 *
2096 */
2097
2098void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
2099{
2100 struct gfs2_glock *gl;
2101
2102 spin_lock(&sdp->sd_reclaim_lock);
2103 if (list_empty(&sdp->sd_reclaim_list)) {
2104 spin_unlock(&sdp->sd_reclaim_lock);
2105 return;
2106 }
2107 gl = list_entry(sdp->sd_reclaim_list.next,
2108 struct gfs2_glock, gl_reclaim);
2109 list_del_init(&gl->gl_reclaim);
2110 spin_unlock(&sdp->sd_reclaim_lock);
2111
2112 atomic_dec(&sdp->sd_reclaim_count);
2113 atomic_inc(&sdp->sd_reclaimed);
2114
2115 if (gfs2_glmutex_trylock(gl)) {
2116 if (gl->gl_ops == &gfs2_inode_glops) {
2117 struct gfs2_inode *ip = get_gl2ip(gl);
2118 if (ip && !atomic_read(&ip->i_count))
2119 gfs2_inode_destroy(ip);
2120 }
2121 if (queue_empty(gl, &gl->gl_holders) &&
2122 gl->gl_state != LM_ST_UNLOCKED &&
2123 demote_ok(gl))
2124 handle_callback(gl, LM_ST_UNLOCKED);
2125 gfs2_glmutex_unlock(gl);
2126 }
2127
2128 gfs2_glock_put(gl);
2129}
2130
2131/**
2132 * examine_bucket - Call a function for glock in a hash bucket
2133 * @examiner: the function
2134 * @sdp: the filesystem
2135 * @bucket: the bucket
2136 *
2137 * Returns: 1 if the bucket has entries
2138 */
2139
2140static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
2141 struct gfs2_gl_hash_bucket *bucket)
2142{
2143 struct glock_plug plug;
2144 struct list_head *tmp;
2145 struct gfs2_glock *gl;
2146 int entries;
2147
2148 /* Add "plug" to end of bucket list, work back up list from there */
2149 memset(&plug.gl_flags, 0, sizeof(unsigned long));
2150 set_bit(GLF_PLUG, &plug.gl_flags);
2151
2152 write_lock(&bucket->hb_lock);
2153 list_add(&plug.gl_list, &bucket->hb_list);
2154 write_unlock(&bucket->hb_lock);
2155
2156 for (;;) {
2157 write_lock(&bucket->hb_lock);
2158
2159 for (;;) {
2160 tmp = plug.gl_list.next;
2161
2162 if (tmp == &bucket->hb_list) {
2163 list_del(&plug.gl_list);
2164 entries = !list_empty(&bucket->hb_list);
2165 write_unlock(&bucket->hb_lock);
2166 return entries;
2167 }
2168 gl = list_entry(tmp, struct gfs2_glock, gl_list);
2169
2170 /* Move plug up list */
2171 list_move(&plug.gl_list, &gl->gl_list);
2172
2173 if (test_bit(GLF_PLUG, &gl->gl_flags))
2174 continue;
2175
2176 /* examiner() must glock_put() */
2177 gfs2_glock_hold(gl);
2178
2179 break;
2180 }
2181
2182 write_unlock(&bucket->hb_lock);
2183
2184 examiner(gl);
2185 }
2186}
2187
2188/**
2189 * scan_glock - look at a glock and see if we can reclaim it
2190 * @gl: the glock to look at
2191 *
2192 */
2193
2194static void scan_glock(struct gfs2_glock *gl)
2195{
2196 if (gfs2_glmutex_trylock(gl)) {
2197 if (gl->gl_ops == &gfs2_inode_glops) {
2198 struct gfs2_inode *ip = get_gl2ip(gl);
2199 if (ip && !atomic_read(&ip->i_count))
2200 goto out_schedule;
2201 }
2202 if (queue_empty(gl, &gl->gl_holders) &&
2203 gl->gl_state != LM_ST_UNLOCKED &&
2204 demote_ok(gl))
2205 goto out_schedule;
2206
2207 gfs2_glmutex_unlock(gl);
2208 }
2209
2210 gfs2_glock_put(gl);
2211
2212 return;
2213
2214 out_schedule:
2215 gfs2_glmutex_unlock(gl);
2216 gfs2_glock_schedule_for_reclaim(gl);
2217 gfs2_glock_put(gl);
2218}
2219
2220/**
2221 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
2222 * @sdp: the filesystem
2223 *
2224 */
2225
2226void gfs2_scand_internal(struct gfs2_sbd *sdp)
2227{
2228 unsigned int x;
2229
2230 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2231 examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
2232 cond_resched();
2233 }
2234}
2235
2236/**
2237 * clear_glock - look at a glock and see if we can free it from glock cache
2238 * @gl: the glock to look at
2239 *
2240 */
2241
2242static void clear_glock(struct gfs2_glock *gl)
2243{
2244 struct gfs2_sbd *sdp = gl->gl_sbd;
2245 int released;
2246
2247 spin_lock(&sdp->sd_reclaim_lock);
2248 if (!list_empty(&gl->gl_reclaim)) {
2249 list_del_init(&gl->gl_reclaim);
2250 atomic_dec(&sdp->sd_reclaim_count);
2251 released = gfs2_glock_put(gl);
2252 gfs2_assert(sdp, !released);
2253 }
2254 spin_unlock(&sdp->sd_reclaim_lock);
2255
2256 if (gfs2_glmutex_trylock(gl)) {
2257 if (gl->gl_ops == &gfs2_inode_glops) {
2258 struct gfs2_inode *ip = get_gl2ip(gl);
2259 if (ip && !atomic_read(&ip->i_count))
2260 gfs2_inode_destroy(ip);
2261 }
2262 if (queue_empty(gl, &gl->gl_holders) &&
2263 gl->gl_state != LM_ST_UNLOCKED)
2264 handle_callback(gl, LM_ST_UNLOCKED);
2265
2266 gfs2_glmutex_unlock(gl);
2267 }
2268
2269 gfs2_glock_put(gl);
2270}
2271
2272/**
2273 * gfs2_gl_hash_clear - Empty out the glock hash table
2274 * @sdp: the filesystem
2275 * @wait: wait until it's all gone
2276 *
2277 * Called when unmounting the filesystem, or when inter-node lock manager
2278 * requests DROPLOCKS because it is running out of capacity.
2279 */
2280
2281void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
2282{
2283 unsigned long t;
2284 unsigned int x;
2285 int cont;
2286
2287 t = jiffies;
2288
2289 for (;;) {
2290 cont = 0;
2291
2292 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
2293 if (examine_bucket(clear_glock, sdp,
2294 &sdp->sd_gl_hash[x]))
2295 cont = 1;
2296
2297 if (!wait || !cont)
2298 break;
2299
2300 if (time_after_eq(jiffies,
2301 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
2302 fs_warn(sdp, "Unmount seems to be stalled. "
2303 "Dumping lock state...\n");
2304 gfs2_dump_lockstate(sdp);
2305 t = jiffies;
2306 }
2307
2308 /* invalidate_inodes() requires that the sb inodes list
2309 not change, but an async completion callback for an
2310 unlock can occur which does glock_put() which
2311 can call iput() which will change the sb inodes list.
2312 invalidate_inodes_mutex prevents glock_put()'s during
2313 an invalidate_inodes() */
2314
2315 down(&sdp->sd_invalidate_inodes_mutex);
2316 invalidate_inodes(sdp->sd_vfs);
2317 up(&sdp->sd_invalidate_inodes_mutex);
2318 yield();
2319 }
2320}
2321
2322/*
2323 * Diagnostic routines to help debug distributed deadlock
2324 */
2325
2326/**
2327 * dump_holder - print information about a glock holder
2328 * @str: a string naming the type of holder
2329 * @gh: the glock holder
2330 *
2331 * Returns: 0 on success, -ENOBUFS when we run out of space
2332 */
2333
2334static int dump_holder(char *str, struct gfs2_holder *gh)
2335{
2336 unsigned int x;
2337 int error = -ENOBUFS;
2338
2339 printk(" %s\n", str);
2340 printk(" owner = %ld\n",
2341 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
2342 printk(" gh_state = %u\n", gh->gh_state);
2343 printk(" gh_flags =");
2344 for (x = 0; x < 32; x++)
2345 if (gh->gh_flags & (1 << x))
2346 printk(" %u", x);
2347 printk(" \n");
2348 printk(" error = %d\n", gh->gh_error);
2349 printk(" gh_iflags =");
2350 for (x = 0; x < 32; x++)
2351 if (test_bit(x, &gh->gh_iflags))
2352 printk(" %u", x);
2353 printk(" \n");
2354
2355 error = 0;
2356
2357 return error;
2358}
2359
2360/**
2361 * dump_inode - print information about an inode
2362 * @ip: the inode
2363 *
2364 * Returns: 0 on success, -ENOBUFS when we run out of space
2365 */
2366
2367static int dump_inode(struct gfs2_inode *ip)
2368{
2369 unsigned int x;
2370 int error = -ENOBUFS;
2371
2372 printk(" Inode:\n");
2373 printk(" num = %llu %llu\n",
2374 ip->i_num.no_formal_ino, ip->i_num.no_addr);
2375 printk(" type = %u\n", IF2DT(ip->i_di.di_mode));
2376 printk(" i_count = %d\n", atomic_read(&ip->i_count));
2377 printk(" i_flags =");
2378 for (x = 0; x < 32; x++)
2379 if (test_bit(x, &ip->i_flags))
2380 printk(" %u", x);
2381 printk(" \n");
2382 printk(" vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
2383
2384 error = 0;
2385
2386 return error;
2387}
2388
2389/**
2390 * dump_glock - print information about a glock
2391 * @gl: the glock
2392 * @count: where we are in the buffer
2393 *
2394 * Returns: 0 on success, -ENOBUFS when we run out of space
2395 */
2396
2397static int dump_glock(struct gfs2_glock *gl)
2398{
2399 struct gfs2_holder *gh;
2400 unsigned int x;
2401 int error = -ENOBUFS;
2402
2403 spin_lock(&gl->gl_spin);
2404
2405 printk("Glock (%u, %llu)\n",
2406 gl->gl_name.ln_type,
2407 gl->gl_name.ln_number);
2408 printk(" gl_flags =");
2409 for (x = 0; x < 32; x++)
2410 if (test_bit(x, &gl->gl_flags))
2411 printk(" %u", x);
2412 printk(" \n");
2413 printk(" gl_ref = %d\n", atomic_read(&gl->gl_ref.refcount));
2414 printk(" gl_state = %u\n", gl->gl_state);
2415 printk(" req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
2416 printk(" req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
2417 printk(" lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
2418 printk(" object = %s\n", (gl->gl_object) ? "yes" : "no");
2419 printk(" le = %s\n",
2420 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
2421 printk(" reclaim = %s\n",
2422 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
2423 if (gl->gl_aspace)
2424 printk(" aspace = %lu\n",
2425 gl->gl_aspace->i_mapping->nrpages);
2426 else
2427 printk(" aspace = no\n");
2428 printk(" ail = %d\n", atomic_read(&gl->gl_ail_count));
2429 if (gl->gl_req_gh) {
2430 error = dump_holder("Request", gl->gl_req_gh);
2431 if (error)
2432 goto out;
2433 }
2434 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
2435 error = dump_holder("Holder", gh);
2436 if (error)
2437 goto out;
2438 }
2439 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
2440 error = dump_holder("Waiter1", gh);
2441 if (error)
2442 goto out;
2443 }
2444 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
2445 error = dump_holder("Waiter2", gh);
2446 if (error)
2447 goto out;
2448 }
2449 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
2450 error = dump_holder("Waiter3", gh);
2451 if (error)
2452 goto out;
2453 }
2454 if (gl->gl_ops == &gfs2_inode_glops && get_gl2ip(gl)) {
2455 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
2456 list_empty(&gl->gl_holders)) {
2457 error = dump_inode(get_gl2ip(gl));
2458 if (error)
2459 goto out;
2460 } else {
2461 error = -ENOBUFS;
2462 printk(" Inode: busy\n");
2463 }
2464 }
2465
2466 error = 0;
2467
2468 out:
2469 spin_unlock(&gl->gl_spin);
2470
2471 return error;
2472}
2473
2474/**
2475 * gfs2_dump_lockstate - print out the current lockstate
2476 * @sdp: the filesystem
2477 * @ub: the buffer to copy the information into
2478 *
2479 * If @ub is NULL, dump the lockstate to the console.
2480 *
2481 */
2482
2483int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2484{
2485 struct gfs2_gl_hash_bucket *bucket;
2486 struct gfs2_glock *gl;
2487 unsigned int x;
2488 int error = 0;
2489
2490 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2491 bucket = &sdp->sd_gl_hash[x];
2492
2493 read_lock(&bucket->hb_lock);
2494
2495 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
2496 if (test_bit(GLF_PLUG, &gl->gl_flags))
2497 continue;
2498
2499 error = dump_glock(gl);
2500 if (error)
2501 break;
2502 }
2503
2504 read_unlock(&bucket->hb_lock);
2505
2506 if (error)
2507 break;
2508 }
2509
2510
2511 return error;
2512}
2513
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
new file mode 100644
index 000000000000..06847ebebdee
--- /dev/null
+++ b/fs/gfs2/glock.h
@@ -0,0 +1,143 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__
12
13/* Flags for lock requests; used in gfs2_holder gh_flag field.
14 From lm_interface.h:
15#define LM_FLAG_TRY 0x00000001
16#define LM_FLAG_TRY_1CB 0x00000002
17#define LM_FLAG_NOEXP 0x00000004
18#define LM_FLAG_ANY 0x00000008
19#define LM_FLAG_PRIORITY 0x00000010 */
20
21#define GL_LOCAL_EXCL 0x00000020
22#define GL_ASYNC 0x00000040
23#define GL_EXACT 0x00000080
24#define GL_SKIP 0x00000100
25#define GL_ATIME 0x00000200
26#define GL_NOCACHE 0x00000400
27#define GL_SYNC 0x00000800
28#define GL_NOCANCEL 0x00001000
29#define GL_NEVER_RECURSE 0x00002000
30
31#define GLR_TRYFAILED 13
32#define GLR_CANCELED 14
33
34static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
35{
36 struct gfs2_holder *gh;
37 int locked = 0;
38
39 /* Look in glock's list of holders for one with current task as owner */
40 spin_lock(&gl->gl_spin);
41 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
42 if (gh->gh_owner == current) {
43 locked = 1;
44 break;
45 }
46 }
47 spin_unlock(&gl->gl_spin);
48
49 return locked;
50}
51
52static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
53{
54 return (gl->gl_state == LM_ST_EXCLUSIVE);
55}
56
57static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
58{
59 return (gl->gl_state == LM_ST_DEFERRED);
60}
61
62static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
63{
64 return (gl->gl_state == LM_ST_SHARED);
65}
66
67static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
68{
69 int ret;
70 spin_lock(&gl->gl_spin);
71 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
72 spin_unlock(&gl->gl_spin);
73 return ret;
74}
75
76struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
77 struct lm_lockname *name);
78int gfs2_glock_get(struct gfs2_sbd *sdp,
79 uint64_t number, struct gfs2_glock_operations *glops,
80 int create, struct gfs2_glock **glp);
81void gfs2_glock_hold(struct gfs2_glock *gl);
82int gfs2_glock_put(struct gfs2_glock *gl);
83
84void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
85 struct gfs2_holder *gh);
86void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh);
87void gfs2_holder_uninit(struct gfs2_holder *gh);
88struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
89 int flags, gfp_t gfp_flags);
90void gfs2_holder_put(struct gfs2_holder *gh);
91
92void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
93void gfs2_glock_drop_th(struct gfs2_glock *gl);
94
95void gfs2_glmutex_lock(struct gfs2_glock *gl);
96int gfs2_glmutex_trylock(struct gfs2_glock *gl);
97void gfs2_glmutex_unlock(struct gfs2_glock *gl);
98
99int gfs2_glock_nq(struct gfs2_holder *gh);
100int gfs2_glock_poll(struct gfs2_holder *gh);
101int gfs2_glock_wait(struct gfs2_holder *gh);
102void gfs2_glock_dq(struct gfs2_holder *gh);
103
104void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags);
105void gfs2_glock_force_drop(struct gfs2_glock *gl);
106
107int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
108
109int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, int flags,
110 struct gfs2_holder *gh);
111void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
112int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
113 uint64_t number, struct gfs2_glock_operations *glops,
114 unsigned int state, int flags, struct gfs2_holder *gh);
115
116int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
117void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
118void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
119
120void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
121 struct gfs2_glock_operations *glops,
122 unsigned int state, int flags);
123
124/* Lock Value Block functions */
125
126int gfs2_lvb_hold(struct gfs2_glock *gl);
127void gfs2_lvb_unhold(struct gfs2_glock *gl);
128void gfs2_lvb_sync(struct gfs2_glock *gl);
129
130void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data);
131
132void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum);
133void gfs2_iopen_go_callback(struct gfs2_glock *gl, unsigned int state);
134
135void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
136void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
137
138void gfs2_scand_internal(struct gfs2_sbd *sdp);
139void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
140
141int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
142
143#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
new file mode 100644
index 000000000000..127008146a57
--- /dev/null
+++ b/fs/gfs2/glops.c
@@ -0,0 +1,487 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "glops.h"
21#include "inode.h"
22#include "log.h"
23#include "meta_io.h"
24#include "page.h"
25#include "recovery.h"
26#include "rgrp.h"
27
28/**
29 * meta_go_sync - sync out the metadata for this glock
30 * @gl: the glock
31 * @flags: DIO_*
32 *
33 * Called when demoting or unlocking an EX glock. We must flush
34 * to disk all dirty buffers/pages relating to this glock, and must not
35 * not return to caller to demote/unlock the glock until I/O is complete.
36 */
37
38static void meta_go_sync(struct gfs2_glock *gl, int flags)
39{
40 if (!(flags & DIO_METADATA))
41 return;
42
43 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
44 gfs2_log_flush_glock(gl);
45 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
46 if (flags & DIO_RELEASE)
47 gfs2_ail_empty_gl(gl);
48 }
49
50 clear_bit(GLF_SYNC, &gl->gl_flags);
51}
52
53/**
54 * meta_go_inval - invalidate the metadata for this glock
55 * @gl: the glock
56 * @flags:
57 *
58 */
59
60static void meta_go_inval(struct gfs2_glock *gl, int flags)
61{
62 if (!(flags & DIO_METADATA))
63 return;
64
65 gfs2_meta_inval(gl);
66 gl->gl_vn++;
67}
68
69/**
70 * meta_go_demote_ok - Check to see if it's ok to unlock a glock
71 * @gl: the glock
72 *
73 * Returns: 1 if we have no cached data; ok to demote meta glock
74 */
75
76static int meta_go_demote_ok(struct gfs2_glock *gl)
77{
78 return !gl->gl_aspace->i_mapping->nrpages;
79}
80
81/**
82 * inode_go_xmote_th - promote/demote a glock
83 * @gl: the glock
84 * @state: the requested state
85 * @flags:
86 *
87 */
88
89static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
90 int flags)
91{
92 if (gl->gl_state != LM_ST_UNLOCKED)
93 gfs2_pte_inval(gl);
94 gfs2_glock_xmote_th(gl, state, flags);
95}
96
97/**
98 * inode_go_xmote_bh - After promoting/demoting a glock
99 * @gl: the glock
100 *
101 */
102
103static void inode_go_xmote_bh(struct gfs2_glock *gl)
104{
105 struct gfs2_holder *gh = gl->gl_req_gh;
106 struct buffer_head *bh;
107 int error;
108
109 if (gl->gl_state != LM_ST_UNLOCKED &&
110 (!gh || !(gh->gh_flags & GL_SKIP))) {
111 error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START,
112 &bh);
113 if (!error)
114 brelse(bh);
115 }
116}
117
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
 * is being purged from our node's glock cache; we're dropping lock.
 *
 * gfs2_pte_inval() is called first, then the generic drop is started.
 */

static void inode_go_drop_th(struct gfs2_glock *gl)
{
	/* Must come before the lock itself is dropped */
	gfs2_pte_inval(gl);

	gfs2_glock_drop_th(gl);
}
132
133/**
134 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
135 * @gl: the glock protecting the inode
136 * @flags:
137 *
138 */
139
140static void inode_go_sync(struct gfs2_glock *gl, int flags)
141{
142 int meta = (flags & DIO_METADATA);
143 int data = (flags & DIO_DATA);
144
145 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
146 if (meta && data) {
147 gfs2_page_sync(gl, flags | DIO_START);
148 gfs2_log_flush_glock(gl);
149 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
150 gfs2_page_sync(gl, flags | DIO_WAIT);
151 clear_bit(GLF_DIRTY, &gl->gl_flags);
152 } else if (meta) {
153 gfs2_log_flush_glock(gl);
154 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
155 } else if (data)
156 gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT);
157 if (flags & DIO_RELEASE)
158 gfs2_ail_empty_gl(gl);
159 }
160
161 clear_bit(GLF_SYNC, &gl->gl_flags);
162}
163
164/**
165 * inode_go_inval - prepare a inode glock to be released
166 * @gl: the glock
167 * @flags:
168 *
169 */
170
171static void inode_go_inval(struct gfs2_glock *gl, int flags)
172{
173 int meta = (flags & DIO_METADATA);
174 int data = (flags & DIO_DATA);
175
176 if (meta) {
177 gfs2_meta_inval(gl);
178 gl->gl_vn++;
179 }
180 if (data)
181 gfs2_page_inval(gl);
182}
183
184/**
185 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
186 * @gl: the glock
187 *
188 * Returns: 1 if it's ok
189 */
190
191static int inode_go_demote_ok(struct gfs2_glock *gl)
192{
193 struct gfs2_sbd *sdp = gl->gl_sbd;
194 int demote = 0;
195
196 if (!get_gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages)
197 demote = 1;
198 else if (!sdp->sd_args.ar_localcaching &&
199 time_after_eq(jiffies, gl->gl_stamp +
200 gfs2_tune_get(sdp, gt_demote_secs) * HZ))
201 demote = 1;
202
203 return demote;
204}
205
206/**
207 * inode_go_lock - operation done after an inode lock is locked by a process
208 * @gl: the glock
209 * @flags:
210 *
211 * Returns: errno
212 */
213
214static int inode_go_lock(struct gfs2_holder *gh)
215{
216 struct gfs2_glock *gl = gh->gh_gl;
217 struct gfs2_inode *ip = get_gl2ip(gl);
218 int error = 0;
219
220 if (!ip)
221 return 0;
222
223 if (ip->i_vn != gl->gl_vn) {
224 error = gfs2_inode_refresh(ip);
225 if (error)
226 return error;
227 gfs2_inode_attr_in(ip);
228 }
229
230 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
231 (gl->gl_state == LM_ST_EXCLUSIVE) &&
232 (gh->gh_flags & GL_LOCAL_EXCL))
233 error = gfs2_truncatei_resume(ip);
234
235 return error;
236}
237
238/**
239 * inode_go_unlock - operation done before an inode lock is unlocked by a
240 * process
241 * @gl: the glock
242 * @flags:
243 *
244 */
245
246static void inode_go_unlock(struct gfs2_holder *gh)
247{
248 struct gfs2_glock *gl = gh->gh_gl;
249 struct gfs2_inode *ip = get_gl2ip(gl);
250
251 if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
252 gfs2_inode_attr_in(ip);
253
254 if (ip)
255 gfs2_meta_cache_flush(ip);
256}
257
258/**
259 * inode_greedy -
260 * @gl: the glock
261 *
262 */
263
264static void inode_greedy(struct gfs2_glock *gl)
265{
266 struct gfs2_sbd *sdp = gl->gl_sbd;
267 struct gfs2_inode *ip = get_gl2ip(gl);
268 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
269 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
270 unsigned int new_time;
271
272 spin_lock(&ip->i_spin);
273
274 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
275 new_time = ip->i_greedy + quantum;
276 if (new_time > max)
277 new_time = max;
278 } else {
279 new_time = ip->i_greedy - quantum;
280 if (!new_time || new_time > max)
281 new_time = 1;
282 }
283
284 ip->i_greedy = new_time;
285
286 spin_unlock(&ip->i_spin);
287
288 gfs2_inode_put(ip);
289}
290
291/**
292 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
293 * @gl: the glock
294 *
295 * Returns: 1 if it's ok
296 */
297
298static int rgrp_go_demote_ok(struct gfs2_glock *gl)
299{
300 return !gl->gl_aspace->i_mapping->nrpages;
301}
302
303/**
304 * rgrp_go_lock - operation done after an rgrp lock is locked by
305 * a first holder on this node.
306 * @gl: the glock
307 * @flags:
308 *
309 * Returns: errno
310 */
311
312static int rgrp_go_lock(struct gfs2_holder *gh)
313{
314 return gfs2_rgrp_bh_get(get_gl2rgd(gh->gh_gl));
315}
316
317/**
318 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
319 * a last holder on this node.
320 * @gl: the glock
321 * @flags:
322 *
323 */
324
325static void rgrp_go_unlock(struct gfs2_holder *gh)
326{
327 gfs2_rgrp_bh_put(get_gl2rgd(gh->gh_gl));
328}
329
330/**
331 * trans_go_xmote_th - promote/demote the transaction glock
332 * @gl: the glock
333 * @state: the requested state
334 * @flags:
335 *
336 */
337
338static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
339 int flags)
340{
341 struct gfs2_sbd *sdp = gl->gl_sbd;
342
343 if (gl->gl_state != LM_ST_UNLOCKED &&
344 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
345 gfs2_meta_syncfs(sdp);
346 gfs2_log_shutdown(sdp);
347 }
348
349 gfs2_glock_xmote_th(gl, state, flags);
350}
351
352/**
353 * trans_go_xmote_bh - After promoting/demoting the transaction glock
354 * @gl: the glock
355 *
356 */
357
358static void trans_go_xmote_bh(struct gfs2_glock *gl)
359{
360 struct gfs2_sbd *sdp = gl->gl_sbd;
361 struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl;
362 struct gfs2_log_header head;
363 int error;
364
365 if (gl->gl_state != LM_ST_UNLOCKED &&
366 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
367 gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode);
368 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
369
370 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
371 if (error)
372 gfs2_consist(sdp);
373 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
374 gfs2_consist(sdp);
375
376 /* Initialize some head of the log stuff */
377 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
378 sdp->sd_log_sequence = head.lh_sequence + 1;
379 gfs2_log_pointers_init(sdp, head.lh_blkno);
380 }
381 }
382}
383
384/**
385 * trans_go_drop_th - unlock the transaction glock
386 * @gl: the glock
387 *
388 * We want to sync the device even with localcaching. Remember
389 * that localcaching journal replay only marks buffers dirty.
390 */
391
392static void trans_go_drop_th(struct gfs2_glock *gl)
393{
394 struct gfs2_sbd *sdp = gl->gl_sbd;
395
396 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
397 gfs2_meta_syncfs(sdp);
398 gfs2_log_shutdown(sdp);
399 }
400
401 gfs2_glock_drop_th(gl);
402}
403
404/**
405 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
406 * @gl: the glock
407 *
408 * Returns: 1 if it's ok
409 */
410
411static int quota_go_demote_ok(struct gfs2_glock *gl)
412{
413 return !atomic_read(&gl->gl_lvb_count);
414}
415
416struct gfs2_glock_operations gfs2_meta_glops = {
417 .go_xmote_th = gfs2_glock_xmote_th,
418 .go_drop_th = gfs2_glock_drop_th,
419 .go_sync = meta_go_sync,
420 .go_inval = meta_go_inval,
421 .go_demote_ok = meta_go_demote_ok,
422 .go_type = LM_TYPE_META
423};
424
425struct gfs2_glock_operations gfs2_inode_glops = {
426 .go_xmote_th = inode_go_xmote_th,
427 .go_xmote_bh = inode_go_xmote_bh,
428 .go_drop_th = inode_go_drop_th,
429 .go_sync = inode_go_sync,
430 .go_inval = inode_go_inval,
431 .go_demote_ok = inode_go_demote_ok,
432 .go_lock = inode_go_lock,
433 .go_unlock = inode_go_unlock,
434 .go_greedy = inode_greedy,
435 .go_type = LM_TYPE_INODE
436};
437
438struct gfs2_glock_operations gfs2_rgrp_glops = {
439 .go_xmote_th = gfs2_glock_xmote_th,
440 .go_drop_th = gfs2_glock_drop_th,
441 .go_sync = meta_go_sync,
442 .go_inval = meta_go_inval,
443 .go_demote_ok = rgrp_go_demote_ok,
444 .go_lock = rgrp_go_lock,
445 .go_unlock = rgrp_go_unlock,
446 .go_type = LM_TYPE_RGRP
447};
448
449struct gfs2_glock_operations gfs2_trans_glops = {
450 .go_xmote_th = trans_go_xmote_th,
451 .go_xmote_bh = trans_go_xmote_bh,
452 .go_drop_th = trans_go_drop_th,
453 .go_type = LM_TYPE_NONDISK
454};
455
456struct gfs2_glock_operations gfs2_iopen_glops = {
457 .go_xmote_th = gfs2_glock_xmote_th,
458 .go_drop_th = gfs2_glock_drop_th,
459 .go_callback = gfs2_iopen_go_callback,
460 .go_type = LM_TYPE_IOPEN
461};
462
463struct gfs2_glock_operations gfs2_flock_glops = {
464 .go_xmote_th = gfs2_glock_xmote_th,
465 .go_drop_th = gfs2_glock_drop_th,
466 .go_type = LM_TYPE_FLOCK
467};
468
469struct gfs2_glock_operations gfs2_nondisk_glops = {
470 .go_xmote_th = gfs2_glock_xmote_th,
471 .go_drop_th = gfs2_glock_drop_th,
472 .go_type = LM_TYPE_NONDISK
473};
474
475struct gfs2_glock_operations gfs2_quota_glops = {
476 .go_xmote_th = gfs2_glock_xmote_th,
477 .go_drop_th = gfs2_glock_drop_th,
478 .go_demote_ok = quota_go_demote_ok,
479 .go_type = LM_TYPE_QUOTA
480};
481
482struct gfs2_glock_operations gfs2_journal_glops = {
483 .go_xmote_th = gfs2_glock_xmote_th,
484 .go_drop_th = gfs2_glock_drop_th,
485 .go_type = LM_TYPE_JOURNAL
486};
487
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
new file mode 100644
index 000000000000..94f2d264aa64
--- /dev/null
+++ b/fs/gfs2/glops.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOPS_DOT_H__
11#define __GLOPS_DOT_H__
12
13extern struct gfs2_glock_operations gfs2_meta_glops;
14extern struct gfs2_glock_operations gfs2_inode_glops;
15extern struct gfs2_glock_operations gfs2_rgrp_glops;
16extern struct gfs2_glock_operations gfs2_trans_glops;
17extern struct gfs2_glock_operations gfs2_iopen_glops;
18extern struct gfs2_glock_operations gfs2_flock_glops;
19extern struct gfs2_glock_operations gfs2_nondisk_glops;
20extern struct gfs2_glock_operations gfs2_quota_glops;
21extern struct gfs2_glock_operations gfs2_journal_glops;
22
23#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
new file mode 100644
index 000000000000..3ed0a7f26e45
--- /dev/null
+++ b/fs/gfs2/incore.h
@@ -0,0 +1,703 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
/*
 * DIO_* flag bits.  Each constant is a distinct single bit, so several can
 * be OR-ed together into one flags argument.
 */
#define DIO_FORCE	(1 << 0)
#define DIO_CLEAN	(1 << 1)
#define DIO_DIRTY	(1 << 2)
#define DIO_START	(1 << 3)
#define DIO_WAIT	(1 << 4)
#define DIO_METADATA	(1 << 5)
#define DIO_DATA	(1 << 6)
#define DIO_RELEASE	(1 << 7)
#define DIO_ALL		(1 << 8)
22
23struct gfs2_log_operations;
24struct gfs2_log_element;
25struct gfs2_bitmap;
26struct gfs2_rgrpd;
27struct gfs2_bufdata;
28struct gfs2_databuf;
29struct gfs2_glock_operations;
30struct gfs2_holder;
31struct gfs2_glock;
32struct gfs2_alloc;
33struct gfs2_inode;
34struct gfs2_file;
35struct gfs2_revoke;
36struct gfs2_revoke_replay;
37struct gfs2_unlinked;
38struct gfs2_quota_data;
39struct gfs2_log_buf;
40struct gfs2_trans;
41struct gfs2_ail;
42struct gfs2_jdesc;
43struct gfs2_args;
44struct gfs2_tune;
45struct gfs2_gl_hash_bucket;
46struct gfs2_sbd;
47
/* Callback type stored in gfs2_glock.gl_req_bh: takes the glock and a result code. */
typedef void (*gfs2_glop_bh_t)(struct gfs2_glock *gl, unsigned int ret);
49
50/*
51 * Structure of operations that are associated with each
52 * type of element in the log.
53 */
54
55struct gfs2_log_operations {
56 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
57 void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
58 void (*lo_before_commit) (struct gfs2_sbd *sdp);
59 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
60 void (*lo_before_scan) (struct gfs2_jdesc *jd,
61 struct gfs2_log_header *head, int pass);
62 int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
63 struct gfs2_log_descriptor *ld, __be64 *ptr,
64 int pass);
65 void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
66 char *lo_name;
67};
68
69struct gfs2_log_element {
70 struct list_head le_list;
71 struct gfs2_log_operations *le_ops;
72};
73
74struct gfs2_bitmap {
75 struct buffer_head *bi_bh;
76 char *bi_clone;
77 uint32_t bi_offset;
78 uint32_t bi_start;
79 uint32_t bi_len;
80};
81
82struct gfs2_rgrpd {
83 struct list_head rd_list; /* Link with superblock */
84 struct list_head rd_list_mru;
85 struct list_head rd_recent; /* Recently used rgrps */
86 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
87 struct gfs2_rindex rd_ri;
88 struct gfs2_rgrp rd_rg;
89 uint64_t rd_rg_vn;
90 struct gfs2_bitmap *rd_bits;
91 unsigned int rd_bh_count;
92 struct semaphore rd_mutex;
93 uint32_t rd_free_clone;
94 struct gfs2_log_element rd_le;
95 uint32_t rd_last_alloc_data;
96 uint32_t rd_last_alloc_meta;
97 struct gfs2_sbd *rd_sbd;
98};
99
100enum gfs2_state_bits {
101 BH_Pinned = BH_PrivateStart,
102};
103
104BUFFER_FNS(Pinned, pinned)
105TAS_BUFFER_FNS(Pinned, pinned)
106
107struct gfs2_bufdata {
108 struct buffer_head *bd_bh;
109 struct gfs2_glock *bd_gl;
110
111 struct list_head bd_list_tr;
112 struct gfs2_log_element bd_le;
113
114 struct gfs2_ail *bd_ail;
115 struct list_head bd_ail_st_list;
116 struct list_head bd_ail_gl_list;
117};
118
119struct gfs2_databuf {
120 struct gfs2_log_element db_le;
121 struct buffer_head *db_bh;
122};
123
/*
 * Per-glock-type callback table plus the lock type it applies to.  The
 * tables defined in glops.c leave the callbacks they do not need as NULL.
 */
struct gfs2_glock_operations {
	void (*go_xmote_th)(struct gfs2_glock *gl, unsigned int state,
			    int flags);
	void (*go_xmote_bh)(struct gfs2_glock *gl);
	void (*go_drop_th)(struct gfs2_glock *gl);
	void (*go_drop_bh)(struct gfs2_glock *gl);
	void (*go_sync)(struct gfs2_glock *gl, int flags);
	void (*go_inval)(struct gfs2_glock *gl, int flags);
	int (*go_demote_ok)(struct gfs2_glock *gl);
	int (*go_lock)(struct gfs2_holder *gh);
	void (*go_unlock)(struct gfs2_holder *gh);
	void (*go_callback)(struct gfs2_glock *gl, unsigned int state);
	void (*go_greedy)(struct gfs2_glock *gl);
	int go_type;
};
139
/*
 * HIF_* holder flag numbers, split into "actions" and "states".
 * NOTE(review): presumably bit numbers for gfs2_holder.gh_iflags - confirm.
 */
enum {
	/* Actions */
	HIF_MUTEX	= 0,
	HIF_PROMOTE	= 1,
	HIF_DEMOTE	= 2,
	HIF_GREEDY	= 3,

	/* States */
	HIF_ALLOCED	= 4,
	HIF_DEALLOC	= 5,
	HIF_HOLDER	= 6,
	HIF_FIRST	= 7,
	HIF_RECURSE	= 8,
	HIF_ABORTED	= 9,
};
155
156struct gfs2_holder {
157 struct list_head gh_list;
158
159 struct gfs2_glock *gh_gl;
160 struct task_struct *gh_owner;
161 unsigned int gh_state;
162 int gh_flags;
163
164 int gh_error;
165 unsigned long gh_iflags;
166 struct completion gh_wait;
167};
168
/* GLF_* bit numbers for gfs2_glock.gl_flags (used with the bitops helpers). */
enum {
	GLF_PLUG		= 0,
	GLF_LOCK		= 1,
	GLF_STICKY		= 2,
	GLF_PREFETCH		= 3,
	GLF_SYNC		= 4,
	GLF_DIRTY		= 5,
	GLF_SKIP_WAITERS2	= 6,
	GLF_GREEDY		= 7,
};
179
180struct gfs2_glock {
181 struct list_head gl_list;
182 unsigned long gl_flags; /* GLF_... */
183 struct lm_lockname gl_name;
184 struct kref gl_ref;
185
186 spinlock_t gl_spin;
187
188 unsigned int gl_state;
189 struct list_head gl_holders;
190 struct list_head gl_waiters1; /* HIF_MUTEX */
191 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
192 struct list_head gl_waiters3; /* HIF_PROMOTE */
193
194 struct gfs2_glock_operations *gl_ops;
195
196 struct gfs2_holder *gl_req_gh;
197 gfs2_glop_bh_t gl_req_bh;
198
199 lm_lock_t *gl_lock;
200 char *gl_lvb;
201 atomic_t gl_lvb_count;
202
203 uint64_t gl_vn;
204 unsigned long gl_stamp;
205 void *gl_object;
206
207 struct gfs2_gl_hash_bucket *gl_bucket;
208 struct list_head gl_reclaim;
209
210 struct gfs2_sbd *gl_sbd;
211
212 struct inode *gl_aspace;
213 struct gfs2_log_element gl_le;
214 struct list_head gl_ail_list;
215 atomic_t gl_ail_count;
216};
217
218struct gfs2_alloc {
219 /* Quota stuff */
220
221 unsigned int al_qd_num;
222 struct gfs2_quota_data *al_qd[4];
223 struct gfs2_holder al_qd_ghs[4];
224
225 /* Filled in by the caller to gfs2_inplace_reserve() */
226
227 uint32_t al_requested;
228
229 /* Filled in by gfs2_inplace_reserve() */
230
231 char *al_file;
232 unsigned int al_line;
233 struct gfs2_holder al_ri_gh;
234 struct gfs2_holder al_rgd_gh;
235 struct gfs2_rgrpd *al_rgd;
236
237 /* Filled in by gfs2_alloc_*() */
238
239 uint32_t al_alloced;
240};
241
/* GIF_* bit numbers for gfs2_inode.i_flags (used with the bitops helpers). */
enum {
	GIF_MIN_INIT	= 0,
	GIF_QD_LOCKED	= 1,
	GIF_PAGED	= 2,
	GIF_SW_PAGED	= 3,
};
248
249struct gfs2_inode {
250 struct gfs2_inum i_num;
251
252 atomic_t i_count;
253 unsigned long i_flags; /* GIF_... */
254
255 uint64_t i_vn;
256 struct gfs2_dinode i_di;
257
258 struct gfs2_glock *i_gl;
259 struct gfs2_sbd *i_sbd;
260 struct inode *i_vnode;
261
262 struct gfs2_holder i_iopen_gh;
263
264 struct gfs2_alloc i_alloc;
265 uint64_t i_last_rg_alloc;
266
267 spinlock_t i_spin;
268 struct rw_semaphore i_rw_mutex;
269
270 unsigned int i_greedy;
271 unsigned long i_last_pfault;
272
273 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
274};
275
/* GFF_* flag numbers for gfs2_file.f_flags. */
enum {
	GFF_DID_DIRECT_ALLOC	= 0,
};
279
280struct gfs2_file {
281 unsigned long f_flags; /* GFF_... */
282
283 struct semaphore f_fl_mutex;
284 struct gfs2_holder f_fl_gh;
285
286 struct gfs2_inode *f_inode;
287 struct file *f_vfile;
288};
289
290struct gfs2_revoke {
291 struct gfs2_log_element rv_le;
292 uint64_t rv_blkno;
293};
294
295struct gfs2_revoke_replay {
296 struct list_head rr_list;
297 uint64_t rr_blkno;
298 unsigned int rr_where;
299};
300
/* ULF_* flag numbers for gfs2_unlinked.ul_flags. */
enum {
	ULF_LOCKED	= 0,
};
304
305struct gfs2_unlinked {
306 struct list_head ul_list;
307 unsigned int ul_count;
308 struct gfs2_unlinked_tag ul_ut;
309 unsigned long ul_flags; /* ULF_... */
310 unsigned int ul_slot;
311};
312
/* QDF_* flag numbers for gfs2_quota_data.qd_flags. */
enum {
	QDF_USER	= 0,
	QDF_CHANGE	= 1,
	QDF_LOCKED	= 2,
};
318
319struct gfs2_quota_data {
320 struct list_head qd_list;
321 unsigned int qd_count;
322
323 uint32_t qd_id;
324 unsigned long qd_flags; /* QDF_... */
325
326 int64_t qd_change;
327 int64_t qd_change_sync;
328
329 unsigned int qd_slot;
330 unsigned int qd_slot_count;
331
332 struct buffer_head *qd_bh;
333 struct gfs2_quota_change *qd_bh_qc;
334 unsigned int qd_bh_count;
335
336 struct gfs2_glock *qd_gl;
337 struct gfs2_quota_lvb qd_qb;
338
339 uint64_t qd_sync_gen;
340 unsigned long qd_last_warn;
341 unsigned long qd_last_touched;
342};
343
344struct gfs2_log_buf {
345 struct list_head lb_list;
346 struct buffer_head *lb_bh;
347 struct buffer_head *lb_real;
348};
349
350struct gfs2_trans {
351 char *tr_file;
352 unsigned int tr_line;
353
354 unsigned int tr_blocks;
355 unsigned int tr_revokes;
356 unsigned int tr_reserved;
357
358 struct gfs2_holder *tr_t_gh;
359
360 int tr_touched;
361
362 unsigned int tr_num_buf;
363 unsigned int tr_num_buf_new;
364 unsigned int tr_num_buf_rm;
365 struct list_head tr_list_buf;
366
367 unsigned int tr_num_revoke;
368 unsigned int tr_num_revoke_rm;
369};
370
371struct gfs2_ail {
372 struct list_head ai_list;
373
374 unsigned int ai_first;
375 struct list_head ai_ail1_list;
376 struct list_head ai_ail2_list;
377
378 uint64_t ai_sync_gen;
379};
380
381struct gfs2_jdesc {
382 struct list_head jd_list;
383
384 struct gfs2_inode *jd_inode;
385 unsigned int jd_jid;
386 int jd_dirty;
387
388 unsigned int jd_blocks;
389};
390
/* Number of glockd threads: default and upper bound. */
#define GFS2_GLOCKD_DEFAULT	1
#define GFS2_GLOCKD_MAX		16

/* Values for gfs2_args.ar_quota (off/account/on); the default is "off". */
#define GFS2_QUOTA_OFF		0
#define GFS2_QUOTA_ACCOUNT	1
#define GFS2_QUOTA_ON		2
#define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF

/* Values for gfs2_args.ar_data (ordered/writeback); the default is "ordered". */
#define GFS2_DATA_WRITEBACK	1
#define GFS2_DATA_ORDERED	2
#define GFS2_DATA_DEFAULT	GFS2_DATA_ORDERED
402
403struct gfs2_args {
404 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
405 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
406 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
407 int ar_spectator; /* Don't get a journal because we're always RO */
408 int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
409 int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
410 int ar_localcaching; /* Local-style caching (dangerous on multihost) */
411 int ar_debug; /* Oops on errors instead of trying to be graceful */
412 int ar_upgrade; /* Upgrade ondisk/multihost format */
413 unsigned int ar_num_glockd; /* Number of glockd threads */
414 int ar_posix_acl; /* Enable posix acls */
415 int ar_quota; /* off/account/on */
416 int ar_suiddir; /* suiddir support */
417 int ar_data; /* ordered/writeback */
418};
419
420struct gfs2_tune {
421 spinlock_t gt_spin;
422
423 unsigned int gt_ilimit;
424 unsigned int gt_ilimit_tries;
425 unsigned int gt_ilimit_min;
426 unsigned int gt_demote_secs; /* Cache retention for unheld glock */
427 unsigned int gt_incore_log_blocks;
428 unsigned int gt_log_flush_secs;
429 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
430
431 unsigned int gt_scand_secs;
432 unsigned int gt_recoverd_secs;
433 unsigned int gt_logd_secs;
434 unsigned int gt_quotad_secs;
435 unsigned int gt_inoded_secs;
436
437 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
438 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
439 unsigned int gt_quota_scale_num; /* Numerator */
440 unsigned int gt_quota_scale_den; /* Denominator */
441 unsigned int gt_quota_cache_secs;
442 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
443 unsigned int gt_atime_quantum; /* Min secs between atime updates */
444 unsigned int gt_new_files_jdata;
445 unsigned int gt_new_files_directio;
446 unsigned int gt_max_atomic_write; /* Split big writes into this size */
447 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
448 unsigned int gt_lockdump_size;
449 unsigned int gt_stall_secs; /* Detects trouble! */
450 unsigned int gt_complain_secs;
451 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
452 unsigned int gt_entries_per_readdir;
453 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
454 unsigned int gt_greedy_default;
455 unsigned int gt_greedy_quantum;
456 unsigned int gt_greedy_max;
457 unsigned int gt_statfs_quantum;
458 unsigned int gt_statfs_slow;
459};
460
461struct gfs2_gl_hash_bucket {
462 rwlock_t hb_lock;
463 struct list_head hb_list;
464};
465
/* SDF_* flag numbers for gfs2_sbd.sd_flags. */
enum {
	SDF_JOURNAL_CHECKED	= 0,
	SDF_JOURNAL_LIVE	= 1,
	SDF_SHUTDOWN		= 2,
	SDF_NOATIME		= 3,
};
472
/*
 * Glock hash table geometry: the table has 2^SHIFT buckets and MASK
 * selects a bucket index.  GFS2_FSNAME_LEN sizes the name buffers in
 * struct gfs2_sbd.
 */
#define GFS2_GL_HASH_SHIFT	13
#define GFS2_GL_HASH_SIZE	(1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK	(GFS2_GL_HASH_SIZE - 1)
#define GFS2_FSNAME_LEN		256
477
478struct gfs2_sbd {
479 struct super_block *sd_vfs;
480 struct kobject sd_kobj;
481 unsigned long sd_flags; /* SDF_... */
482 struct gfs2_sb sd_sb;
483
484 /* Constants computed on mount */
485
486 uint32_t sd_fsb2bb;
487 uint32_t sd_fsb2bb_shift;
488 uint32_t sd_diptrs; /* Number of pointers in a dinode */
489 uint32_t sd_inptrs; /* Number of pointers in a indirect block */
490 uint32_t sd_jbsize; /* Size of a journaled data block */
491 uint32_t sd_hash_bsize; /* sizeof(exhash block) */
492 uint32_t sd_hash_bsize_shift;
493 uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */
494 uint32_t sd_ut_per_block;
495 uint32_t sd_qc_per_block;
496 uint32_t sd_max_dirres; /* Max blocks needed to add a directory entry */
497 uint32_t sd_max_height; /* Max height of a file's metadata tree */
498 uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT];
499 uint32_t sd_max_jheight; /* Max height of journaled file's meta tree */
500 uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT];
501
502 struct gfs2_args sd_args; /* Mount arguments */
503 struct gfs2_tune sd_tune; /* Filesystem tuning structure */
504
505 /* Lock Stuff */
506
507 struct lm_lockstruct sd_lockstruct;
508 struct gfs2_gl_hash_bucket sd_gl_hash[GFS2_GL_HASH_SIZE];
509 struct list_head sd_reclaim_list;
510 spinlock_t sd_reclaim_lock;
511 wait_queue_head_t sd_reclaim_wq;
512 atomic_t sd_reclaim_count;
513 struct gfs2_holder sd_live_gh;
514 struct gfs2_glock *sd_rename_gl;
515 struct gfs2_glock *sd_trans_gl;
516 struct semaphore sd_invalidate_inodes_mutex;
517
518 /* Inode Stuff */
519
520 struct gfs2_inode *sd_master_dir;
521 struct gfs2_inode *sd_jindex;
522 struct gfs2_inode *sd_inum_inode;
523 struct gfs2_inode *sd_statfs_inode;
524 struct gfs2_inode *sd_ir_inode;
525 struct gfs2_inode *sd_sc_inode;
526 struct gfs2_inode *sd_ut_inode;
527 struct gfs2_inode *sd_qc_inode;
528 struct gfs2_inode *sd_rindex;
529 struct gfs2_inode *sd_quota_inode;
530 struct gfs2_inode *sd_root_dir;
531
532 /* Inum stuff */
533
534 struct semaphore sd_inum_mutex;
535
536 /* StatFS stuff */
537
538 spinlock_t sd_statfs_spin;
539 struct semaphore sd_statfs_mutex;
540 struct gfs2_statfs_change sd_statfs_master;
541 struct gfs2_statfs_change sd_statfs_local;
542 unsigned long sd_statfs_sync_time;
543
544 /* Resource group stuff */
545
546 uint64_t sd_rindex_vn;
547 spinlock_t sd_rindex_spin;
548 struct semaphore sd_rindex_mutex;
549 struct list_head sd_rindex_list;
550 struct list_head sd_rindex_mru_list;
551 struct list_head sd_rindex_recent_list;
552 struct gfs2_rgrpd *sd_rindex_forward;
553 unsigned int sd_rgrps;
554
555 /* Journal index stuff */
556
557 struct list_head sd_jindex_list;
558 spinlock_t sd_jindex_spin;
559 struct semaphore sd_jindex_mutex;
560 unsigned int sd_journals;
561 unsigned long sd_jindex_refresh_time;
562
563 struct gfs2_jdesc *sd_jdesc;
564 struct gfs2_holder sd_journal_gh;
565 struct gfs2_holder sd_jinode_gh;
566
567 struct gfs2_holder sd_ir_gh;
568 struct gfs2_holder sd_sc_gh;
569 struct gfs2_holder sd_ut_gh;
570 struct gfs2_holder sd_qc_gh;
571
572 /* Daemon stuff */
573
574 struct task_struct *sd_scand_process;
575 struct task_struct *sd_recoverd_process;
576 struct task_struct *sd_logd_process;
577 struct task_struct *sd_quotad_process;
578 struct task_struct *sd_inoded_process;
579 struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
580 unsigned int sd_glockd_num;
581
582 /* Unlinked inode stuff */
583
584 struct list_head sd_unlinked_list;
585 atomic_t sd_unlinked_count;
586 spinlock_t sd_unlinked_spin;
587 struct semaphore sd_unlinked_mutex;
588
589 unsigned int sd_unlinked_slots;
590 unsigned int sd_unlinked_chunks;
591 unsigned char **sd_unlinked_bitmap;
592
593 /* Quota stuff */
594
595 struct list_head sd_quota_list;
596 atomic_t sd_quota_count;
597 spinlock_t sd_quota_spin;
598 struct semaphore sd_quota_mutex;
599
600 unsigned int sd_quota_slots;
601 unsigned int sd_quota_chunks;
602 unsigned char **sd_quota_bitmap;
603
604 uint64_t sd_quota_sync_gen;
605 unsigned long sd_quota_sync_time;
606
607 /* Log stuff */
608
609 spinlock_t sd_log_lock;
610 atomic_t sd_log_trans_count;
611 wait_queue_head_t sd_log_trans_wq;
612 atomic_t sd_log_flush_count;
613 wait_queue_head_t sd_log_flush_wq;
614
615 unsigned int sd_log_blks_reserved;
616 unsigned int sd_log_commited_buf;
617 unsigned int sd_log_commited_revoke;
618
619 unsigned int sd_log_num_gl;
620 unsigned int sd_log_num_buf;
621 unsigned int sd_log_num_revoke;
622 unsigned int sd_log_num_rg;
623 unsigned int sd_log_num_databuf;
624 struct list_head sd_log_le_gl;
625 struct list_head sd_log_le_buf;
626 struct list_head sd_log_le_revoke;
627 struct list_head sd_log_le_rg;
628 struct list_head sd_log_le_databuf;
629
630 unsigned int sd_log_blks_free;
631 struct list_head sd_log_blks_list;
632 wait_queue_head_t sd_log_blks_wait;
633
634 uint64_t sd_log_sequence;
635 unsigned int sd_log_head;
636 unsigned int sd_log_tail;
637 uint64_t sd_log_wraps;
638 int sd_log_idle;
639
640 unsigned long sd_log_flush_time;
641 struct semaphore sd_log_flush_lock;
642 struct list_head sd_log_flush_list;
643
644 unsigned int sd_log_flush_head;
645 uint64_t sd_log_flush_wrapped;
646
647 struct list_head sd_ail1_list;
648 struct list_head sd_ail2_list;
649 uint64_t sd_ail_sync_gen;
650
651 /* Replay stuff */
652
653 struct list_head sd_revoke_list;
654 unsigned int sd_replay_tail;
655
656 unsigned int sd_found_blocks;
657 unsigned int sd_found_revokes;
658 unsigned int sd_replayed_blocks;
659
660 /* For quiescing the filesystem */
661
662 struct gfs2_holder sd_freeze_gh;
663 struct semaphore sd_freeze_lock;
664 unsigned int sd_freeze_count;
665
666 /* Counters */
667
668 atomic_t sd_glock_count;
669 atomic_t sd_glock_held_count;
670 atomic_t sd_inode_count;
671 atomic_t sd_bufdata_count;
672
673 atomic_t sd_fh2dentry_misses;
674 atomic_t sd_reclaimed;
675 atomic_t sd_log_flush_incore;
676 atomic_t sd_log_flush_ondisk;
677
678 atomic_t sd_glock_nq_calls;
679 atomic_t sd_glock_dq_calls;
680 atomic_t sd_glock_prefetch_calls;
681 atomic_t sd_lm_lock_calls;
682 atomic_t sd_lm_unlock_calls;
683 atomic_t sd_lm_callbacks;
684
685 atomic_t sd_ops_address;
686 atomic_t sd_ops_dentry;
687 atomic_t sd_ops_export;
688 atomic_t sd_ops_file;
689 atomic_t sd_ops_inode;
690 atomic_t sd_ops_super;
691 atomic_t sd_ops_vm;
692
693 char sd_fsname[GFS2_FSNAME_LEN];
694 char sd_table_name[GFS2_FSNAME_LEN];
695 char sd_proto_name[GFS2_FSNAME_LEN];
696
697 /* Debugging crud */
698
699 unsigned long sd_last_warning;
700};
701
702#endif /* __INCORE_DOT_H__ */
703
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
new file mode 100644
index 000000000000..73922dba5398
--- /dev/null
+++ b/fs/gfs2/inode.c
@@ -0,0 +1,1805 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "bmap.h"
22#include "dir.h"
23#include "eattr.h"
24#include "glock.h"
25#include "glops.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "ops_address.h"
30#include "ops_file.h"
31#include "ops_inode.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "unlinked.h"
36
37/**
38 * inode_attr_in - Copy attributes from the dinode into the VFS inode
39 * @ip: The GFS2 inode (with embedded disk inode data)
40 * @inode: The Linux VFS inode
41 *
42 */
43
44static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode)
45{
46 inode->i_ino = ip->i_num.no_formal_ino;
47
48 switch (ip->i_di.di_mode & S_IFMT) {
49 case S_IFBLK:
50 case S_IFCHR:
51 inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
52 break;
53 default:
54 inode->i_rdev = 0;
55 break;
56 };
57
58 inode->i_mode = ip->i_di.di_mode;
59 inode->i_nlink = ip->i_di.di_nlink;
60 inode->i_uid = ip->i_di.di_uid;
61 inode->i_gid = ip->i_di.di_gid;
62 i_size_write(inode, ip->i_di.di_size);
63 inode->i_atime.tv_sec = ip->i_di.di_atime;
64 inode->i_mtime.tv_sec = ip->i_di.di_mtime;
65 inode->i_ctime.tv_sec = ip->i_di.di_ctime;
66 inode->i_atime.tv_nsec = 0;
67 inode->i_mtime.tv_nsec = 0;
68 inode->i_ctime.tv_nsec = 0;
69 inode->i_blksize = PAGE_SIZE;
70 inode->i_blocks = ip->i_di.di_blocks <<
71 (ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
72
73 if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE)
74 inode->i_flags |= S_IMMUTABLE;
75 else
76 inode->i_flags &= ~S_IMMUTABLE;
77
78 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY)
79 inode->i_flags |= S_APPEND;
80 else
81 inode->i_flags &= ~S_APPEND;
82}
83
/**
 * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
 * @ip: The GFS2 inode (with embedded disk inode data)
 *
 * Does nothing if gfs2_ip2v_lookup() finds no VFS inode for @ip.  The
 * reference returned by the lookup is dropped again before returning.
 */

void gfs2_inode_attr_in(struct gfs2_inode *ip)
{
	struct inode *inode = gfs2_ip2v_lookup(ip);

	if (!inode)
		return;

	inode_attr_in(ip, inode);
	iput(inode);
}
100
101/**
102 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
103 * @ip: The GFS2 inode
104 *
105 * Only copy out the attributes that we want the VFS layer
106 * to be able to modify.
107 */
108
109void gfs2_inode_attr_out(struct gfs2_inode *ip)
110{
111 struct inode *inode = ip->i_vnode;
112
113 gfs2_assert_withdraw(ip->i_sbd,
114 (ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
115 ip->i_di.di_mode = inode->i_mode;
116 ip->i_di.di_uid = inode->i_uid;
117 ip->i_di.di_gid = inode->i_gid;
118 ip->i_di.di_atime = inode->i_atime.tv_sec;
119 ip->i_di.di_mtime = inode->i_mtime.tv_sec;
120 ip->i_di.di_ctime = inode->i_ctime.tv_sec;
121}
122
123/**
124 * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode
125 * @ip: the struct gfs2_inode to get the struct inode for
126 *
127 * Returns: A VFS inode, or NULL if none
128 */
129
130struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip)
131{
132 struct inode *inode = NULL;
133
134 gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags));
135
136 spin_lock(&ip->i_spin);
137 if (ip->i_vnode)
138 inode = igrab(ip->i_vnode);
139 spin_unlock(&ip->i_spin);
140
141 return inode;
142}
143
144/**
145 * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode
146 * @ip: the struct gfs2_inode to get the struct inode for
147 *
148 * Returns: A VFS inode, or NULL if no mem
149 */
150
151struct inode *gfs2_ip2v(struct gfs2_inode *ip)
152{
153 struct inode *inode, *tmp;
154
155 inode = gfs2_ip2v_lookup(ip);
156 if (inode)
157 return inode;
158
159 tmp = new_inode(ip->i_sbd->sd_vfs);
160 if (!tmp)
161 return NULL;
162
163 inode_attr_in(ip, tmp);
164
165 if (S_ISREG(ip->i_di.di_mode)) {
166 tmp->i_op = &gfs2_file_iops;
167 tmp->i_fop = &gfs2_file_fops;
168 tmp->i_mapping->a_ops = &gfs2_file_aops;
169 } else if (S_ISDIR(ip->i_di.di_mode)) {
170 tmp->i_op = &gfs2_dir_iops;
171 tmp->i_fop = &gfs2_dir_fops;
172 } else if (S_ISLNK(ip->i_di.di_mode)) {
173 tmp->i_op = &gfs2_symlink_iops;
174 } else {
175 tmp->i_op = &gfs2_dev_iops;
176 init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
177 }
178
179 set_v2ip(tmp, NULL);
180
181 for (;;) {
182 spin_lock(&ip->i_spin);
183 if (!ip->i_vnode)
184 break;
185 inode = igrab(ip->i_vnode);
186 spin_unlock(&ip->i_spin);
187
188 if (inode) {
189 iput(tmp);
190 return inode;
191 }
192 yield();
193 }
194
195 inode = tmp;
196
197 gfs2_inode_hold(ip);
198 ip->i_vnode = inode;
199 set_v2ip(inode, ip);
200
201 spin_unlock(&ip->i_spin);
202
203 insert_inode_hash(inode);
204
205 return inode;
206}
207
208static int iget_test(struct inode *inode, void *opaque)
209{
210 struct gfs2_inode *ip = get_v2ip(inode);
211 struct gfs2_inum *inum = (struct gfs2_inum *)opaque;
212
213 if (ip && ip->i_num.no_addr == inum->no_addr)
214 return 1;
215
216 return 0;
217}
218
219struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
220{
221 return ilookup5(sb, (unsigned long)inum->no_formal_ino,
222 iget_test, inum);
223}
224
225void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type)
226{
227 spin_lock(&ip->i_spin);
228 if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) {
229 ip->i_di.di_nlink = 1;
230 ip->i_di.di_mode = DT2IF(type);
231 }
232 spin_unlock(&ip->i_spin);
233}
234
235/**
236 * gfs2_inode_refresh - Refresh the incore copy of the dinode
237 * @ip: The GFS2 inode
238 *
239 * Returns: errno
240 */
241
242int gfs2_inode_refresh(struct gfs2_inode *ip)
243{
244 struct buffer_head *dibh;
245 int error;
246
247 error = gfs2_meta_inode_buffer(ip, &dibh);
248 if (error)
249 return error;
250
251 if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) {
252 brelse(dibh);
253 return -EIO;
254 }
255
256 spin_lock(&ip->i_spin);
257 gfs2_dinode_in(&ip->i_di, dibh->b_data);
258 set_bit(GIF_MIN_INIT, &ip->i_flags);
259 spin_unlock(&ip->i_spin);
260
261 brelse(dibh);
262
263 if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
264 if (gfs2_consist_inode(ip))
265 gfs2_dinode_print(&ip->i_di);
266 return -EIO;
267 }
268 if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
269 return -ESTALE;
270
271 ip->i_vn = ip->i_gl->gl_vn;
272
273 return 0;
274}
275
276/**
277 * inode_create - create a struct gfs2_inode
278 * @i_gl: The glock covering the inode
279 * @inum: The inode number
280 * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode)
281 * @io_state: the state the iopen glock should be acquired in
282 * @ipp: pointer to put the returned inode in
283 *
284 * Returns: errno
285 */
286
287static int inode_create(struct gfs2_glock *i_gl, struct gfs2_inum *inum,
288 struct gfs2_glock *io_gl, unsigned int io_state,
289 struct gfs2_inode **ipp)
290{
291 struct gfs2_sbd *sdp = i_gl->gl_sbd;
292 struct gfs2_inode *ip;
293 int error = 0;
294
295 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
296 if (!ip)
297 return -ENOMEM;
298 memset(ip, 0, sizeof(struct gfs2_inode));
299
300 ip->i_num = *inum;
301
302 atomic_set(&ip->i_count, 1);
303
304 ip->i_vn = i_gl->gl_vn - 1;
305
306 ip->i_gl = i_gl;
307 ip->i_sbd = sdp;
308
309 spin_lock_init(&ip->i_spin);
310 init_rwsem(&ip->i_rw_mutex);
311
312 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
313
314 error = gfs2_glock_nq_init(io_gl,
315 io_state, GL_LOCAL_EXCL | GL_EXACT,
316 &ip->i_iopen_gh);
317 if (error)
318 goto fail;
319 ip->i_iopen_gh.gh_owner = NULL;
320
321 spin_lock(&io_gl->gl_spin);
322 gfs2_glock_hold(i_gl);
323 set_gl2gl(io_gl, i_gl);
324 spin_unlock(&io_gl->gl_spin);
325
326 gfs2_glock_hold(i_gl);
327 set_gl2ip(i_gl, ip);
328
329 atomic_inc(&sdp->sd_inode_count);
330
331 *ipp = ip;
332
333 return 0;
334
335 fail:
336 gfs2_meta_cache_flush(ip);
337 kmem_cache_free(gfs2_inode_cachep, ip);
338 *ipp = NULL;
339
340 return error;
341}
342
343/**
344 * gfs2_inode_get - Create or get a reference on an inode
345 * @i_gl: The glock covering the inode
346 * @inum: The inode number
347 * @create:
348 * @ipp: pointer to put the returned inode in
349 *
350 * Returns: errno
351 */
352
353int gfs2_inode_get(struct gfs2_glock *i_gl, struct gfs2_inum *inum, int create,
354 struct gfs2_inode **ipp)
355{
356 struct gfs2_sbd *sdp = i_gl->gl_sbd;
357 struct gfs2_glock *io_gl;
358 int error = 0;
359
360 gfs2_glmutex_lock(i_gl);
361
362 *ipp = get_gl2ip(i_gl);
363 if (*ipp) {
364 error = -ESTALE;
365 if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino)
366 goto out;
367 atomic_inc(&(*ipp)->i_count);
368 error = 0;
369 goto out;
370 }
371
372 if (!create)
373 goto out;
374
375 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops,
376 CREATE, &io_gl);
377 if (!error) {
378 error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp);
379 gfs2_glock_put(io_gl);
380 }
381
382 out:
383 gfs2_glmutex_unlock(i_gl);
384
385 return error;
386}
387
388void gfs2_inode_hold(struct gfs2_inode *ip)
389{
390 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
391 atomic_inc(&ip->i_count);
392}
393
394void gfs2_inode_put(struct gfs2_inode *ip)
395{
396 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
397 atomic_dec(&ip->i_count);
398}
399
400void gfs2_inode_destroy(struct gfs2_inode *ip)
401{
402 struct gfs2_sbd *sdp = ip->i_sbd;
403 struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
404 struct gfs2_glock *i_gl = ip->i_gl;
405
406 gfs2_assert_warn(sdp, !atomic_read(&ip->i_count));
407 gfs2_assert(sdp, get_gl2gl(io_gl) == i_gl);
408
409 spin_lock(&io_gl->gl_spin);
410 set_gl2gl(io_gl, NULL);
411 gfs2_glock_put(i_gl);
412 spin_unlock(&io_gl->gl_spin);
413
414 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
415
416 gfs2_meta_cache_flush(ip);
417 kmem_cache_free(gfs2_inode_cachep, ip);
418
419 set_gl2ip(i_gl, NULL);
420 gfs2_glock_put(i_gl);
421
422 atomic_dec(&sdp->sd_inode_count);
423}
424
425static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul)
426{
427 struct gfs2_sbd *sdp = ip->i_sbd;
428 struct gfs2_alloc *al;
429 struct gfs2_rgrpd *rgd;
430 int error;
431
432 if (ip->i_di.di_blocks != 1) {
433 if (gfs2_consist_inode(ip))
434 gfs2_dinode_print(&ip->i_di);
435 return -EIO;
436 }
437
438 al = gfs2_alloc_get(ip);
439
440 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
441 if (error)
442 goto out;
443
444 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
445 if (error)
446 goto out_qs;
447
448 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
449 if (!rgd) {
450 gfs2_consist_inode(ip);
451 error = -EIO;
452 goto out_rindex_relse;
453 }
454
455 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
456 &al->al_rgd_gh);
457 if (error)
458 goto out_rindex_relse;
459
460 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
461 RES_STATFS + RES_QUOTA, 1);
462 if (error)
463 goto out_rg_gunlock;
464
465 gfs2_trans_add_gl(ip->i_gl);
466
467 gfs2_free_di(rgd, ip);
468
469 error = gfs2_unlinked_ondisk_rm(sdp, ul);
470
471 gfs2_trans_end(sdp);
472 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
473
474 out_rg_gunlock:
475 gfs2_glock_dq_uninit(&al->al_rgd_gh);
476
477 out_rindex_relse:
478 gfs2_glock_dq_uninit(&al->al_ri_gh);
479
480 out_qs:
481 gfs2_quota_unhold(ip);
482
483 out:
484 gfs2_alloc_put(ip);
485
486 return error;
487}
488
489/**
490 * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode)
491 * @sdp: the filesystem
492 * @inum: the inode number to deallocate
493 * @io_gh: a holder for the iopen glock for this inode
494 *
495 * Returns: errno
496 */
497
498static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul,
499 struct gfs2_holder *io_gh)
500{
501 struct gfs2_inode *ip;
502 struct gfs2_holder i_gh;
503 int error;
504
505 error = gfs2_glock_nq_num(sdp,
506 ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops,
507 LM_ST_EXCLUSIVE, 0, &i_gh);
508 if (error)
509 return error;
510
511 /* We reacquire the iopen lock here to avoid a race with the NFS server
512 calling gfs2_read_inode() with the inode number of a inode we're in
513 the process of deallocating. And we can't keep our hold on the lock
514 from inode_dealloc_init() for deadlock reasons. */
515
516 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
517 error = gfs2_glock_nq(io_gh);
518 switch (error) {
519 case 0:
520 break;
521 case GLR_TRYFAILED:
522 error = 1;
523 default:
524 goto out;
525 }
526
527 gfs2_assert_warn(sdp, !get_gl2ip(i_gh.gh_gl));
528 error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl,
529 LM_ST_EXCLUSIVE, &ip);
530
531 gfs2_glock_dq(io_gh);
532
533 if (error)
534 goto out;
535
536 error = gfs2_inode_refresh(ip);
537 if (error)
538 goto out_iput;
539
540 if (ip->i_di.di_nlink) {
541 if (gfs2_consist_inode(ip))
542 gfs2_dinode_print(&ip->i_di);
543 error = -EIO;
544 goto out_iput;
545 }
546
547 if (S_ISDIR(ip->i_di.di_mode) &&
548 (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
549 error = gfs2_dir_exhash_dealloc(ip);
550 if (error)
551 goto out_iput;
552 }
553
554 if (ip->i_di.di_eattr) {
555 error = gfs2_ea_dealloc(ip);
556 if (error)
557 goto out_iput;
558 }
559
560 if (!gfs2_is_stuffed(ip)) {
561 error = gfs2_file_dealloc(ip);
562 if (error)
563 goto out_iput;
564 }
565
566 error = dinode_dealloc(ip, ul);
567 if (error)
568 goto out_iput;
569
570 out_iput:
571 gfs2_glmutex_lock(i_gh.gh_gl);
572 gfs2_inode_put(ip);
573 gfs2_inode_destroy(ip);
574 gfs2_glmutex_unlock(i_gh.gh_gl);
575
576 out:
577 gfs2_glock_dq_uninit(&i_gh);
578
579 return error;
580}
581
582/**
583 * try_inode_dealloc - Try to deallocate an inode and all its blocks
584 * @sdp: the filesystem
585 *
586 * Returns: 0 on success, -errno on error, 1 on busy (inode open)
587 */
588
589static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
590{
591 struct gfs2_holder io_gh;
592 int error = 0;
593
594 gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum);
595
596 error = gfs2_glock_nq_num(sdp,
597 ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops,
598 LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
599 switch (error) {
600 case 0:
601 break;
602 case GLR_TRYFAILED:
603 return 1;
604 default:
605 return error;
606 }
607
608 gfs2_glock_dq(&io_gh);
609 error = inode_dealloc(sdp, ul, &io_gh);
610 gfs2_holder_uninit(&io_gh);
611
612 return error;
613}
614
615static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
616{
617 struct gfs2_rgrpd *rgd;
618 struct gfs2_holder ri_gh, rgd_gh;
619 int error;
620
621 error = gfs2_rindex_hold(sdp, &ri_gh);
622 if (error)
623 return error;
624
625 rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr);
626 if (!rgd) {
627 gfs2_consist(sdp);
628 error = -EIO;
629 goto out;
630 }
631
632 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
633 if (error)
634 goto out;
635
636 error = gfs2_trans_begin(sdp,
637 RES_RG_BIT + RES_UNLINKED + RES_STATFS,
638 0);
639 if (error)
640 goto out_gunlock;
641
642 gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr);
643 gfs2_unlinked_ondisk_rm(sdp, ul);
644
645 gfs2_trans_end(sdp);
646
647 out_gunlock:
648 gfs2_glock_dq_uninit(&rgd_gh);
649 out:
650 gfs2_glock_dq_uninit(&ri_gh);
651
652 return error;
653}
654
655int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
656{
657 if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT)
658 return inode_dealloc_uninit(sdp, ul);
659 else
660 return try_inode_dealloc(sdp, ul);
661}
662
663/**
664 * gfs2_change_nlink - Change nlink count on inode
665 * @ip: The GFS2 inode
666 * @diff: The change in the nlink count required
667 *
668 * Returns: errno
669 */
670
671int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
672{
673 struct buffer_head *dibh;
674 uint32_t nlink;
675 int error;
676
677 nlink = ip->i_di.di_nlink + diff;
678
679 /* If we are reducing the nlink count, but the new value ends up being
680 bigger than the old one, we must have underflowed. */
681 if (diff < 0 && nlink > ip->i_di.di_nlink) {
682 if (gfs2_consist_inode(ip))
683 gfs2_dinode_print(&ip->i_di);
684 return -EIO;
685 }
686
687 error = gfs2_meta_inode_buffer(ip, &dibh);
688 if (error)
689 return error;
690
691 ip->i_di.di_nlink = nlink;
692 ip->i_di.di_ctime = get_seconds();
693
694 gfs2_trans_add_bh(ip->i_gl, dibh);
695 gfs2_dinode_out(&ip->i_di, dibh->b_data);
696 brelse(dibh);
697
698 return 0;
699}
700
701/**
702 * gfs2_lookupi - Look up a filename in a directory and return its inode
703 * @d_gh: An initialized holder for the directory glock
704 * @name: The name of the inode to look for
705 * @is_root: If 1, ignore the caller's permissions
706 * @i_gh: An uninitialized holder for the new inode glock
707 *
708 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
709 * @is_root is true.
710 *
711 * Returns: errno
712 */
713
714int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root,
715 struct gfs2_inode **ipp)
716{
717 struct gfs2_sbd *sdp = dip->i_sbd;
718 struct gfs2_holder d_gh;
719 struct gfs2_inum inum;
720 unsigned int type;
721 struct gfs2_glock *gl;
722 int error;
723
724 if (!name->len || name->len > GFS2_FNAMESIZE)
725 return -ENAMETOOLONG;
726
727 if (gfs2_filecmp(name, ".", 1) ||
728 (gfs2_filecmp(name, "..", 2) && dip == sdp->sd_root_dir)) {
729 gfs2_inode_hold(dip);
730 *ipp = dip;
731 return 0;
732 }
733
734 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
735 if (error)
736 return error;
737
738 if (!is_root) {
739 error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL);
740 if (error)
741 goto out;
742 }
743
744 error = gfs2_dir_search(dip, name, &inum, &type);
745 if (error)
746 goto out;
747
748 error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops,
749 CREATE, &gl);
750 if (error)
751 goto out;
752
753 error = gfs2_inode_get(gl, &inum, CREATE, ipp);
754 if (!error)
755 gfs2_inode_min_init(*ipp, type);
756
757 gfs2_glock_put(gl);
758
759 out:
760 gfs2_glock_dq_uninit(&d_gh);
761
762 return error;
763}
764
765static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino)
766{
767 struct gfs2_inode *ip = sdp->sd_ir_inode;
768 struct buffer_head *bh;
769 struct gfs2_inum_range ir;
770 int error;
771
772 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
773 if (error)
774 return error;
775 down(&sdp->sd_inum_mutex);
776
777 error = gfs2_meta_inode_buffer(ip, &bh);
778 if (error) {
779 up(&sdp->sd_inum_mutex);
780 gfs2_trans_end(sdp);
781 return error;
782 }
783
784 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
785
786 if (ir.ir_length) {
787 *formal_ino = ir.ir_start++;
788 ir.ir_length--;
789 gfs2_trans_add_bh(ip->i_gl, bh);
790 gfs2_inum_range_out(&ir,
791 bh->b_data + sizeof(struct gfs2_dinode));
792 brelse(bh);
793 up(&sdp->sd_inum_mutex);
794 gfs2_trans_end(sdp);
795 return 0;
796 }
797
798 brelse(bh);
799
800 up(&sdp->sd_inum_mutex);
801 gfs2_trans_end(sdp);
802
803 return 1;
804}
805
806static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino)
807{
808 struct gfs2_inode *ip = sdp->sd_ir_inode;
809 struct gfs2_inode *m_ip = sdp->sd_inum_inode;
810 struct gfs2_holder gh;
811 struct buffer_head *bh;
812 struct gfs2_inum_range ir;
813 int error;
814
815 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
816 if (error)
817 return error;
818
819 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
820 if (error)
821 goto out;
822 down(&sdp->sd_inum_mutex);
823
824 error = gfs2_meta_inode_buffer(ip, &bh);
825 if (error)
826 goto out_end_trans;
827
828 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
829
830 if (!ir.ir_length) {
831 struct buffer_head *m_bh;
832 uint64_t x, y;
833
834 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
835 if (error)
836 goto out_brelse;
837
838 x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode));
839 x = y = be64_to_cpu(x);
840 ir.ir_start = x;
841 ir.ir_length = GFS2_INUM_QUANTUM;
842 x += GFS2_INUM_QUANTUM;
843 if (x < y)
844 gfs2_consist_inode(m_ip);
845 x = cpu_to_be64(x);
846 gfs2_trans_add_bh(m_ip->i_gl, m_bh);
847 *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
848
849 brelse(m_bh);
850 }
851
852 *formal_ino = ir.ir_start++;
853 ir.ir_length--;
854
855 gfs2_trans_add_bh(ip->i_gl, bh);
856 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
857
858 out_brelse:
859 brelse(bh);
860
861 out_end_trans:
862 up(&sdp->sd_inum_mutex);
863 gfs2_trans_end(sdp);
864
865 out:
866 gfs2_glock_dq_uninit(&gh);
867
868 return error;
869}
870
871static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum)
872{
873 int error;
874
875 error = pick_formal_ino_1(sdp, inum);
876 if (error <= 0)
877 return error;
878
879 error = pick_formal_ino_2(sdp, inum);
880
881 return error;
882}
883
884/**
885 * create_ok - OK to create a new on-disk inode here?
886 * @dip: Directory in which dinode is to be created
887 * @name: Name of new dinode
888 * @mode:
889 *
890 * Returns: errno
891 */
892
893static int create_ok(struct gfs2_inode *dip, struct qstr *name,
894 unsigned int mode)
895{
896 int error;
897
898 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
899 if (error)
900 return error;
901
902 /* Don't create entries in an unlinked directory */
903 if (!dip->i_di.di_nlink)
904 return -EPERM;
905
906 error = gfs2_dir_search(dip, name, NULL, NULL);
907 switch (error) {
908 case -ENOENT:
909 error = 0;
910 break;
911 case 0:
912 return -EEXIST;
913 default:
914 return error;
915 }
916
917 if (dip->i_di.di_entries == (uint32_t)-1)
918 return -EFBIG;
919 if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1)
920 return -EMLINK;
921
922 return 0;
923}
924
925static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
926 unsigned int *uid, unsigned int *gid)
927{
928 if (dip->i_sbd->sd_args.ar_suiddir &&
929 (dip->i_di.di_mode & S_ISUID) &&
930 dip->i_di.di_uid) {
931 if (S_ISDIR(*mode))
932 *mode |= S_ISUID;
933 else if (dip->i_di.di_uid != current->fsuid)
934 *mode &= ~07111;
935 *uid = dip->i_di.di_uid;
936 } else
937 *uid = current->fsuid;
938
939 if (dip->i_di.di_mode & S_ISGID) {
940 if (S_ISDIR(*mode))
941 *mode |= S_ISGID;
942 *gid = dip->i_di.di_gid;
943 } else
944 *gid = current->fsgid;
945}
946
947static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul)
948{
949 struct gfs2_sbd *sdp = dip->i_sbd;
950 int error;
951
952 gfs2_alloc_get(dip);
953
954 dip->i_alloc.al_requested = RES_DINODE;
955 error = gfs2_inplace_reserve(dip);
956 if (error)
957 goto out;
958
959 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
960 RES_STATFS, 0);
961 if (error)
962 goto out_ipreserv;
963
964 ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip);
965
966 ul->ul_ut.ut_flags = GFS2_UTF_UNINIT;
967 error = gfs2_unlinked_ondisk_add(sdp, ul);
968
969 gfs2_trans_end(sdp);
970
971 out_ipreserv:
972 gfs2_inplace_release(dip);
973
974 out:
975 gfs2_alloc_put(dip);
976
977 return error;
978}
979
980/**
981 * init_dinode - Fill in a new dinode structure
982 * @dip: the directory this inode is being created in
983 * @gl: The glock covering the new inode
984 * @inum: the inode number
985 * @mode: the file permissions
986 * @uid:
987 * @gid:
988 *
989 */
990
991static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
992 struct gfs2_inum *inum, unsigned int mode,
993 unsigned int uid, unsigned int gid)
994{
995 struct gfs2_sbd *sdp = dip->i_sbd;
996 struct gfs2_dinode di;
997 struct buffer_head *dibh;
998
999 dibh = gfs2_meta_new(gl, inum->no_addr);
1000 gfs2_trans_add_bh(gl, dibh);
1001 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
1002 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1003
1004 memset(&di, 0, sizeof(struct gfs2_dinode));
1005 gfs2_meta_header_in(&di.di_header, dibh->b_data);
1006 di.di_num = *inum;
1007 di.di_mode = mode;
1008 di.di_uid = uid;
1009 di.di_gid = gid;
1010 di.di_blocks = 1;
1011 di.di_atime = di.di_mtime = di.di_ctime = get_seconds();
1012 di.di_goal_meta = di.di_goal_data = inum->no_addr;
1013
1014 if (S_ISREG(mode)) {
1015 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
1016 gfs2_tune_get(sdp, gt_new_files_jdata))
1017 di.di_flags |= GFS2_DIF_JDATA;
1018 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
1019 gfs2_tune_get(sdp, gt_new_files_directio))
1020 di.di_flags |= GFS2_DIF_DIRECTIO;
1021 } else if (S_ISDIR(mode)) {
1022 di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO);
1023 di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA);
1024 }
1025
1026 gfs2_dinode_out(&di, dibh->b_data);
1027 brelse(dibh);
1028}
1029
1030static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
1031 unsigned int mode, struct gfs2_unlinked *ul)
1032{
1033 struct gfs2_sbd *sdp = dip->i_sbd;
1034 unsigned int uid, gid;
1035 int error;
1036
1037 munge_mode_uid_gid(dip, &mode, &uid, &gid);
1038
1039 gfs2_alloc_get(dip);
1040
1041 error = gfs2_quota_lock(dip, uid, gid);
1042 if (error)
1043 goto out;
1044
1045 error = gfs2_quota_check(dip, uid, gid);
1046 if (error)
1047 goto out_quota;
1048
1049 error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED +
1050 RES_QUOTA, 0);
1051 if (error)
1052 goto out_quota;
1053
1054 ul->ul_ut.ut_flags = 0;
1055 error = gfs2_unlinked_ondisk_munge(sdp, ul);
1056
1057 init_dinode(dip, gl, &ul->ul_ut.ut_inum,
1058 mode, uid, gid);
1059
1060 gfs2_quota_change(dip, +1, uid, gid);
1061
1062 gfs2_trans_end(sdp);
1063
1064 out_quota:
1065 gfs2_quota_unlock(dip);
1066
1067 out:
1068 gfs2_alloc_put(dip);
1069
1070 return error;
1071}
1072
1073static int link_dinode(struct gfs2_inode *dip, struct qstr *name,
1074 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1075{
1076 struct gfs2_sbd *sdp = dip->i_sbd;
1077 struct gfs2_alloc *al;
1078 int alloc_required;
1079 struct buffer_head *dibh;
1080 int error;
1081
1082 al = gfs2_alloc_get(dip);
1083
1084 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1085 if (error)
1086 goto fail;
1087
1088 error = gfs2_diradd_alloc_required(dip, name, &alloc_required);
1089 if (alloc_required) {
1090 error = gfs2_quota_check(dip, dip->i_di.di_uid,
1091 dip->i_di.di_gid);
1092 if (error)
1093 goto fail_quota_locks;
1094
1095 al->al_requested = sdp->sd_max_dirres;
1096
1097 error = gfs2_inplace_reserve(dip);
1098 if (error)
1099 goto fail_quota_locks;
1100
1101 error = gfs2_trans_begin(sdp,
1102 sdp->sd_max_dirres +
1103 al->al_rgd->rd_ri.ri_length +
1104 2 * RES_DINODE + RES_UNLINKED +
1105 RES_STATFS + RES_QUOTA, 0);
1106 if (error)
1107 goto fail_ipreserv;
1108 } else {
1109 error = gfs2_trans_begin(sdp,
1110 RES_LEAF +
1111 2 * RES_DINODE +
1112 RES_UNLINKED, 0);
1113 if (error)
1114 goto fail_quota_locks;
1115 }
1116
1117 error = gfs2_dir_add(dip, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
1118 if (error)
1119 goto fail_end_trans;
1120
1121 error = gfs2_meta_inode_buffer(ip, &dibh);
1122 if (error)
1123 goto fail_end_trans;
1124 ip->i_di.di_nlink = 1;
1125 gfs2_trans_add_bh(ip->i_gl, dibh);
1126 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1127 brelse(dibh);
1128
1129 error = gfs2_unlinked_ondisk_rm(sdp, ul);
1130 if (error)
1131 goto fail_end_trans;
1132
1133 return 0;
1134
1135 fail_end_trans:
1136 gfs2_trans_end(sdp);
1137
1138 fail_ipreserv:
1139 if (dip->i_alloc.al_rgd)
1140 gfs2_inplace_release(dip);
1141
1142 fail_quota_locks:
1143 gfs2_quota_unlock(dip);
1144
1145 fail:
1146 gfs2_alloc_put(dip);
1147
1148 return error;
1149}
1150
1151/**
1152 * gfs2_createi - Create a new inode
1153 * @ghs: An array of two holders
1154 * @name: The name of the new file
1155 * @mode: the permissions on the new inode
1156 *
1157 * @ghs[0] is an initialized holder for the directory
1158 * @ghs[1] is the holder for the inode lock
1159 *
1160 * If the return value is 0, the glocks on both the directory and the new
1161 * file are held. A transaction has been started and an inplace reservation
1162 * is held, as well.
1163 *
1164 * Returns: errno
1165 */
1166
1167int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode)
1168{
1169 struct gfs2_inode *dip = get_gl2ip(ghs->gh_gl);
1170 struct gfs2_sbd *sdp = dip->i_sbd;
1171 struct gfs2_unlinked *ul;
1172 struct gfs2_inode *ip;
1173 int error;
1174
1175 if (!name->len || name->len > GFS2_FNAMESIZE)
1176 return -ENAMETOOLONG;
1177
1178 error = gfs2_unlinked_get(sdp, &ul);
1179 if (error)
1180 return error;
1181
1182 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1183 error = gfs2_glock_nq(ghs);
1184 if (error)
1185 goto fail;
1186
1187 error = create_ok(dip, name, mode);
1188 if (error)
1189 goto fail_gunlock;
1190
1191 error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino);
1192 if (error)
1193 goto fail_gunlock;
1194
1195 error = alloc_dinode(dip, ul);
1196 if (error)
1197 goto fail_gunlock;
1198
1199 if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) {
1200 gfs2_glock_dq(ghs);
1201
1202 error = gfs2_glock_nq_num(sdp,
1203 ul->ul_ut.ut_inum.no_addr,
1204 &gfs2_inode_glops,
1205 LM_ST_EXCLUSIVE, GL_SKIP,
1206 ghs + 1);
1207 if (error) {
1208 gfs2_unlinked_put(sdp, ul);
1209 return error;
1210 }
1211
1212 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1213 error = gfs2_glock_nq(ghs);
1214 if (error) {
1215 gfs2_glock_dq_uninit(ghs + 1);
1216 gfs2_unlinked_put(sdp, ul);
1217 return error;
1218 }
1219
1220 error = create_ok(dip, name, mode);
1221 if (error)
1222 goto fail_gunlock2;
1223 } else {
1224 error = gfs2_glock_nq_num(sdp,
1225 ul->ul_ut.ut_inum.no_addr,
1226 &gfs2_inode_glops,
1227 LM_ST_EXCLUSIVE, GL_SKIP,
1228 ghs + 1);
1229 if (error)
1230 goto fail_gunlock;
1231 }
1232
1233 error = make_dinode(dip, ghs[1].gh_gl, mode, ul);
1234 if (error)
1235 goto fail_gunlock2;
1236
1237 error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip);
1238 if (error)
1239 goto fail_gunlock2;
1240
1241 error = gfs2_inode_refresh(ip);
1242 if (error)
1243 goto fail_iput;
1244
1245 error = gfs2_acl_create(dip, ip);
1246 if (error)
1247 goto fail_iput;
1248
1249 error = link_dinode(dip, name, ip, ul);
1250 if (error)
1251 goto fail_iput;
1252
1253 gfs2_unlinked_put(sdp, ul);
1254
1255 return 0;
1256
1257 fail_iput:
1258 gfs2_inode_put(ip);
1259
1260 fail_gunlock2:
1261 gfs2_glock_dq_uninit(ghs + 1);
1262
1263 fail_gunlock:
1264 gfs2_glock_dq(ghs);
1265
1266 fail:
1267 gfs2_unlinked_put(sdp, ul);
1268
1269 return error;
1270}
1271
1272/**
1273 * gfs2_unlinki - Unlink a file
1274 * @dip: The inode of the directory
1275 * @name: The name of the file to be unlinked
1276 * @ip: The inode of the file to be removed
1277 *
1278 * Assumes Glocks on both dip and ip are held.
1279 *
1280 * Returns: errno
1281 */
1282
1283int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
1284 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1285{
1286 struct gfs2_sbd *sdp = dip->i_sbd;
1287 int error;
1288
1289 error = gfs2_dir_del(dip, name);
1290 if (error)
1291 return error;
1292
1293 error = gfs2_change_nlink(ip, -1);
1294 if (error)
1295 return error;
1296
1297 /* If this inode is being unlinked from the directory structure,
1298 we need to mark that in the log so that it isn't lost during
1299 a crash. */
1300
1301 if (!ip->i_di.di_nlink) {
1302 ul->ul_ut.ut_inum = ip->i_num;
1303 error = gfs2_unlinked_ondisk_add(sdp, ul);
1304 if (!error)
1305 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1306 }
1307
1308 return error;
1309}
1310
1311/**
1312 * gfs2_rmdiri - Remove a directory
1313 * @dip: The parent directory of the directory to be removed
1314 * @name: The name of the directory to be removed
1315 * @ip: The GFS2 inode of the directory to be removed
1316 *
1317 * Assumes Glocks on dip and ip are held
1318 *
1319 * Returns: errno
1320 */
1321
1322int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
1323 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1324{
1325 struct gfs2_sbd *sdp = dip->i_sbd;
1326 struct qstr dotname;
1327 int error;
1328
1329 if (ip->i_di.di_entries != 2) {
1330 if (gfs2_consist_inode(ip))
1331 gfs2_dinode_print(&ip->i_di);
1332 return -EIO;
1333 }
1334
1335 error = gfs2_dir_del(dip, name);
1336 if (error)
1337 return error;
1338
1339 error = gfs2_change_nlink(dip, -1);
1340 if (error)
1341 return error;
1342
1343 dotname.len = 1;
1344 dotname.name = ".";
1345 error = gfs2_dir_del(ip, &dotname);
1346 if (error)
1347 return error;
1348
1349 dotname.len = 2;
1350 dotname.name = "..";
1351 error = gfs2_dir_del(ip, &dotname);
1352 if (error)
1353 return error;
1354
1355 error = gfs2_change_nlink(ip, -2);
1356 if (error)
1357 return error;
1358
1359 /* This inode is being unlinked from the directory structure and
1360 we need to mark that in the log so that it isn't lost during
1361 a crash. */
1362
1363 ul->ul_ut.ut_inum = ip->i_num;
1364 error = gfs2_unlinked_ondisk_add(sdp, ul);
1365 if (!error)
1366 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1367
1368 return error;
1369}
1370
1371/*
1372 * gfs2_unlink_ok - check to see that a inode is still in a directory
1373 * @dip: the directory
1374 * @name: the name of the file
1375 * @ip: the inode
1376 *
1377 * Assumes that the lock on (at least) @dip is held.
1378 *
1379 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
1380 */
1381
1382int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
1383 struct gfs2_inode *ip)
1384{
1385 struct gfs2_inum inum;
1386 unsigned int type;
1387 int error;
1388
1389 if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1390 return -EPERM;
1391
1392 if ((dip->i_di.di_mode & S_ISVTX) &&
1393 dip->i_di.di_uid != current->fsuid &&
1394 ip->i_di.di_uid != current->fsuid &&
1395 !capable(CAP_FOWNER))
1396 return -EPERM;
1397
1398 if (IS_APPEND(dip->i_vnode))
1399 return -EPERM;
1400
1401 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
1402 if (error)
1403 return error;
1404
1405 error = gfs2_dir_search(dip, name, &inum, &type);
1406 if (error)
1407 return error;
1408
1409 if (!gfs2_inum_equal(&inum, &ip->i_num))
1410 return -ENOENT;
1411
1412 if (IF2DT(ip->i_di.di_mode) != type) {
1413 gfs2_consist_inode(dip);
1414 return -EIO;
1415 }
1416
1417 return 0;
1418}
1419
1420/*
1421 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1422 * @this: move this
1423 * @to: to here
1424 *
1425 * Follow @to back to the root and make sure we don't encounter @this
1426 * Assumes we already hold the rename lock.
1427 *
1428 * Returns: errno
1429 */
1430
1431int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1432{
1433 struct gfs2_sbd *sdp = this->i_sbd;
1434 struct gfs2_inode *tmp;
1435 struct qstr dotdot;
1436 int error = 0;
1437
1438 memset(&dotdot, 0, sizeof(struct qstr));
1439 dotdot.name = "..";
1440 dotdot.len = 2;
1441
1442 gfs2_inode_hold(to);
1443
1444 for (;;) {
1445 if (to == this) {
1446 error = -EINVAL;
1447 break;
1448 }
1449 if (to == sdp->sd_root_dir) {
1450 error = 0;
1451 break;
1452 }
1453
1454 error = gfs2_lookupi(to, &dotdot, 1, &tmp);
1455 if (error)
1456 break;
1457
1458 gfs2_inode_put(to);
1459 to = tmp;
1460 }
1461
1462 gfs2_inode_put(to);
1463
1464 return error;
1465}
1466
1467/**
1468 * gfs2_readlinki - return the contents of a symlink
1469 * @ip: the symlink's inode
1470 * @buf: a pointer to the buffer to be filled
1471 * @len: a pointer to the length of @buf
1472 *
1473 * If @buf is too small, a piece of memory is kmalloc()ed and needs
1474 * to be freed by the caller.
1475 *
1476 * Returns: errno
1477 */
1478
1479int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1480{
1481 struct gfs2_holder i_gh;
1482 struct buffer_head *dibh;
1483 unsigned int x;
1484 int error;
1485
1486 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1487 error = gfs2_glock_nq_atime(&i_gh);
1488 if (error) {
1489 gfs2_holder_uninit(&i_gh);
1490 return error;
1491 }
1492
1493 if (!ip->i_di.di_size) {
1494 gfs2_consist_inode(ip);
1495 error = -EIO;
1496 goto out;
1497 }
1498
1499 error = gfs2_meta_inode_buffer(ip, &dibh);
1500 if (error)
1501 goto out;
1502
1503 x = ip->i_di.di_size + 1;
1504 if (x > *len) {
1505 *buf = kmalloc(x, GFP_KERNEL);
1506 if (!*buf) {
1507 error = -ENOMEM;
1508 goto out_brelse;
1509 }
1510 }
1511
1512 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1513 *len = x;
1514
1515 out_brelse:
1516 brelse(dibh);
1517
1518 out:
1519 gfs2_glock_dq_uninit(&i_gh);
1520
1521 return error;
1522}
1523
1524/**
1525 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1526 * conditionally update the inode's atime
1527 * @gh: the holder to acquire
1528 *
1529 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1530 * Update if the difference between the current time and the inode's current
1531 * atime is greater than an interval specified at mount.
1532 *
1533 * Returns: errno
1534 */
1535
1536int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1537{
1538 struct gfs2_glock *gl = gh->gh_gl;
1539 struct gfs2_sbd *sdp = gl->gl_sbd;
1540 struct gfs2_inode *ip = get_gl2ip(gl);
1541 int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1542 unsigned int state;
1543 int flags;
1544 int error;
1545
1546 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1547 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1548 gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1549 return -EINVAL;
1550
1551 state = gh->gh_state;
1552 flags = gh->gh_flags;
1553
1554 error = gfs2_glock_nq(gh);
1555 if (error)
1556 return error;
1557
1558 if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1559 (sdp->sd_vfs->s_flags & MS_RDONLY))
1560 return 0;
1561
1562 curtime = get_seconds();
1563 if (curtime - ip->i_di.di_atime >= quantum) {
1564 gfs2_glock_dq(gh);
1565 gfs2_holder_reinit(LM_ST_EXCLUSIVE,
1566 gh->gh_flags & ~LM_FLAG_ANY,
1567 gh);
1568 error = gfs2_glock_nq(gh);
1569 if (error)
1570 return error;
1571
1572 /* Verify that atime hasn't been updated while we were
1573 trying to get exclusive lock. */
1574
1575 curtime = get_seconds();
1576 if (curtime - ip->i_di.di_atime >= quantum) {
1577 struct buffer_head *dibh;
1578
1579 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1580 if (error == -EROFS)
1581 return 0;
1582 if (error)
1583 goto fail;
1584
1585 error = gfs2_meta_inode_buffer(ip, &dibh);
1586 if (error)
1587 goto fail_end_trans;
1588
1589 ip->i_di.di_atime = curtime;
1590
1591 gfs2_trans_add_bh(ip->i_gl, dibh);
1592 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1593 brelse(dibh);
1594
1595 gfs2_trans_end(sdp);
1596 }
1597
1598 /* If someone else has asked for the glock,
1599 unlock and let them have it. Then reacquire
1600 in the original state. */
1601 if (gfs2_glock_is_blocking(gl)) {
1602 gfs2_glock_dq(gh);
1603 gfs2_holder_reinit(state, flags, gh);
1604 return gfs2_glock_nq(gh);
1605 }
1606 }
1607
1608 return 0;
1609
1610 fail_end_trans:
1611 gfs2_trans_end(sdp);
1612
1613 fail:
1614 gfs2_glock_dq(gh);
1615
1616 return error;
1617}
1618
1619/**
1620 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1621 * @arg_a: the first structure
1622 * @arg_b: the second structure
1623 *
1624 * Returns: 1 if A > B
1625 * -1 if A < B
1626 * 0 if A = B
1627 */
1628
1629static int glock_compare_atime(const void *arg_a, const void *arg_b)
1630{
1631 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1632 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1633 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1634 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1635 int ret = 0;
1636
1637 if (a->ln_number > b->ln_number)
1638 ret = 1;
1639 else if (a->ln_number < b->ln_number)
1640 ret = -1;
1641 else {
1642 if (gh_a->gh_state == LM_ST_SHARED &&
1643 gh_b->gh_state == LM_ST_EXCLUSIVE)
1644 ret = 1;
1645 else if (gh_a->gh_state == LM_ST_SHARED &&
1646 (gh_b->gh_flags & GL_ATIME))
1647 ret = 1;
1648 }
1649
1650 return ret;
1651}
1652
1653/**
1654 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
1655 * atime update
1656 * @num_gh: the number of structures
1657 * @ghs: an array of struct gfs2_holder structures
1658 *
1659 * Returns: 0 on success (all glocks acquired),
1660 * errno on failure (no glocks acquired)
1661 */
1662
1663int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
1664{
1665 struct gfs2_holder **p;
1666 unsigned int x;
1667 int error = 0;
1668
1669 if (!num_gh)
1670 return 0;
1671
1672 if (num_gh == 1) {
1673 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1674 if (ghs->gh_flags & GL_ATIME)
1675 error = gfs2_glock_nq_atime(ghs);
1676 else
1677 error = gfs2_glock_nq(ghs);
1678 return error;
1679 }
1680
1681 p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1682 if (!p)
1683 return -ENOMEM;
1684
1685 for (x = 0; x < num_gh; x++)
1686 p[x] = &ghs[x];
1687
1688 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
1689
1690 for (x = 0; x < num_gh; x++) {
1691 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1692
1693 if (p[x]->gh_flags & GL_ATIME)
1694 error = gfs2_glock_nq_atime(p[x]);
1695 else
1696 error = gfs2_glock_nq(p[x]);
1697
1698 if (error) {
1699 while (x--)
1700 gfs2_glock_dq(p[x]);
1701 break;
1702 }
1703 }
1704
1705 kfree(p);
1706
1707 return error;
1708}
1709
1710/**
1711 * gfs2_try_toss_vnode - See if we can toss a vnode from memory
1712 * @ip: the inode
1713 *
1714 * Returns: 1 if the vnode was tossed
1715 */
1716
1717void gfs2_try_toss_vnode(struct gfs2_inode *ip)
1718{
1719 struct inode *inode;
1720
1721 inode = gfs2_ip2v_lookup(ip);
1722 if (!inode)
1723 return;
1724
1725 d_prune_aliases(inode);
1726
1727 if (S_ISDIR(ip->i_di.di_mode)) {
1728 struct list_head *head = &inode->i_dentry;
1729 struct dentry *d = NULL;
1730
1731 spin_lock(&dcache_lock);
1732 if (list_empty(head))
1733 spin_unlock(&dcache_lock);
1734 else {
1735 d = list_entry(head->next, struct dentry, d_alias);
1736 dget_locked(d);
1737 spin_unlock(&dcache_lock);
1738
1739 if (have_submounts(d))
1740 dput(d);
1741 else {
1742 shrink_dcache_parent(d);
1743 dput(d);
1744 d_prune_aliases(inode);
1745 }
1746 }
1747 }
1748
1749 inode->i_nlink = 0;
1750 iput(inode);
1751}
1752
1753
1754static int
1755__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1756{
1757 struct buffer_head *dibh;
1758 int error;
1759
1760 error = gfs2_meta_inode_buffer(ip, &dibh);
1761 if (!error) {
1762 error = inode_setattr(ip->i_vnode, attr);
1763 gfs2_assert_warn(ip->i_sbd, !error);
1764 gfs2_inode_attr_out(ip);
1765
1766 gfs2_trans_add_bh(ip->i_gl, dibh);
1767 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1768 brelse(dibh);
1769 }
1770 return error;
1771}
1772
1773/**
1774 * gfs2_setattr_simple -
1775 * @ip:
1776 * @attr:
1777 *
1778 * Called with a reference on the vnode.
1779 *
1780 * Returns: errno
1781 */
1782
1783int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1784{
1785 int error;
1786
1787 if (get_transaction)
1788 return __gfs2_setattr_simple(ip, attr);
1789
1790 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
1791 if (error)
1792 return error;
1793
1794 error = __gfs2_setattr_simple(ip, attr);
1795
1796 gfs2_trans_end(ip->i_sbd);
1797
1798 return error;
1799}
1800
/**
 * gfs2_repermission - thin wrapper around permission()
 * @inode: the vnode to check
 * @mask: the access mask that is being requested
 * @nd: passed through to permission()
 *
 * Returns: the result of permission()
 */

int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd)
{
	int ret = permission(inode, mask, nd);

	return ret;
}
1805
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
new file mode 100644
index 000000000000..4df7da51f715
--- /dev/null
+++ b/fs/gfs2/inode.h
@@ -0,0 +1,74 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__
12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
14{
15 return !ip->i_di.di_height;
16}
17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip)
19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21}
22
23void gfs2_inode_attr_in(struct gfs2_inode *ip);
24void gfs2_inode_attr_out(struct gfs2_inode *ip);
25struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
26struct inode *gfs2_ip2v(struct gfs2_inode *ip);
27struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum);
28
29void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type);
30int gfs2_inode_refresh(struct gfs2_inode *ip);
31
32int gfs2_inode_get(struct gfs2_glock *i_gl,
33 struct gfs2_inum *inum, int create,
34 struct gfs2_inode **ipp);
35void gfs2_inode_hold(struct gfs2_inode *ip);
36void gfs2_inode_put(struct gfs2_inode *ip);
37void gfs2_inode_destroy(struct gfs2_inode *ip);
38
39int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
40
41int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
42int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root,
43 struct gfs2_inode **ipp);
44int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode);
45int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
46 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
47int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
48 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
49int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
50 struct gfs2_inode *ip);
51int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
52int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
53
54int gfs2_glock_nq_atime(struct gfs2_holder *gh);
55int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
56
57void gfs2_try_toss_vnode(struct gfs2_inode *ip);
58
59int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
60
61int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd);
62
63static inline int gfs2_lookup_simple(struct gfs2_inode *dip, char *name,
64 struct gfs2_inode **ipp)
65{
66 struct qstr qstr;
67 memset(&qstr, 0, sizeof(struct qstr));
68 qstr.name = name;
69 qstr.len = strlen(name);
70 return gfs2_lookupi(dip, &qstr, 1, ipp);
71}
72
73#endif /* __INODE_DOT_H__ */
74
diff --git a/fs/gfs2/jdata.c b/fs/gfs2/jdata.c
new file mode 100644
index 000000000000..d4adbf171ed3
--- /dev/null
+++ b/fs/gfs2/jdata.c
@@ -0,0 +1,382 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16#include <asm/uaccess.h>
17
18#include "gfs2.h"
19#include "bmap.h"
20#include "inode.h"
21#include "jdata.h"
22#include "meta_io.h"
23#include "trans.h"
24
25int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
26 struct buffer_head **bhp)
27{
28 struct buffer_head *bh;
29 int error = 0;
30
31 if (new) {
32 bh = gfs2_meta_new(ip->i_gl, block);
33 gfs2_trans_add_bh(ip->i_gl, bh);
34 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
35 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
36 } else {
37 error = gfs2_meta_read(ip->i_gl, block,
38 DIO_START | DIO_WAIT, &bh);
39 if (error)
40 return error;
41 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
42 brelse(bh);
43 return -EIO;
44 }
45 }
46
47 *bhp = bh;
48
49 return 0;
50}
51
52/**
53 * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read()
54 * @bh: The buffer to copy from, or NULL meaning zero the buffer
55 * @buf: The buffer to copy/zero
56 * @offset: The offset in the buffer to copy from
57 * @size: The amount of data to copy/zero
58 *
59 * Returns: errno
60 */
61
62int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset,
63 unsigned int size)
64{
65 if (bh)
66 memcpy(*buf, bh->b_data + offset, size);
67 else
68 memset(*buf, 0, size);
69 *buf += size;
70 return 0;
71}
72
73/**
74 * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read()
75 * @bh: The buffer
76 * @buf: The destination of the data
77 * @offset: The offset into the buffer
78 * @size: The amount of data to copy
79 *
80 * Returns: errno
81 */
82
83int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset,
84 unsigned int size)
85{
86 int error;
87
88 if (bh)
89 error = copy_to_user(*buf, bh->b_data + offset, size);
90 else
91 error = clear_user(*buf, size);
92
93 if (error)
94 error = -EFAULT;
95 else
96 *buf += size;
97
98 return error;
99}
100
101static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf,
102 unsigned int offset, unsigned int size,
103 read_copy_fn_t copy_fn)
104{
105 struct buffer_head *dibh;
106 int error;
107
108 error = gfs2_meta_inode_buffer(ip, &dibh);
109 if (!error) {
110 error = copy_fn(dibh, &buf,
111 offset + sizeof(struct gfs2_dinode), size);
112 brelse(dibh);
113 }
114
115 return (error) ? error : size;
116}
117
118/**
119 * gfs2_jdata_read - Read a jdata file
120 * @ip: The GFS2 Inode
121 * @buf: The buffer to place result into
122 * @offset: File offset to begin jdata_readng from
123 * @size: Amount of data to transfer
124 * @copy_fn: Function to actually perform the copy
125 *
126 * The @copy_fn only copies a maximum of a single block at once so
127 * we are safe calling it with int arguments. It is done so that
128 * we don't needlessly put 64bit arguments on the stack and it
129 * also makes the code in the @copy_fn nicer too.
130 *
131 * Returns: The amount of data actually copied or the error
132 */
133
134int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset,
135 unsigned int size, read_copy_fn_t copy_fn)
136{
137 struct gfs2_sbd *sdp = ip->i_sbd;
138 uint64_t lblock, dblock;
139 uint32_t extlen = 0;
140 unsigned int o;
141 int copied = 0;
142 int error = 0;
143
144 if (offset >= ip->i_di.di_size)
145 return 0;
146
147 if ((offset + size) > ip->i_di.di_size)
148 size = ip->i_di.di_size - offset;
149
150 if (!size)
151 return 0;
152
153 if (gfs2_is_stuffed(ip))
154 return jdata_read_stuffed(ip, buf, (unsigned int)offset, size,
155 copy_fn);
156
157 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
158 return -EINVAL;
159
160 lblock = offset;
161 o = do_div(lblock, sdp->sd_jbsize) +
162 sizeof(struct gfs2_meta_header);
163
164 while (copied < size) {
165 unsigned int amount;
166 struct buffer_head *bh;
167 int new;
168
169 amount = size - copied;
170 if (amount > sdp->sd_sb.sb_bsize - o)
171 amount = sdp->sd_sb.sb_bsize - o;
172
173 if (!extlen) {
174 new = 0;
175 error = gfs2_block_map(ip, lblock, &new,
176 &dblock, &extlen);
177 if (error)
178 goto fail;
179 }
180
181 if (extlen > 1)
182 gfs2_meta_ra(ip->i_gl, dblock, extlen);
183
184 if (dblock) {
185 error = gfs2_jdata_get_buffer(ip, dblock, new, &bh);
186 if (error)
187 goto fail;
188 dblock++;
189 extlen--;
190 } else
191 bh = NULL;
192
193 error = copy_fn(bh, &buf, o, amount);
194 brelse(bh);
195 if (error)
196 goto fail;
197
198 copied += amount;
199 lblock++;
200
201 o = sizeof(struct gfs2_meta_header);
202 }
203
204 return copied;
205
206 fail:
207 return (copied) ? copied : error;
208}
209
210/**
211 * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write()
212 * @bh: The buffer to copy to or clear
213 * @buf: The buffer to copy from
214 * @offset: The offset in the buffer to write to
215 * @size: The amount of data to write
216 *
217 * Returns: errno
218 */
219
220int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh,
221 const char **buf, unsigned int offset, unsigned int size)
222{
223 gfs2_trans_add_bh(ip->i_gl, bh);
224 memcpy(bh->b_data + offset, *buf, size);
225
226 *buf += size;
227
228 return 0;
229}
230
231/**
232 * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write()
233 * @bh: The buffer to copy to or clear
234 * @buf: The buffer to copy from
235 * @offset: The offset in the buffer to write to
236 * @size: The amount of data to write
237 *
238 * Returns: errno
239 */
240
241int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh,
242 const char __user **buf, unsigned int offset, unsigned int size)
243{
244 int error = 0;
245
246 gfs2_trans_add_bh(ip->i_gl, bh);
247 if (copy_from_user(bh->b_data + offset, *buf, size))
248 error = -EFAULT;
249 else
250 *buf += size;
251
252 return error;
253}
254
255static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf,
256 unsigned int offset, unsigned int size,
257 write_copy_fn_t copy_fn)
258{
259 struct buffer_head *dibh;
260 int error;
261
262 error = gfs2_meta_inode_buffer(ip, &dibh);
263 if (error)
264 return error;
265
266 error = copy_fn(ip,
267 dibh, &buf,
268 offset + sizeof(struct gfs2_dinode), size);
269 if (!error) {
270 if (ip->i_di.di_size < offset + size)
271 ip->i_di.di_size = offset + size;
272 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
273 gfs2_dinode_out(&ip->i_di, dibh->b_data);
274 }
275
276 brelse(dibh);
277
278 return (error) ? error : size;
279}
280
281/**
282 * gfs2_jdata_write - Write bytes to a file
283 * @ip: The GFS2 inode
284 * @buf: The buffer containing information to be written
285 * @offset: The file offset to start writing at
286 * @size: The amount of data to write
287 * @copy_fn: Function to do the actual copying
288 *
289 * Returns: The number of bytes correctly written or error code
290 */
291
292int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset,
293 unsigned int size, write_copy_fn_t copy_fn)
294{
295 struct gfs2_sbd *sdp = ip->i_sbd;
296 struct buffer_head *dibh;
297 uint64_t lblock, dblock;
298 uint32_t extlen = 0;
299 unsigned int o;
300 int copied = 0;
301 int error = 0;
302
303 if (!size)
304 return 0;
305
306 if (gfs2_is_stuffed(ip) &&
307 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
308 return jdata_write_stuffed(ip, buf, (unsigned int)offset, size,
309 copy_fn);
310
311 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
312 return -EINVAL;
313
314 if (gfs2_is_stuffed(ip)) {
315 error = gfs2_unstuff_dinode(ip, NULL, NULL);
316 if (error)
317 return error;
318 }
319
320 lblock = offset;
321 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
322
323 while (copied < size) {
324 unsigned int amount;
325 struct buffer_head *bh;
326 int new;
327
328 amount = size - copied;
329 if (amount > sdp->sd_sb.sb_bsize - o)
330 amount = sdp->sd_sb.sb_bsize - o;
331
332 if (!extlen) {
333 new = 1;
334 error = gfs2_block_map(ip, lblock, &new,
335 &dblock, &extlen);
336 if (error)
337 goto fail;
338 error = -EIO;
339 if (gfs2_assert_withdraw(sdp, dblock))
340 goto fail;
341 }
342
343 error = gfs2_jdata_get_buffer(ip, dblock,
344 (amount == sdp->sd_jbsize) ? 1 : new,
345 &bh);
346 if (error)
347 goto fail;
348
349 error = copy_fn(ip, bh, &buf, o, amount);
350 brelse(bh);
351 if (error)
352 goto fail;
353
354 copied += amount;
355 lblock++;
356 dblock++;
357 extlen--;
358
359 o = sizeof(struct gfs2_meta_header);
360 }
361
362 out:
363 error = gfs2_meta_inode_buffer(ip, &dibh);
364 if (error)
365 return error;
366
367 if (ip->i_di.di_size < offset + copied)
368 ip->i_di.di_size = offset + copied;
369 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
370
371 gfs2_trans_add_bh(ip->i_gl, dibh);
372 gfs2_dinode_out(&ip->i_di, dibh->b_data);
373 brelse(dibh);
374
375 return copied;
376
377 fail:
378 if (copied)
379 goto out;
380 return error;
381}
382
diff --git a/fs/gfs2/jdata.h b/fs/gfs2/jdata.h
new file mode 100644
index 000000000000..95e18fcb8f82
--- /dev/null
+++ b/fs/gfs2/jdata.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __FILE_DOT_H__
11#define __FILE_DOT_H__
12
13int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
14 struct buffer_head **bhp);
15
16typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf,
17 unsigned int offset, unsigned int size);
18typedef int (*write_copy_fn_t) (struct gfs2_inode *ip,
19 struct buffer_head *bh, const char **buf,
20 unsigned int offset, unsigned int size);
21
22int gfs2_copy2mem(struct buffer_head *bh, char **buf,
23 unsigned int offset, unsigned int size);
24int gfs2_copy2user(struct buffer_head *bh, char __user **buf,
25 unsigned int offset, unsigned int size);
26int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf,
27 uint64_t offset, unsigned int size,
28 read_copy_fn_t copy_fn);
29
30int gfs2_copy_from_mem(struct gfs2_inode *ip,
31 struct buffer_head *bh, const char **buf,
32 unsigned int offset, unsigned int size);
33int gfs2_copy_from_user(struct gfs2_inode *ip,
34 struct buffer_head *bh, const char __user **buf,
35 unsigned int offset, unsigned int size);
36int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf,
37 uint64_t offset, unsigned int size,
38 write_copy_fn_t copy_fn);
39
40static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf,
41 uint64_t offset, unsigned int size)
42{
43 return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem);
44}
45
46static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf,
47 uint64_t offset, unsigned int size)
48{
49 return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem);
50}
51
52#endif /* __FILE_DOT_H__ */
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
new file mode 100644
index 000000000000..cc7442261b2e
--- /dev/null
+++ b/fs/gfs2/lm.c
@@ -0,0 +1,235 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "glock.h"
20#include "lm.h"
21#include "super.h"
22
23/**
24 * gfs2_lm_mount - mount a locking protocol
25 * @sdp: the filesystem
26 * @args: mount arguements
27 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
28 *
29 * Returns: errno
30 */
31
32int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
33{
34 char *proto = sdp->sd_proto_name;
35 char *table = sdp->sd_table_name;
36 int flags = 0;
37 int error;
38
39 if (sdp->sd_args.ar_spectator)
40 flags |= LM_MFLAG_SPECTATOR;
41
42 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
43
44 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
45 gfs2_glock_cb, sdp,
46 GFS2_MIN_LVB_SIZE, flags,
47 &sdp->sd_lockstruct, &sdp->sd_kobj);
48 if (error) {
49 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
50 proto, table, sdp->sd_args.ar_hostdata);
51 goto out;
52 }
53
54 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
55 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
56 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
57 GFS2_MIN_LVB_SIZE)) {
58 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
59 goto out;
60 }
61
62 if (sdp->sd_args.ar_spectator)
63 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
64 else
65 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
66 sdp->sd_lockstruct.ls_jid);
67
68 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
69
70 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
71 !sdp->sd_args.ar_ignore_local_fs) {
72 sdp->sd_args.ar_localflocks = 1;
73 sdp->sd_args.ar_localcaching = 1;
74 }
75
76 out:
77 return error;
78}
79
80void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
81{
82 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
83 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(sdp->sd_lockstruct.ls_lockspace);
84}
85
86void gfs2_lm_unmount(struct gfs2_sbd *sdp)
87{
88 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
89 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
90}
91
92int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
93{
94 va_list args;
95
96 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
97 return 0;
98
99 va_start(args, fmt);
100 vprintk(fmt, args);
101 va_end(args);
102
103 fs_err(sdp, "about to withdraw from the cluster\n");
104 if (sdp->sd_args.ar_debug)
105 BUG();
106
107 fs_err(sdp, "waiting for outstanding I/O\n");
108
109 /* FIXME: suspend dm device so oustanding bio's complete
110 and all further io requests fail */
111
112 fs_err(sdp, "telling LM to withdraw\n");
113 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
114 fs_err(sdp, "withdrawn\n");
115 dump_stack();
116
117 return -1;
118}
119
120int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
121 lm_lock_t **lockp)
122{
123 int error;
124 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
125 error = -EIO;
126 else
127 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(sdp->sd_lockstruct.ls_lockspace, name, lockp);
128 return error;
129}
130
131void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock)
132{
133 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
134 sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
135}
136
137unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
138 unsigned int cur_state, unsigned int req_state,
139 unsigned int flags)
140{
141 int ret;
142 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
143 ret = 0;
144 else
145 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
146 cur_state,
147 req_state, flags);
148 return ret;
149}
150
151unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
152 unsigned int cur_state)
153{
154 int ret;
155 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
156 ret = 0;
157 else
158 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
159 return ret;
160}
161
162void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock)
163{
164 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
165 sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
166}
167
168int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp)
169{
170 int error;
171 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
172 error = -EIO;
173 else
174 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
175 return error;
176}
177
178void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
179{
180 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
181 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
182}
183
184void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
185{
186 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
187 sdp->sd_lockstruct.ls_ops->lm_sync_lvb(lock, lvb);
188}
189
190int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
191 struct file *file, struct file_lock *fl)
192{
193 int error;
194 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
195 error = -EIO;
196 else
197 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
198 sdp->sd_lockstruct.ls_lockspace,
199 name, file, fl);
200 return error;
201}
202
203int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
204 struct file *file, int cmd, struct file_lock *fl)
205{
206 int error;
207 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
208 error = -EIO;
209 else
210 error = sdp->sd_lockstruct.ls_ops->lm_plock(
211 sdp->sd_lockstruct.ls_lockspace,
212 name, file, cmd, fl);
213 return error;
214}
215
216int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
217 struct file *file, struct file_lock *fl)
218{
219 int error;
220 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
221 error = -EIO;
222 else
223 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
224 sdp->sd_lockstruct.ls_lockspace,
225 name, file, fl);
226 return error;
227}
228
229void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
230 unsigned int message)
231{
232 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
233 sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace, jid, message);
234}
235
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
new file mode 100644
index 000000000000..ec812424fdec
--- /dev/null
+++ b/fs/gfs2/lm.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
14void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
15void gfs2_lm_unmount(struct gfs2_sbd *sdp);
16int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
17__attribute__ ((format(printf, 2, 3)));
18int gfs2_lm_get_lock(struct gfs2_sbd *sdp,
19 struct lm_lockname *name, lm_lock_t **lockp);
20void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock);
21unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
22 unsigned int cur_state, unsigned int req_state,
23 unsigned int flags);
24unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
25 unsigned int cur_state);
26void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock);
27int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp);
28void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
29void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
30int gfs2_lm_plock_get(struct gfs2_sbd *sdp,
31 struct lm_lockname *name,
32 struct file *file, struct file_lock *fl);
33int gfs2_lm_plock(struct gfs2_sbd *sdp,
34 struct lm_lockname *name,
35 struct file *file, int cmd, struct file_lock *fl);
36int gfs2_lm_punlock(struct gfs2_sbd *sdp,
37 struct lm_lockname *name,
38 struct file *file, struct file_lock *fl);
39void gfs2_lm_recovery_done(struct gfs2_sbd *sdp,
40 unsigned int jid, unsigned int message);
41
42#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/lm_interface.h b/fs/gfs2/lm_interface.h
new file mode 100644
index 000000000000..378432f17f27
--- /dev/null
+++ b/fs/gfs2/lm_interface.h
@@ -0,0 +1,295 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_INTERFACE_DOT_H__
11#define __LM_INTERFACE_DOT_H__
12
13/*
14 * Opaque handles represent the lock module's lockspace structure, the lock
15 * module's lock structures, and GFS's file system (superblock) structure.
16 */
17
18typedef void lm_lockspace_t;
19typedef void lm_lock_t;
20typedef void lm_fsdata_t;
21
22typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
23 void *data);
24
25/*
26 * lm_mount() flags
27 *
28 * LM_MFLAG_SPECTATOR
29 * GFS is asking to join the filesystem's lockspace, but it doesn't want to
30 * modify the filesystem. The lock module shouldn't assign a journal to the FS
31 * mount. It shouldn't send recovery callbacks to the FS mount. If the node
32 * dies or withdraws, all locks can be wiped immediately.
33 */
34
35#define LM_MFLAG_SPECTATOR 0x00000001
36
37/*
38 * lm_lockstruct flags
39 *
40 * LM_LSFLAG_LOCAL
41 * The lock_nolock module returns LM_LSFLAG_LOCAL to GFS, indicating that GFS
42 * can make single-node optimizations.
43 */
44
45#define LM_LSFLAG_LOCAL 0x00000001
46
47/*
48 * lm_lockname types
49 */
50
51#define LM_TYPE_RESERVED 0x00
52#define LM_TYPE_NONDISK 0x01
53#define LM_TYPE_INODE 0x02
54#define LM_TYPE_RGRP 0x03
55#define LM_TYPE_META 0x04
56#define LM_TYPE_IOPEN 0x05
57#define LM_TYPE_FLOCK 0x06
58#define LM_TYPE_PLOCK 0x07
59#define LM_TYPE_QUOTA 0x08
60#define LM_TYPE_JOURNAL 0x09
61
62/*
63 * lm_lock() states
64 *
65 * SHARED is compatible with SHARED, not with DEFERRED or EX.
66 * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
67 */
68
69#define LM_ST_UNLOCKED 0
70#define LM_ST_EXCLUSIVE 1
71#define LM_ST_DEFERRED 2
72#define LM_ST_SHARED 3
73
74/*
75 * lm_lock() flags
76 *
77 * LM_FLAG_TRY
78 * Don't wait to acquire the lock if it can't be granted immediately.
79 *
80 * LM_FLAG_TRY_1CB
81 * Send one blocking callback if TRY is set and the lock is not granted.
82 *
83 * LM_FLAG_NOEXP
84 * GFS sets this flag on lock requests it makes while doing journal recovery.
85 * These special requests should not be blocked due to the recovery like
86 * ordinary locks would be.
87 *
88 * LM_FLAG_ANY
89 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
90 * also be granted in SHARED. The preferred state is whichever is compatible
91 * with other granted locks, or the specified state if no other locks exist.
92 *
93 * LM_FLAG_PRIORITY
94 * Override fairness considerations. Suppose a lock is held in a shared state
95 * and there is a pending request for the deferred state. A shared lock
96 * request with the priority flag would be allowed to bypass the deferred
97 * request and directly join the other shared lock. A shared lock request
98 * without the priority flag might be forced to wait until the deferred
99 * request had acquired and released the lock.
100 */
101
102#define LM_FLAG_TRY 0x00000001
103#define LM_FLAG_TRY_1CB 0x00000002
104#define LM_FLAG_NOEXP 0x00000004
105#define LM_FLAG_ANY 0x00000008
106#define LM_FLAG_PRIORITY 0x00000010
107
108/*
109 * lm_lock() and lm_async_cb return flags
110 *
111 * LM_OUT_ST_MASK
112 * Masks the lower two bits of lock state in the returned value.
113 *
114 * LM_OUT_CACHEABLE
115 * The lock hasn't been released so GFS can continue to cache data for it.
116 *
117 * LM_OUT_CANCELED
118 * The lock request was canceled.
119 *
120 * LM_OUT_ASYNC
121 * The result of the request will be returned in an LM_CB_ASYNC callback.
122 */
123
124#define LM_OUT_ST_MASK 0x00000003
125#define LM_OUT_CACHEABLE 0x00000004
126#define LM_OUT_CANCELED 0x00000008
127#define LM_OUT_ASYNC 0x00000080
128#define LM_OUT_ERROR 0x00000100
129
130/*
131 * lm_callback_t types
132 *
133 * LM_CB_NEED_E LM_CB_NEED_D LM_CB_NEED_S
134 * Blocking callback, a remote node is requesting the given lock in
135 * EXCLUSIVE, DEFERRED, or SHARED.
136 *
137 * LM_CB_NEED_RECOVERY
138 * The given journal needs to be recovered.
139 *
140 * LM_CB_DROPLOCKS
141 * Reduce the number of cached locks.
142 *
143 * LM_CB_ASYNC
144 * The given lock has been granted.
145 */
146
147#define LM_CB_NEED_E 257
148#define LM_CB_NEED_D 258
149#define LM_CB_NEED_S 259
150#define LM_CB_NEED_RECOVERY 260
151#define LM_CB_DROPLOCKS 261
152#define LM_CB_ASYNC 262
153
154/*
155 * lm_recovery_done() messages
156 */
157
158#define LM_RD_GAVEUP 308
159#define LM_RD_SUCCESS 309
160
161
/*
 * Name of a lock: a 64-bit number plus a type.  Two names are the same lock
 * when both fields match (see lm_name_equal()).
 */
struct lm_lockname {
	uint64_t ln_number;
	unsigned int ln_type;	/* presumably one of LM_TYPE_* - confirm */
};
166
/*
 * Non-zero when the two struct lm_lockname pointers name the same lock,
 * i.e. both number and type match.  Each argument is evaluated twice.
 */
#define lm_name_equal(name1, name2) \
	(((name1)->ln_number == (name2)->ln_number) && \
	 ((name1)->ln_type == (name2)->ln_type))

171struct lm_async_cb {
172 struct lm_lockname lc_name;
173 int lc_ret;
174};
175
176struct lm_lockstruct;
177
178struct lm_lockops {
179 char lm_proto_name[256];
180
181 /*
182 * Mount/Unmount
183 */
184
185 int (*lm_mount) (char *table_name, char *host_data,
186 lm_callback_t cb, lm_fsdata_t *fsdata,
187 unsigned int min_lvb_size, int flags,
188 struct lm_lockstruct *lockstruct,
189 struct kobject *fskobj);
190
191 void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
192
193 void (*lm_unmount) (lm_lockspace_t *lockspace);
194
195 void (*lm_withdraw) (lm_lockspace_t *lockspace);
196
197 /*
198 * Lock oriented operations
199 */
200
201 int (*lm_get_lock) (lm_lockspace_t *lockspace,
202 struct lm_lockname *name, lm_lock_t **lockp);
203
204 void (*lm_put_lock) (lm_lock_t *lock);
205
206 unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
207 unsigned int req_state, unsigned int flags);
208
209 unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
210
211 void (*lm_cancel) (lm_lock_t *lock);
212
213 int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
214 void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
215 void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
216
217 /*
218 * Posix Lock oriented operations
219 */
220
221 int (*lm_plock_get) (lm_lockspace_t *lockspace,
222 struct lm_lockname *name,
223 struct file *file, struct file_lock *fl);
224
225 int (*lm_plock) (lm_lockspace_t *lockspace,
226 struct lm_lockname *name,
227 struct file *file, int cmd, struct file_lock *fl);
228
229 int (*lm_punlock) (lm_lockspace_t *lockspace,
230 struct lm_lockname *name,
231 struct file *file, struct file_lock *fl);
232
233 /*
234 * Client oriented operations
235 */
236
237 void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
238 unsigned int message);
239
240 struct module *lm_owner;
241};
242
243/*
244 * lm_mount() return values
245 *
246 * ls_jid - the journal ID this node should use
247 * ls_first - this node is the first to mount the file system
248 * ls_lvb_size - size in bytes of lock value blocks
249 * ls_lockspace - lock module's context for this file system
250 * ls_ops - lock module's functions
251 * ls_flags - lock module features
252 */
253
254struct lm_lockstruct {
255 unsigned int ls_jid;
256 unsigned int ls_first;
257 unsigned int ls_lvb_size;
258 lm_lockspace_t *ls_lockspace;
259 struct lm_lockops *ls_ops;
260 int ls_flags;
261};
262
263void __init gfs2_init_lmh(void);
264
265/*
266 * Lock module bottom interface. A lock module makes itself available to GFS
267 * with these functions.
268 *
269 * For the time being, we copy the gfs1 lock module bottom interface so the
270 * same lock modules can be used with both gfs1 and gfs2 (it won't be possible
271 * to load both gfs1 and gfs2 at once.) Eventually the lock modules will fork
272 * for gfs1/gfs2 and this API can change to the gfs2_ prefix.
273 */
274
275int gfs_register_lockproto(struct lm_lockops *proto);
276
277void gfs_unregister_lockproto(struct lm_lockops *proto);
278
279/*
280 * Lock module top interface. GFS calls these functions when mounting or
281 * unmounting a file system.
282 */
283
284int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
285 lm_callback_t cb, lm_fsdata_t *fsdata,
286 unsigned int min_lvb_size, int flags,
287 struct lm_lockstruct *lockstruct,
288 struct kobject *fskobj);
289
290void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct);
291
292void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct);
293
294#endif /* __LM_INTERFACE_DOT_H__ */
295
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
new file mode 100644
index 000000000000..2d2f8fe53999
--- /dev/null
+++ b/fs/gfs2/locking.c
@@ -0,0 +1,192 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19
20#include "lm_interface.h"
21
22struct lmh_wrapper {
23 struct list_head lw_list;
24 struct lm_lockops *lw_ops;
25};
26
27/* List of registered low-level locking protocols. A file system selects one
28 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
29
30static struct list_head lmh_list;
31static struct semaphore lmh_lock;
32
33/**
34 * gfs_register_lockproto - Register a low-level locking protocol
35 * @proto: the protocol definition
36 *
37 * Returns: 0 on success, -EXXX on failure
38 */
39
40int gfs_register_lockproto(struct lm_lockops *proto)
41{
42 struct lmh_wrapper *lw;
43
44 down(&lmh_lock);
45
46 list_for_each_entry(lw, &lmh_list, lw_list) {
47 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
48 up(&lmh_lock);
49 printk("GFS2: protocol %s already exists\n",
50 proto->lm_proto_name);
51 return -EEXIST;
52 }
53 }
54
55 lw = kmalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
56 if (!lw) {
57 up(&lmh_lock);
58 return -ENOMEM;
59 }
60 memset(lw, 0, sizeof(struct lmh_wrapper));
61
62 lw->lw_ops = proto;
63 list_add(&lw->lw_list, &lmh_list);
64
65 up(&lmh_lock);
66
67 return 0;
68}
69
70/**
71 * gfs_unregister_lockproto - Unregister a low-level locking protocol
72 * @proto: the protocol definition
73 *
74 */
75
76void gfs_unregister_lockproto(struct lm_lockops *proto)
77{
78 struct lmh_wrapper *lw;
79
80 down(&lmh_lock);
81
82 list_for_each_entry(lw, &lmh_list, lw_list) {
83 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
84 list_del(&lw->lw_list);
85 up(&lmh_lock);
86 kfree(lw);
87 return;
88 }
89 }
90
91 up(&lmh_lock);
92
93 printk("GFS2: can't unregister lock protocol %s\n",
94 proto->lm_proto_name);
95}
96
97/**
98 * gfs2_mount_lockproto - Mount a lock protocol
99 * @proto_name - the name of the protocol
100 * @table_name - the name of the lock space
101 * @host_data - data specific to this host
102 * @cb - the callback to the code using the lock module
103 * @fsdata - data to pass back with the callback
104 * @min_lvb_size - the mininum LVB size that the caller can deal with
105 * @flags - LM_MFLAG_*
106 * @lockstruct - a structure returned describing the mount
107 *
108 * Returns: 0 on success, -EXXX on failure
109 */
110
111int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
112 lm_callback_t cb, lm_fsdata_t *fsdata,
113 unsigned int min_lvb_size, int flags,
114 struct lm_lockstruct *lockstruct,
115 struct kobject *fskobj)
116{
117 struct lmh_wrapper *lw = NULL;
118 int try = 0;
119 int error, found;
120
121 retry:
122 down(&lmh_lock);
123
124 found = 0;
125 list_for_each_entry(lw, &lmh_list, lw_list) {
126 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
127 found = 1;
128 break;
129 }
130 }
131
132 if (!found) {
133 if (!try && capable(CAP_SYS_MODULE)) {
134 try = 1;
135 up(&lmh_lock);
136 request_module(proto_name);
137 goto retry;
138 }
139 printk("GFS2: can't find protocol %s\n", proto_name);
140 error = -ENOENT;
141 goto out;
142 }
143
144 if (!try_module_get(lw->lw_ops->lm_owner)) {
145 try = 0;
146 up(&lmh_lock);
147 msleep(1000);
148 goto retry;
149 }
150
151 error = lw->lw_ops->lm_mount(table_name, host_data, cb, fsdata,
152 min_lvb_size, flags, lockstruct, fskobj);
153 if (error)
154 module_put(lw->lw_ops->lm_owner);
155 out:
156 up(&lmh_lock);
157 return error;
158}
159
160void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
161{
162 down(&lmh_lock);
163 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
164 if (lockstruct->ls_ops->lm_owner)
165 module_put(lockstruct->ls_ops->lm_owner);
166 up(&lmh_lock);
167}
168
169/**
170 * gfs2_withdraw_lockproto - abnormally unmount a lock module
171 * @lockstruct: the lockstruct passed into mount
172 *
173 */
174
175void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
176{
177 down(&lmh_lock);
178 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
179 if (lockstruct->ls_ops->lm_owner)
180 module_put(lockstruct->ls_ops->lm_owner);
181 up(&lmh_lock);
182}
183
184void __init gfs2_init_lmh(void)
185{
186 init_MUTEX(&lmh_lock);
187 INIT_LIST_HEAD(&lmh_list);
188}
189
190EXPORT_SYMBOL_GPL(gfs_register_lockproto);
191EXPORT_SYMBOL_GPL(gfs_unregister_lockproto);
192
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
new file mode 100644
index 000000000000..736d0d33dd1b
--- /dev/null
+++ b/fs/gfs2/log.c
@@ -0,0 +1,659 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "log.h"
21#include "lops.h"
22#include "meta_io.h"
23
24#define PULL 1
25
26static inline int is_done(struct gfs2_sbd *sdp, atomic_t *a)
27{
28 int done;
29 gfs2_log_lock(sdp);
30 done = atomic_read(a) ? 0 : 1;
31 gfs2_log_unlock(sdp);
32 return done;
33}
34
35static void do_lock_wait(struct gfs2_sbd *sdp, wait_queue_head_t *wq,
36 atomic_t *a)
37{
38 gfs2_log_unlock(sdp);
39 wait_event(*wq, is_done(sdp, a));
40 gfs2_log_lock(sdp);
41}
42
43static void lock_for_trans(struct gfs2_sbd *sdp)
44{
45 gfs2_log_lock(sdp);
46 do_lock_wait(sdp, &sdp->sd_log_trans_wq, &sdp->sd_log_flush_count);
47 atomic_inc(&sdp->sd_log_trans_count);
48 gfs2_log_unlock(sdp);
49}
50
51static void unlock_from_trans(struct gfs2_sbd *sdp)
52{
53 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_trans_count));
54 if (atomic_dec_and_test(&sdp->sd_log_trans_count))
55 wake_up(&sdp->sd_log_flush_wq);
56}
57
58void gfs2_lock_for_flush(struct gfs2_sbd *sdp)
59{
60 gfs2_log_lock(sdp);
61 atomic_inc(&sdp->sd_log_flush_count);
62 do_lock_wait(sdp, &sdp->sd_log_flush_wq, &sdp->sd_log_trans_count);
63 gfs2_log_unlock(sdp);
64}
65
66void gfs2_unlock_from_flush(struct gfs2_sbd *sdp)
67{
68 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_flush_count));
69 if (atomic_dec_and_test(&sdp->sd_log_flush_count))
70 wake_up(&sdp->sd_log_trans_wq);
71}
72
73/**
74 * gfs2_struct2blk - compute stuff
75 * @sdp: the filesystem
76 * @nstruct: the number of structures
77 * @ssize: the size of the structures
78 *
79 * Compute the number of log descriptor blocks needed to hold a certain number
80 * of structures of a certain size.
81 *
82 * Returns: the number of blocks needed (minimum is always 1)
83 */
84
85unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
86 unsigned int ssize)
87{
88 unsigned int blks;
89 unsigned int first, second;
90
91 blks = 1;
92 first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
93
94 if (nstruct > first) {
95 second = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / ssize;
96 blks += DIV_RU(nstruct - first, second);
97 }
98
99 return blks;
100}
101
102void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
103{
104 struct list_head *head = &sdp->sd_ail1_list;
105 uint64_t sync_gen;
106 struct list_head *first, *tmp;
107 struct gfs2_ail *first_ai, *ai;
108
109 gfs2_log_lock(sdp);
110 if (list_empty(head)) {
111 gfs2_log_unlock(sdp);
112 return;
113 }
114 sync_gen = sdp->sd_ail_sync_gen++;
115
116 first = head->prev;
117 first_ai = list_entry(first, struct gfs2_ail, ai_list);
118 first_ai->ai_sync_gen = sync_gen;
119 gfs2_ail1_start_one(sdp, first_ai);
120
121 if (flags & DIO_ALL)
122 first = NULL;
123
124 for (;;) {
125 if (first &&
126 (head->prev != first ||
127 gfs2_ail1_empty_one(sdp, first_ai, 0)))
128 break;
129
130 for (tmp = head->prev; tmp != head; tmp = tmp->prev) {
131 ai = list_entry(tmp, struct gfs2_ail, ai_list);
132 if (ai->ai_sync_gen >= sync_gen)
133 continue;
134 ai->ai_sync_gen = sync_gen;
135 gfs2_ail1_start_one(sdp, ai);
136 break;
137 }
138
139 if (tmp == head)
140 break;
141 }
142
143 gfs2_log_unlock(sdp);
144}
145
146int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
147{
148 struct gfs2_ail *ai, *s;
149 int ret;
150
151 gfs2_log_lock(sdp);
152
153 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
154 if (gfs2_ail1_empty_one(sdp, ai, flags))
155 list_move(&ai->ai_list, &sdp->sd_ail2_list);
156 else if (!(flags & DIO_ALL))
157 break;
158 }
159
160 ret = list_empty(&sdp->sd_ail1_list);
161
162 gfs2_log_unlock(sdp);
163
164 return ret;
165}
166
167static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
168{
169 struct gfs2_ail *ai, *safe;
170 unsigned int old_tail = sdp->sd_log_tail;
171 int wrap = (new_tail < old_tail);
172 int a, b, rm;
173
174 gfs2_log_lock(sdp);
175
176 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
177 a = (old_tail <= ai->ai_first);
178 b = (ai->ai_first < new_tail);
179 rm = (wrap) ? (a || b) : (a && b);
180 if (!rm)
181 continue;
182
183 gfs2_ail2_empty_one(sdp, ai);
184 list_del(&ai->ai_list);
185 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
186 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
187 kfree(ai);
188 }
189
190 gfs2_log_unlock(sdp);
191}
192
193/**
194 * gfs2_log_reserve - Make a log reservation
195 * @sdp: The GFS2 superblock
196 * @blks: The number of blocks to reserve
197 *
198 * Returns: errno
199 */
200
201int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
202{
203 LIST_HEAD(list);
204 unsigned int try = 0;
205
206 if (gfs2_assert_warn(sdp, blks) ||
207 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
208 return -EINVAL;
209
210 for (;;) {
211 gfs2_log_lock(sdp);
212
213 if (list_empty(&list)) {
214 list_add_tail(&list, &sdp->sd_log_blks_list);
215 while (sdp->sd_log_blks_list.next != &list) {
216 DECLARE_WAITQUEUE(__wait_chan, current);
217 set_current_state(TASK_UNINTERRUPTIBLE);
218 add_wait_queue(&sdp->sd_log_blks_wait,
219 &__wait_chan);
220 gfs2_log_unlock(sdp);
221 schedule();
222 gfs2_log_lock(sdp);
223 remove_wait_queue(&sdp->sd_log_blks_wait,
224 &__wait_chan);
225 set_current_state(TASK_RUNNING);
226 }
227 }
228
229 /* Never give away the last block so we can
230 always pull the tail if we need to. */
231 if (sdp->sd_log_blks_free > blks) {
232 sdp->sd_log_blks_free -= blks;
233 list_del(&list);
234 gfs2_log_unlock(sdp);
235 wake_up(&sdp->sd_log_blks_wait);
236 break;
237 }
238
239 gfs2_log_unlock(sdp);
240
241 gfs2_ail1_empty(sdp, 0);
242 gfs2_log_flush(sdp);
243
244 if (try++)
245 gfs2_ail1_start(sdp, 0);
246 }
247
248 lock_for_trans(sdp);
249
250 return 0;
251}
252
253/**
254 * gfs2_log_release - Release a given number of log blocks
255 * @sdp: The GFS2 superblock
256 * @blks: The number of blocks
257 *
258 */
259
260void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
261{
262 unlock_from_trans(sdp);
263
264 gfs2_log_lock(sdp);
265 sdp->sd_log_blks_free += blks;
266 gfs2_assert_withdraw(sdp,
267 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
268 gfs2_log_unlock(sdp);
269}
270
271static uint64_t log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
272{
273 int new = 0;
274 uint64_t dbn;
275 int error;
276
277 error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, &new, &dbn, NULL);
278 gfs2_assert_withdraw(sdp, !error && dbn);
279
280 return dbn;
281}
282
283/**
284 * log_distance - Compute distance between two journal blocks
285 * @sdp: The GFS2 superblock
286 * @newer: The most recent journal block of the pair
287 * @older: The older journal block of the pair
288 *
289 * Compute the distance (in the journal direction) between two
290 * blocks in the journal
291 *
292 * Returns: the distance in blocks
293 */
294
295static inline unsigned int log_distance(struct gfs2_sbd *sdp,
296 unsigned int newer,
297 unsigned int older)
298{
299 int dist;
300
301 dist = newer - older;
302 if (dist < 0)
303 dist += sdp->sd_jdesc->jd_blocks;
304
305 return dist;
306}
307
308static unsigned int current_tail(struct gfs2_sbd *sdp)
309{
310 struct gfs2_ail *ai;
311 unsigned int tail;
312
313 gfs2_log_lock(sdp);
314
315 if (list_empty(&sdp->sd_ail1_list))
316 tail = sdp->sd_log_head;
317 else {
318 ai = list_entry(sdp->sd_ail1_list.prev,
319 struct gfs2_ail, ai_list);
320 tail = ai->ai_first;
321 }
322
323 gfs2_log_unlock(sdp);
324
325 return tail;
326}
327
328static inline void log_incr_head(struct gfs2_sbd *sdp)
329{
330 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
331 gfs2_assert_withdraw(sdp,
332 sdp->sd_log_flush_head == sdp->sd_log_head);
333
334 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
335 sdp->sd_log_flush_head = 0;
336 sdp->sd_log_flush_wrapped = 1;
337 }
338}
339
340/**
341 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
342 * @sdp: The GFS2 superblock
343 *
344 * Returns: the buffer_head
345 */
346
347struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
348{
349 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
350 struct gfs2_log_buf *lb;
351 struct buffer_head *bh;
352
353 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_KERNEL | __GFP_NOFAIL);
354 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
355
356 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
357 lock_buffer(bh);
358 memset(bh->b_data, 0, bh->b_size);
359 set_buffer_uptodate(bh);
360 clear_buffer_dirty(bh);
361 unlock_buffer(bh);
362
363 log_incr_head(sdp);
364
365 return bh;
366}
367
368/**
369 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
370 * @sdp: the filesystem
371 * @data: the data the buffer_head should point to
372 *
373 * Returns: the log buffer descriptor
374 */
375
376struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
377 struct buffer_head *real)
378{
379 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
380 struct gfs2_log_buf *lb;
381 struct buffer_head *bh;
382
383 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_KERNEL | __GFP_NOFAIL);
384 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
385 lb->lb_real = real;
386
387 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
388 atomic_set(&bh->b_count, 1);
389 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
390 set_bh_page(bh, virt_to_page(real->b_data),
391 ((unsigned long)real->b_data) & (PAGE_SIZE - 1));
392 bh->b_blocknr = blkno;
393 bh->b_size = sdp->sd_sb.sb_bsize;
394 bh->b_bdev = sdp->sd_vfs->s_bdev;
395
396 log_incr_head(sdp);
397
398 return bh;
399}
400
401static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
402{
403 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
404
405 ail2_empty(sdp, new_tail);
406
407 gfs2_log_lock(sdp);
408 sdp->sd_log_blks_free += dist - ((pull) ? 1 : 0);
409 gfs2_assert_withdraw(sdp,
410 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
411 gfs2_log_unlock(sdp);
412
413 sdp->sd_log_tail = new_tail;
414}
415
416/**
417 * log_write_header - Get and initialize a journal header buffer
418 * @sdp: The GFS2 superblock
419 *
420 * Returns: the initialized log buffer descriptor
421 */
422
423static void log_write_header(struct gfs2_sbd *sdp, uint32_t flags, int pull)
424{
425 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
426 struct buffer_head *bh;
427 struct gfs2_log_header *lh;
428 unsigned int tail;
429 uint32_t hash;
430
431 atomic_inc(&sdp->sd_log_flush_ondisk);
432
433 bh = sb_getblk(sdp->sd_vfs, blkno);
434 lock_buffer(bh);
435 memset(bh->b_data, 0, bh->b_size);
436 set_buffer_uptodate(bh);
437 clear_buffer_dirty(bh);
438 unlock_buffer(bh);
439
440 gfs2_ail1_empty(sdp, 0);
441 tail = current_tail(sdp);
442
443 lh = (struct gfs2_log_header *)bh->b_data;
444 memset(lh, 0, sizeof(struct gfs2_log_header));
445 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
446 lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
447 lh->lh_header.mh_format = cpu_to_be16(GFS2_FORMAT_LH);
448 lh->lh_sequence = be64_to_cpu(sdp->sd_log_sequence++);
449 lh->lh_flags = be32_to_cpu(flags);
450 lh->lh_tail = be32_to_cpu(tail);
451 lh->lh_blkno = be32_to_cpu(sdp->sd_log_flush_head);
452 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
453 lh->lh_hash = cpu_to_be32(hash);
454
455 set_buffer_dirty(bh);
456 if (sync_dirty_buffer(bh))
457 gfs2_io_error_bh(sdp, bh);
458 brelse(bh);
459
460 if (sdp->sd_log_tail != tail)
461 log_pull_tail(sdp, tail, pull);
462 else
463 gfs2_assert_withdraw(sdp, !pull);
464
465 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
466 log_incr_head(sdp);
467}
468
469static void log_flush_commit(struct gfs2_sbd *sdp)
470{
471 struct list_head *head = &sdp->sd_log_flush_list;
472 struct gfs2_log_buf *lb;
473 struct buffer_head *bh;
474 unsigned int d;
475
476 d = log_distance(sdp, sdp->sd_log_flush_head, sdp->sd_log_head);
477
478 gfs2_assert_withdraw(sdp, d + 1 == sdp->sd_log_blks_reserved);
479
480 while (!list_empty(head)) {
481 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
482 list_del(&lb->lb_list);
483 bh = lb->lb_bh;
484
485 wait_on_buffer(bh);
486 if (!buffer_uptodate(bh))
487 gfs2_io_error_bh(sdp, bh);
488 if (lb->lb_real) {
489 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
490 schedule();
491 free_buffer_head(bh);
492 } else
493 brelse(bh);
494 kfree(lb);
495 }
496
497 log_write_header(sdp, 0, 0);
498}
499
500/**
501 * gfs2_log_flush_i - flush incore transaction(s)
502 * @sdp: the filesystem
503 * @gl: The glock structure to flush. If NULL, flush the whole incore log
504 *
505 */
506
507void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
508{
509 struct gfs2_ail *ai;
510
511 atomic_inc(&sdp->sd_log_flush_incore);
512
513 ai = kzalloc(sizeof(struct gfs2_ail), GFP_KERNEL | __GFP_NOFAIL);
514 INIT_LIST_HEAD(&ai->ai_ail1_list);
515 INIT_LIST_HEAD(&ai->ai_ail2_list);
516
517 gfs2_lock_for_flush(sdp);
518 down(&sdp->sd_log_flush_lock);
519
520 gfs2_assert_withdraw(sdp,
521 sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
522 gfs2_assert_withdraw(sdp,
523 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
524
525 if (gl && list_empty(&gl->gl_le.le_list)) {
526 up(&sdp->sd_log_flush_lock);
527 gfs2_unlock_from_flush(sdp);
528 kfree(ai);
529 return;
530 }
531
532 sdp->sd_log_flush_head = sdp->sd_log_head;
533 sdp->sd_log_flush_wrapped = 0;
534 ai->ai_first = sdp->sd_log_flush_head;
535
536 lops_before_commit(sdp);
537 if (!list_empty(&sdp->sd_log_flush_list))
538 log_flush_commit(sdp);
539 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
540 log_write_header(sdp, 0, PULL);
541 lops_after_commit(sdp, ai);
542
543 sdp->sd_log_head = sdp->sd_log_flush_head;
544 if (sdp->sd_log_flush_wrapped)
545 sdp->sd_log_wraps++;
546
547 sdp->sd_log_blks_reserved =
548 sdp->sd_log_commited_buf =
549 sdp->sd_log_commited_revoke = 0;
550
551 gfs2_log_lock(sdp);
552 if (!list_empty(&ai->ai_ail1_list)) {
553 list_add(&ai->ai_list, &sdp->sd_ail1_list);
554 ai = NULL;
555 }
556 gfs2_log_unlock(sdp);
557
558 up(&sdp->sd_log_flush_lock);
559 sdp->sd_vfs->s_dirt = 0;
560 gfs2_unlock_from_flush(sdp);
561
562 kfree(ai);
563}
564
565static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
566{
567 unsigned int reserved = 1;
568 unsigned int old;
569
570 gfs2_log_lock(sdp);
571
572 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
573 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
574 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
575 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
576
577 if (sdp->sd_log_commited_buf)
578 reserved += 1 + sdp->sd_log_commited_buf + sdp->sd_log_commited_buf/503;
579 if (sdp->sd_log_commited_revoke)
580 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
581 sizeof(uint64_t));
582
583 old = sdp->sd_log_blks_free;
584 sdp->sd_log_blks_free += tr->tr_reserved -
585 (reserved - sdp->sd_log_blks_reserved);
586
587 gfs2_assert_withdraw(sdp,
588 sdp->sd_log_blks_free >= old);
589 gfs2_assert_withdraw(sdp,
590 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
591
592 sdp->sd_log_blks_reserved = reserved;
593
594 gfs2_log_unlock(sdp);
595}
596
597/**
598 * gfs2_log_commit - Commit a transaction to the log
599 * @sdp: the filesystem
600 * @tr: the transaction
601 *
602 * Returns: errno
603 */
604
605void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
606{
607 log_refund(sdp, tr);
608 lops_incore_commit(sdp, tr);
609
610 sdp->sd_vfs->s_dirt = 1;
611 unlock_from_trans(sdp);
612
613 kfree(tr);
614
615 gfs2_log_lock(sdp);
616 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
617 gfs2_log_unlock(sdp);
618 gfs2_log_flush(sdp);
619 } else
620 gfs2_log_unlock(sdp);
621}
622
623/**
624 * gfs2_log_shutdown - write a shutdown header into a journal
625 * @sdp: the filesystem
626 *
627 */
628
629void gfs2_log_shutdown(struct gfs2_sbd *sdp)
630{
631 down(&sdp->sd_log_flush_lock);
632
633 gfs2_assert_withdraw(sdp, !atomic_read(&sdp->sd_log_trans_count));
634 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
635 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
636 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
637 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
638 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
639 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
640 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
641
642 sdp->sd_log_flush_head = sdp->sd_log_head;
643 sdp->sd_log_flush_wrapped = 0;
644
645 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
646
647 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free ==
648 sdp->sd_jdesc->jd_blocks);
649 gfs2_assert_withdraw(sdp, sdp->sd_log_head == sdp->sd_log_tail);
650 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail2_list));
651
652 sdp->sd_log_head = sdp->sd_log_flush_head;
653 if (sdp->sd_log_flush_wrapped)
654 sdp->sd_log_wraps++;
655 sdp->sd_log_tail = sdp->sd_log_head;
656
657 up(&sdp->sd_log_flush_lock);
658}
659
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
new file mode 100644
index 000000000000..4413cda81154
--- /dev/null
+++ b/fs/gfs2/log.h
@@ -0,0 +1,68 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOG_DOT_H__
11#define __LOG_DOT_H__
12
13/**
14 * gfs2_log_lock - acquire the right to mess with the log manager
15 * @sdp: the filesystem
16 *
17 */
18
19static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
20{
21 spin_lock(&sdp->sd_log_lock);
22}
23
24/**
25 * gfs2_log_unlock - release the right to mess with the log manager
26 * @sdp: the filesystem
27 *
28 */
29
30static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
31{
32 spin_unlock(&sdp->sd_log_lock);
33}
34
35static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
36 unsigned int value)
37{
38 if (++value == sdp->sd_jdesc->jd_blocks) {
39 value = 0;
40 sdp->sd_log_wraps++;
41 }
42 sdp->sd_log_head = sdp->sd_log_tail = value;
43}
44
45void gfs2_lock_for_flush(struct gfs2_sbd *sdp);
46void gfs2_unlock_from_flush(struct gfs2_sbd *sdp);
47
48unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
49 unsigned int ssize);
50
51void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
52int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
53
54int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
56
57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real);
60
61#define gfs2_log_flush(sdp) gfs2_log_flush_i((sdp), NULL)
62#define gfs2_log_flush_glock(gl) gfs2_log_flush_i((gl)->gl_sbd, (gl))
63void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
64void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
65
66void gfs2_log_shutdown(struct gfs2_sbd *sdp);
67
68#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
new file mode 100644
index 000000000000..d501e8224ed0
--- /dev/null
+++ b/fs/gfs2/lops.c
@@ -0,0 +1,534 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "glock.h"
19#include "log.h"
20#include "lops.h"
21#include "meta_io.h"
22#include "recovery.h"
23#include "rgrp.h"
24#include "trans.h"
25
26static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
27{
28 struct gfs2_glock *gl;
29
30 get_transaction->tr_touched = 1;
31
32 if (!list_empty(&le->le_list))
33 return;
34
35 gl = container_of(le, struct gfs2_glock, gl_le);
36 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
37 return;
38 gfs2_glock_hold(gl);
39 set_bit(GLF_DIRTY, &gl->gl_flags);
40
41 gfs2_log_lock(sdp);
42 sdp->sd_log_num_gl++;
43 list_add(&le->le_list, &sdp->sd_log_le_gl);
44 gfs2_log_unlock(sdp);
45}
46
47static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
48{
49 struct list_head *head = &sdp->sd_log_le_gl;
50 struct gfs2_glock *gl;
51
52 while (!list_empty(head)) {
53 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
54 list_del_init(&gl->gl_le.le_list);
55 sdp->sd_log_num_gl--;
56
57 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
58 gfs2_glock_put(gl);
59 }
60 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
61}
62
63static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
64{
65 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
66 struct gfs2_trans *tr;
67
68 if (!list_empty(&bd->bd_list_tr))
69 return;
70
71 tr = get_transaction;
72 tr->tr_touched = 1;
73 tr->tr_num_buf++;
74 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
75
76 if (!list_empty(&le->le_list))
77 return;
78
79 gfs2_trans_add_gl(bd->bd_gl);
80
81 gfs2_meta_check(sdp, bd->bd_bh);
82 gfs2_meta_pin(sdp, bd->bd_bh);
83
84 gfs2_log_lock(sdp);
85 sdp->sd_log_num_buf++;
86 list_add(&le->le_list, &sdp->sd_log_le_buf);
87 gfs2_log_unlock(sdp);
88
89 tr->tr_num_buf_new++;
90}
91
92static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
93{
94 struct list_head *head = &tr->tr_list_buf;
95 struct gfs2_bufdata *bd;
96
97 while (!list_empty(head)) {
98 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
99 list_del_init(&bd->bd_list_tr);
100 tr->tr_num_buf--;
101 }
102 gfs2_assert_warn(sdp, !tr->tr_num_buf);
103}
104
105static void buf_lo_before_commit(struct gfs2_sbd *sdp)
106{
107 struct buffer_head *bh;
108 struct gfs2_log_descriptor *ld;
109 struct gfs2_bufdata *bd1 = NULL, *bd2;
110 unsigned int total = sdp->sd_log_num_buf;
111 unsigned int offset = sizeof(struct gfs2_log_descriptor);
112 unsigned int limit;
113 unsigned int num;
114 unsigned n;
115 __be64 *ptr;
116
117 offset += (sizeof(__be64) - 1);
118 offset &= ~(sizeof(__be64) - 1);
119 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
120 /* for 4k blocks, limit = 503 */
121
122 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
123 while(total) {
124 num = total;
125 if (total > limit)
126 num = limit;
127 bh = gfs2_log_get_buf(sdp);
128 ld = (struct gfs2_log_descriptor *)bh->b_data;
129 ptr = (__be64 *)(bh->b_data + offset);
130 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
131 ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
132 ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
133 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
134 ld->ld_length = cpu_to_be32(num + 1);
135 ld->ld_data1 = cpu_to_be32(num);
136 ld->ld_data2 = cpu_to_be32(0);
137 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
138
139 n = 0;
140 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, bd_le.le_list) {
141 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
142 if (++n >= num)
143 break;
144 }
145
146 set_buffer_dirty(bh);
147 ll_rw_block(WRITE, 1, &bh);
148
149 n = 0;
150 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, bd_le.le_list) {
151 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
152 set_buffer_dirty(bh);
153 ll_rw_block(WRITE, 1, &bh);
154 if (++n >= num)
155 break;
156 }
157
158 total -= num;
159 }
160}
161
162static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
163{
164 struct list_head *head = &sdp->sd_log_le_buf;
165 struct gfs2_bufdata *bd;
166
167 while (!list_empty(head)) {
168 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
169 list_del_init(&bd->bd_le.le_list);
170 sdp->sd_log_num_buf--;
171
172 gfs2_meta_unpin(sdp, bd->bd_bh, ai);
173 }
174 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
175}
176
177static void buf_lo_before_scan(struct gfs2_jdesc *jd,
178 struct gfs2_log_header *head, int pass)
179{
180 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
181
182 if (pass != 0)
183 return;
184
185 sdp->sd_found_blocks = 0;
186 sdp->sd_replayed_blocks = 0;
187}
188
189static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
190 struct gfs2_log_descriptor *ld, __be64 *ptr,
191 int pass)
192{
193 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
194 struct gfs2_glock *gl = jd->jd_inode->i_gl;
195 unsigned int blks = be32_to_cpu(ld->ld_data1);
196 struct buffer_head *bh_log, *bh_ip;
197 uint64_t blkno;
198 int error = 0;
199
200 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
201 return 0;
202
203 gfs2_replay_incr_blk(sdp, &start);
204
205 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
206 blkno = be64_to_cpu(*ptr++);
207
208 sdp->sd_found_blocks++;
209
210 if (gfs2_revoke_check(sdp, blkno, start))
211 continue;
212
213 error = gfs2_replay_read_block(jd, start, &bh_log);
214 if (error)
215 return error;
216
217 bh_ip = gfs2_meta_new(gl, blkno);
218 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
219
220 if (gfs2_meta_check(sdp, bh_ip))
221 error = -EIO;
222 else
223 mark_buffer_dirty(bh_ip);
224
225 brelse(bh_log);
226 brelse(bh_ip);
227
228 if (error)
229 break;
230
231 sdp->sd_replayed_blocks++;
232 }
233
234 return error;
235}
236
237static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
238{
239 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
240
241 if (error) {
242 gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
243 return;
244 }
245 if (pass != 1)
246 return;
247
248 gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
249
250 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
251 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
252}
253
254static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
255{
256 struct gfs2_trans *tr;
257
258 tr = get_transaction;
259 tr->tr_touched = 1;
260 tr->tr_num_revoke++;
261
262 gfs2_log_lock(sdp);
263 sdp->sd_log_num_revoke++;
264 list_add(&le->le_list, &sdp->sd_log_le_revoke);
265 gfs2_log_unlock(sdp);
266}
267
268static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
269{
270 struct gfs2_log_descriptor *ld;
271 struct gfs2_meta_header *mh;
272 struct buffer_head *bh;
273 unsigned int offset;
274 struct list_head *head = &sdp->sd_log_le_revoke;
275 struct gfs2_revoke *rv;
276
277 if (!sdp->sd_log_num_revoke)
278 return;
279
280 bh = gfs2_log_get_buf(sdp);
281 ld = (struct gfs2_log_descriptor *)bh->b_data;
282 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
283 ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
284 ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
285 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
286 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(uint64_t)));
287 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
288 ld->ld_data2 = cpu_to_be32(0);
289 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
290 offset = sizeof(struct gfs2_log_descriptor);
291
292 while (!list_empty(head)) {
293 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
294 list_del(&rv->rv_le.le_list);
295 sdp->sd_log_num_revoke--;
296
297 if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) {
298 set_buffer_dirty(bh);
299 ll_rw_block(WRITE, 1, &bh);
300
301 bh = gfs2_log_get_buf(sdp);
302 mh = (struct gfs2_meta_header *)bh->b_data;
303 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
304 mh->mh_type = cpu_to_be16(GFS2_METATYPE_LB);
305 mh->mh_format = cpu_to_be16(GFS2_FORMAT_LB);
306 offset = sizeof(struct gfs2_meta_header);
307 }
308
309 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
310 kfree(rv);
311
312 offset += sizeof(uint64_t);
313 }
314 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
315
316 set_buffer_dirty(bh);
317 ll_rw_block(WRITE, 1, &bh);
318}
319
320static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
321 struct gfs2_log_header *head, int pass)
322{
323 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
324
325 if (pass != 0)
326 return;
327
328 sdp->sd_found_revokes = 0;
329 sdp->sd_replay_tail = head->lh_tail;
330}
331
332static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
333 struct gfs2_log_descriptor *ld, __be64 *ptr,
334 int pass)
335{
336 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
337 unsigned int blks = be32_to_cpu(ld->ld_length);
338 unsigned int revokes = be32_to_cpu(ld->ld_data1);
339 struct buffer_head *bh;
340 unsigned int offset;
341 uint64_t blkno;
342 int first = 1;
343 int error;
344
345 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
346 return 0;
347
348 offset = sizeof(struct gfs2_log_descriptor);
349
350 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
351 error = gfs2_replay_read_block(jd, start, &bh);
352 if (error)
353 return error;
354
355 if (!first)
356 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
357
358 while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
359 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
360
361 error = gfs2_revoke_add(sdp, blkno, start);
362 if (error < 0)
363 return error;
364 else if (error)
365 sdp->sd_found_revokes++;
366
367 if (!--revokes)
368 break;
369 offset += sizeof(uint64_t);
370 }
371
372 brelse(bh);
373 offset = sizeof(struct gfs2_meta_header);
374 first = 0;
375 }
376
377 return 0;
378}
379
380static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
381{
382 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
383
384 if (error) {
385 gfs2_revoke_clean(sdp);
386 return;
387 }
388 if (pass != 1)
389 return;
390
391 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
392 jd->jd_jid, sdp->sd_found_revokes);
393
394 gfs2_revoke_clean(sdp);
395}
396
397static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
398{
399 struct gfs2_rgrpd *rgd;
400
401 get_transaction->tr_touched = 1;
402
403 if (!list_empty(&le->le_list))
404 return;
405
406 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
407 gfs2_rgrp_bh_hold(rgd);
408
409 gfs2_log_lock(sdp);
410 sdp->sd_log_num_rg++;
411 list_add(&le->le_list, &sdp->sd_log_le_rg);
412 gfs2_log_unlock(sdp);
413}
414
415static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
416{
417 struct list_head *head = &sdp->sd_log_le_rg;
418 struct gfs2_rgrpd *rgd;
419
420 while (!list_empty(head)) {
421 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
422 list_del_init(&rgd->rd_le.le_list);
423 sdp->sd_log_num_rg--;
424
425 gfs2_rgrp_repolish_clones(rgd);
426 gfs2_rgrp_bh_put(rgd);
427 }
428 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
429}
430
431static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
432{
433 get_transaction->tr_touched = 1;
434
435 gfs2_log_lock(sdp);
436 sdp->sd_log_num_databuf++;
437 list_add(&le->le_list, &sdp->sd_log_le_databuf);
438 gfs2_log_unlock(sdp);
439}
440
441static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
442{
443 struct list_head *head = &sdp->sd_log_le_databuf;
444 LIST_HEAD(started);
445 struct gfs2_databuf *db;
446 struct buffer_head *bh;
447
448 while (!list_empty(head)) {
449 db = list_entry(head->prev, struct gfs2_databuf, db_le.le_list);
450 list_move(&db->db_le.le_list, &started);
451
452 gfs2_log_lock(sdp);
453 bh = db->db_bh;
454 if (bh) {
455 get_bh(bh);
456 gfs2_log_unlock(sdp);
457 if (buffer_dirty(bh)) {
458 wait_on_buffer(bh);
459 ll_rw_block(WRITE, 1, &bh);
460 }
461 brelse(bh);
462 } else
463 gfs2_log_unlock(sdp);
464 }
465
466 while (!list_empty(&started)) {
467 db = list_entry(started.next, struct gfs2_databuf,
468 db_le.le_list);
469 list_del(&db->db_le.le_list);
470 sdp->sd_log_num_databuf--;
471
472 gfs2_log_lock(sdp);
473 bh = db->db_bh;
474 if (bh) {
475 set_v2db(bh, NULL);
476 gfs2_log_unlock(sdp);
477 wait_on_buffer(bh);
478 brelse(bh);
479 } else
480 gfs2_log_unlock(sdp);
481
482 kfree(db);
483 }
484
485 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
486}
487
/* Log operations for glocks: only the add and after-commit hooks are set. */
struct gfs2_log_operations gfs2_glock_lops = {
	.lo_add = glock_lo_add,
	.lo_after_commit = glock_lo_after_commit,
	.lo_name = "glock"
};
493
/*
 * Log operations for metadata buffers: covers both the commit hooks and the
 * journal scan (replay) hooks.
 */
struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf"
};
504
/*
 * Log operations for revokes: written out before commit and collected again
 * by the journal scan hooks.
 */
struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke"
};
513
/* Log operations for resource groups: add and after-commit hooks only. */
struct gfs2_log_operations gfs2_rg_lops = {
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
	.lo_name = "rg"
};
519
/* Log operations for data buffers: add and before-commit hooks only. */
struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_name = "databuf"
};
525
/*
 * NULL-terminated table of all log operation vectors.  The lops_*() inline
 * helpers in lops.h walk this table in order and call whichever hooks each
 * entry provides.
 */
struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_glock_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	&gfs2_rg_lops,
	&gfs2_databuf_lops,
	NULL
};
534
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
new file mode 100644
index 000000000000..417f5aade4b1
--- /dev/null
+++ b/fs/gfs2/lops.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOPS_DOT_H__
11#define __LOPS_DOT_H__
12
13extern struct gfs2_log_operations gfs2_glock_lops;
14extern struct gfs2_log_operations gfs2_buf_lops;
15extern struct gfs2_log_operations gfs2_revoke_lops;
16extern struct gfs2_log_operations gfs2_rg_lops;
17extern struct gfs2_log_operations gfs2_databuf_lops;
18
19extern struct gfs2_log_operations *gfs2_log_ops[];
20
21static inline void lops_init_le(struct gfs2_log_element *le,
22 struct gfs2_log_operations *lops)
23{
24 INIT_LIST_HEAD(&le->le_list);
25 le->le_ops = lops;
26}
27
28static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
29{
30 if (le->le_ops->lo_add)
31 le->le_ops->lo_add(sdp, le);
32}
33
34static inline void lops_incore_commit(struct gfs2_sbd *sdp,
35 struct gfs2_trans *tr)
36{
37 int x;
38 for (x = 0; gfs2_log_ops[x]; x++)
39 if (gfs2_log_ops[x]->lo_incore_commit)
40 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
41}
42
43static inline void lops_before_commit(struct gfs2_sbd *sdp)
44{
45 int x;
46 for (x = 0; gfs2_log_ops[x]; x++)
47 if (gfs2_log_ops[x]->lo_before_commit)
48 gfs2_log_ops[x]->lo_before_commit(sdp);
49}
50
51static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 int x;
54 for (x = 0; gfs2_log_ops[x]; x++)
55 if (gfs2_log_ops[x]->lo_after_commit)
56 gfs2_log_ops[x]->lo_after_commit(sdp, ai);
57}
58
59static inline void lops_before_scan(struct gfs2_jdesc *jd,
60 struct gfs2_log_header *head,
61 unsigned int pass)
62{
63 int x;
64 for (x = 0; gfs2_log_ops[x]; x++)
65 if (gfs2_log_ops[x]->lo_before_scan)
66 gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
67}
68
69static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
70 struct gfs2_log_descriptor *ld,
71 __be64 *ptr,
72 unsigned int pass)
73{
74 int x, error;
75 for (x = 0; gfs2_log_ops[x]; x++)
76 if (gfs2_log_ops[x]->lo_scan_elements) {
77 error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
78 ld, ptr, pass);
79 if (error)
80 return error;
81 }
82
83 return 0;
84}
85
86static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
87 unsigned int pass)
88{
89 int x;
90 for (x = 0; gfs2_log_ops[x]; x++)
91 if (gfs2_log_ops[x]->lo_before_scan)
92 gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
93}
94
95#endif /* __LOPS_DOT_H__ */
96
diff --git a/fs/gfs2/lvb.c b/fs/gfs2/lvb.c
new file mode 100644
index 000000000000..8af62568a3f4
--- /dev/null
+++ b/fs/gfs2/lvb.c
@@ -0,0 +1,48 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18
/*
 * Print one member of the structure pointed to by ptr as "member = value".
 * Expands to a single expression with no trailing semicolon, so an
 * invocation behaves like an ordinary statement (safe in if/else bodies).
 */
#define pv(ptr, member, fmt) printk(" "#member" = "fmt"\n", (ptr)->member)
20
21void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb)
22{
23 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
24
25 qb->qb_magic = be32_to_cpu(str->qb_magic);
26 qb->qb_limit = be64_to_cpu(str->qb_limit);
27 qb->qb_warn = be64_to_cpu(str->qb_warn);
28 qb->qb_value = be64_to_cpu(str->qb_value);
29}
30
31void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb)
32{
33 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
34
35 str->qb_magic = cpu_to_be32(qb->qb_magic);
36 str->qb_limit = cpu_to_be64(qb->qb_limit);
37 str->qb_warn = cpu_to_be64(qb->qb_warn);
38 str->qb_value = cpu_to_be64(qb->qb_value);
39}
40
/**
 * gfs2_quota_lvb_print - dump the fields of a quota LVB with printk
 * @qb: the (CPU-endian) quota LVB to print
 *
 * NOTE(review): the 64-bit members are passed to %llu/%lld uncast, which
 * assumes uint64_t/int64_t are (unsigned) long long on this arch - confirm.
 */
void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb)
{
	pv(qb, qb_magic, "%u");
	pv(qb, qb_limit, "%llu");
	pv(qb, qb_warn, "%llu");
	pv(qb, qb_value, "%lld");
}
48
diff --git a/fs/gfs2/lvb.h b/fs/gfs2/lvb.h
new file mode 100644
index 000000000000..ca9732b2d9f4
--- /dev/null
+++ b/fs/gfs2/lvb.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LVB_DOT_H__
11#define __LVB_DOT_H__
12
13#define GFS2_MIN_LVB_SIZE 32
14
/*
 * Quota information carried in a lock value block.  gfs2_quota_lvb_in() and
 * gfs2_quota_lvb_out() convert between this CPU-endian form and the
 * big-endian byte image.
 */
struct gfs2_quota_lvb {
	uint32_t qb_magic;
	uint32_t __pad; /* not touched by the in/out converters */
	uint64_t qb_limit; /* Hard limit of # blocks to alloc */
	uint64_t qb_warn; /* Warn user when alloc is above this # */
	int64_t qb_value; /* Current # blocks allocated */
};
22
23void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb);
24void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb);
25void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb);
26
27#endif /* __LVB_DOT_H__ */
28
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
new file mode 100644
index 000000000000..0c60f2b10fdd
--- /dev/null
+++ b/fs/gfs2/main.c
@@ -0,0 +1,103 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "ops_fstype.h"
21#include "sys.h"
22
23/**
24 * init_gfs2_fs - Register GFS2 as a filesystem
25 *
26 * Returns: 0 on success, error code on failure
27 */
28
29static int __init init_gfs2_fs(void)
30{
31 int error;
32
33 gfs2_init_lmh();
34
35 error = gfs2_sys_init();
36 if (error)
37 return error;
38
39 error = -ENOMEM;
40
41 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
42 sizeof(struct gfs2_glock),
43 0, 0, NULL, NULL);
44 if (!gfs2_glock_cachep)
45 goto fail;
46
47 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
48 sizeof(struct gfs2_inode),
49 0, 0, NULL, NULL);
50 if (!gfs2_inode_cachep)
51 goto fail;
52
53 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
54 sizeof(struct gfs2_bufdata),
55 0, 0, NULL, NULL);
56 if (!gfs2_bufdata_cachep)
57 goto fail;
58
59 error = register_filesystem(&gfs2_fs_type);
60 if (error)
61 goto fail;
62
63 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
64
65 return 0;
66
67 fail:
68 if (gfs2_bufdata_cachep)
69 kmem_cache_destroy(gfs2_bufdata_cachep);
70
71 if (gfs2_inode_cachep)
72 kmem_cache_destroy(gfs2_inode_cachep);
73
74 if (gfs2_glock_cachep)
75 kmem_cache_destroy(gfs2_glock_cachep);
76
77 gfs2_sys_uninit();
78 return error;
79}
80
/**
 * exit_gfs2_fs - Unregister the file system
 *
 * Undoes init_gfs2_fs() in reverse order: the filesystem type is
 * unregistered first, then the three slab caches are destroyed, then the
 * sysfs support is torn down.
 */

static void __exit exit_gfs2_fs(void)
{
	unregister_filesystem(&gfs2_fs_type);

	kmem_cache_destroy(gfs2_bufdata_cachep);
	kmem_cache_destroy(gfs2_inode_cachep);
	kmem_cache_destroy(gfs2_glock_cachep);

	gfs2_sys_uninit();
}
96
97MODULE_DESCRIPTION("Global File System");
98MODULE_AUTHOR("Red Hat, Inc.");
99MODULE_LICENSE("GPL");
100
101module_init(init_gfs2_fs);
102module_exit(exit_gfs2_fs);
103
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
new file mode 100644
index 000000000000..177b0246d194
--- /dev/null
+++ b/fs/gfs2/meta_io.c
@@ -0,0 +1,876 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/writeback.h>
18#include <linux/swap.h>
19#include <linux/delay.h>
20#include <asm/semaphore.h>
21
22#include "gfs2.h"
23#include "glock.h"
24#include "glops.h"
25#include "inode.h"
26#include "log.h"
27#include "lops.h"
28#include "meta_io.h"
29#include "rgrp.h"
30#include "trans.h"
31
/* Non-zero when any of the dirty, locked or pinned state bits is set on bh. */
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
/* Non-zero when the dirty or the locked state bit is set on bh. */
#define buffer_in_io(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
36
37static int aspace_get_block(struct inode *inode, sector_t lblock,
38 struct buffer_head *bh_result, int create)
39{
40 gfs2_assert_warn(get_v2sdp(inode->i_sb), 0);
41 return -EOPNOTSUPP;
42}
43
/*
 * writepage for the metadata address space: hands the page to
 * block_write_full_page() with aspace_get_block() as the block mapper.
 *
 * Returns: the result of block_write_full_page()
 */
static int gfs2_aspace_writepage(struct page *page,
				 struct writeback_control *wbc)
{
	return block_write_full_page(page, aspace_get_block, wbc);
}
49
50/**
51 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
52 * @bh: the buffer we're stuck on
53 *
54 */
55
56static void stuck_releasepage(struct buffer_head *bh)
57{
58 struct gfs2_sbd *sdp = get_v2sdp(bh->b_page->mapping->host->i_sb);
59 struct gfs2_bufdata *bd = get_v2bd(bh);
60 struct gfs2_glock *gl;
61
62 fs_warn(sdp, "stuck in gfs2_releasepage()\n");
63 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
64 (uint64_t)bh->b_blocknr, atomic_read(&bh->b_count));
65 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
66 fs_warn(sdp, "get_v2bd(bh) = %s\n", (bd) ? "!NULL" : "NULL");
67
68 if (!bd)
69 return;
70
71 gl = bd->bd_gl;
72
73 fs_warn(sdp, "gl = (%u, %llu)\n",
74 gl->gl_name.ln_type, gl->gl_name.ln_number);
75
76 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
77 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
78 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
79
80 if (gl->gl_ops == &gfs2_inode_glops) {
81 struct gfs2_inode *ip = get_gl2ip(gl);
82 unsigned int x;
83
84 if (!ip)
85 return;
86
87 fs_warn(sdp, "ip = %llu %llu\n",
88 ip->i_num.no_formal_ino, ip->i_num.no_addr);
89 fs_warn(sdp, "ip->i_count = %d, ip->i_vnode = %s\n",
90 atomic_read(&ip->i_count),
91 (ip->i_vnode) ? "!NULL" : "NULL");
92
93 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
94 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
95 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
96 }
97}
98
/**
 * gfs2_aspace_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Frees the struct gfs2_bufdata attached to each buffer of the page and
 * then calls try_to_free_buffers().  A buffer that still has references
 * makes the function give up, unless the aspace's i_writecount is raised
 * (as gfs2_meta_inval() does), in which case it yields and retries until
 * the references are gone, warning every gt_stall_secs seconds.
 *
 * Returns: 0 if a buffer was still referenced and we gave up, otherwise
 *          the result of try_to_free_buffers()
 */

static int gfs2_aspace_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct inode *aspace = page->mapping->host;
	struct gfs2_sbd *sdp = get_v2sdp(aspace->i_sb);
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;
	unsigned long t;

	if (!page_has_buffers(page))
		goto out;

	head = bh = page_buffers(page);
	do {
		/* Start of the current stall-warning interval. */
		t = jiffies;

		while (atomic_read(&bh->b_count)) {
			if (atomic_read(&aspace->i_writecount)) {
				if (time_after_eq(jiffies, t +
				    gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
					stuck_releasepage(bh);
					t = jiffies;
				}

				yield();
				continue;
			}

			/* Buffer in use and nobody is forcing the release. */
			return 0;
		}

		gfs2_assert_warn(sdp, !buffer_pinned(bh));

		bd = get_v2bd(bh);
		if (bd) {
			/* The bufdata must be idle before it is freed. */
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
			gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list));
			gfs2_assert_warn(sdp, !bd->bd_ail);
			kmem_cache_free(gfs2_bufdata_cachep, bd);
			atomic_dec(&sdp->sd_bufdata_count);
			set_v2bd(bh, NULL);
		}

		bh = bh->b_this_page;
	}
	while (bh != head);

 out:
	return try_to_free_buffers(page);
}
160
/* Address space operations installed on aspace inodes by gfs2_aspace_get(). */
static struct address_space_operations aspace_aops = {
	.writepage = gfs2_aspace_writepage,
	.releasepage = gfs2_aspace_releasepage,
};
165
166/**
167 * gfs2_aspace_get - Create and initialize a struct inode structure
168 * @sdp: the filesystem the aspace is in
169 *
170 * Right now a struct inode is just a struct inode. Maybe Linux
171 * will supply a more lightweight address space construct (that works)
172 * in the future.
173 *
174 * Make sure pages/buffers in this aspace aren't in high memory.
175 *
176 * Returns: the aspace
177 */
178
179struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
180{
181 struct inode *aspace;
182
183 aspace = new_inode(sdp->sd_vfs);
184 if (aspace) {
185 mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
186 aspace->i_mapping->a_ops = &aspace_aops;
187 aspace->i_size = ~0ULL;
188 set_v2ip(aspace, NULL);
189 insert_inode_hash(aspace);
190 }
191
192 return aspace;
193}
194
/**
 * gfs2_aspace_put - Drop an aspace obtained from gfs2_aspace_get()
 * @aspace: the aspace inode
 *
 * Removes the inode from the inode hash and drops a reference to it.
 */
void gfs2_aspace_put(struct inode *aspace)
{
	remove_inode_hash(aspace);
	iput(aspace);
}
200
/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @ai: the part of the AIL
 *
 * Walks @ai's ail1 list in reverse.  Buffers that are no longer busy are
 * moved to the ail2 list; the first dirty buffer found is submitted for
 * writing and the walk is restarted.  The log lock is dropped around the
 * I/O submission and retaken afterwards, so this is evidently meant to be
 * called with the log lock held.
 */

void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int retry;

	do {
		retry = 0;

		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
						 bd_ail_st_list) {
			bh = bd->bd_bh;

			gfs2_assert(sdp, bd->bd_ail == ai);

			if (!buffer_busy(bh)) {
				if (!buffer_uptodate(bh))
					gfs2_io_error_bh(sdp, bh);
				list_move(&bd->bd_ail_st_list,
					  &ai->ai_ail2_list);
				continue;
			}

			/* Busy but not dirty: locked or pinned, skip it. */
			if (!buffer_dirty(bh))
				continue;

			/* Requeue at the head so the restarted walk sees it last. */
			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);

			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);

			/* The list may have changed while unlocked: rescan. */
			retry = 1;
			break;
		}
	} while (retry);
}
246
247/**
248 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
249 * @sdp: the filesystem
250 * @ai: the AIL entry
251 *
252 */
253
254int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
255{
256 struct gfs2_bufdata *bd, *s;
257 struct buffer_head *bh;
258
259 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
260 bd_ail_st_list) {
261 bh = bd->bd_bh;
262
263 gfs2_assert(sdp, bd->bd_ail == ai);
264
265 if (buffer_busy(bh)) {
266 if (flags & DIO_ALL)
267 continue;
268 else
269 break;
270 }
271
272 if (!buffer_uptodate(bh))
273 gfs2_io_error_bh(sdp, bh);
274
275 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
276 }
277
278 return list_empty(&ai->ai_ail1_list);
279}
280
281/**
282 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
283 * @sdp: the filesystem
284 * @ai: the AIL entry
285 *
286 */
287
288void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
289{
290 struct list_head *head = &ai->ai_ail2_list;
291 struct gfs2_bufdata *bd;
292
293 while (!list_empty(head)) {
294 bd = list_entry(head->prev, struct gfs2_bufdata,
295 bd_ail_st_list);
296 gfs2_assert(sdp, bd->bd_ail == ai);
297 bd->bd_ail = NULL;
298 list_del(&bd->bd_ail_st_list);
299 list_del(&bd->bd_ail_gl_list);
300 atomic_dec(&bd->bd_gl->gl_ail_count);
301 brelse(bd->bd_bh);
302 }
303}
304
/**
 * gfs2_ail_empty_gl - remove all buffers for a given lock from the AIL
 * @gl: the glock
 *
 * None of the buffers should be dirty, locked, or pinned.
 *
 * Runs inside its own transaction, sized by the glock's current AIL count:
 * every buffer is taken off the AIL and a revoke is added for its block,
 * then the transaction is ended and the log flushed.
 */

void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	unsigned int blocks;
	struct list_head *head = &gl->gl_ail_list;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;
	uint64_t blkno;
	int error;

	blocks = atomic_read(&gl->gl_ail_count);
	if (!blocks)
		return;

	error = gfs2_trans_begin(sdp, 0, blocks);
	if (gfs2_assert_withdraw(sdp, !error))
		return;

	gfs2_log_lock(sdp);
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata,
				bd_ail_gl_list);
		bh = bd->bd_bh;
		/* Remember the block number before the reference is dropped. */
		blkno = bh->b_blocknr;
		gfs2_assert_withdraw(sdp, !buffer_busy(bh));

		bd->bd_ail = NULL;
		list_del(&bd->bd_ail_st_list);
		list_del(&bd->bd_ail_gl_list);
		atomic_dec(&gl->gl_ail_count);
		brelse(bh);
		/* The revoke is added with the log lock released. */
		gfs2_log_unlock(sdp);

		gfs2_trans_add_revoke(sdp, blkno);

		gfs2_log_lock(sdp);
	}
	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
	gfs2_log_unlock(sdp);

	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp);
}
355
356/**
357 * gfs2_meta_inval - Invalidate all buffers associated with a glock
358 * @gl: the glock
359 *
360 */
361
362void gfs2_meta_inval(struct gfs2_glock *gl)
363{
364 struct gfs2_sbd *sdp = gl->gl_sbd;
365 struct inode *aspace = gl->gl_aspace;
366 struct address_space *mapping = gl->gl_aspace->i_mapping;
367
368 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
369
370 atomic_inc(&aspace->i_writecount);
371 truncate_inode_pages(mapping, 0);
372 atomic_dec(&aspace->i_writecount);
373
374 gfs2_assert_withdraw(sdp, !mapping->nrpages);
375}
376
377/**
378 * gfs2_meta_sync - Sync all buffers associated with a glock
379 * @gl: The glock
380 * @flags: DIO_START | DIO_WAIT
381 *
382 */
383
384void gfs2_meta_sync(struct gfs2_glock *gl, int flags)
385{
386 struct address_space *mapping = gl->gl_aspace->i_mapping;
387 int error = 0;
388
389 if (flags & DIO_START)
390 filemap_fdatawrite(mapping);
391 if (!error && (flags & DIO_WAIT))
392 error = filemap_fdatawait(mapping);
393
394 if (error)
395 gfs2_io_error(gl->gl_sbd);
396}
397
398/**
399 * getbuf - Get a buffer with a given address space
400 * @sdp: the filesystem
401 * @aspace: the address space
402 * @blkno: the block number (filesystem scope)
403 * @create: 1 if the buffer should be created
404 *
405 * Returns: the buffer
406 */
407
408static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
409 uint64_t blkno, int create)
410{
411 struct page *page;
412 struct buffer_head *bh;
413 unsigned int shift;
414 unsigned long index;
415 unsigned int bufnum;
416
417 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
418 index = blkno >> shift; /* convert block to page */
419 bufnum = blkno - (index << shift); /* block buf index within page */
420
421 if (create) {
422 for (;;) {
423 page = grab_cache_page(aspace->i_mapping, index);
424 if (page)
425 break;
426 yield();
427 }
428 } else {
429 page = find_lock_page(aspace->i_mapping, index);
430 if (!page)
431 return NULL;
432 }
433
434 if (!page_has_buffers(page))
435 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
436
437 /* Locate header for our buffer within our page */
438 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
439 /* Do nothing */;
440 get_bh(bh);
441
442 if (!buffer_mapped(bh))
443 map_bh(bh, sdp->sd_vfs, blkno);
444
445 unlock_page(page);
446 mark_page_accessed(page);
447 page_cache_release(page);
448
449 return bh;
450}
451
452static void meta_prep_new(struct buffer_head *bh)
453{
454 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
455
456 lock_buffer(bh);
457 clear_buffer_dirty(bh);
458 set_buffer_uptodate(bh);
459 unlock_buffer(bh);
460
461 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
462}
463
464/**
465 * gfs2_meta_new - Get a block
466 * @gl: The glock associated with this block
467 * @blkno: The block number
468 *
469 * Returns: The buffer
470 */
471
472struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno)
473{
474 struct buffer_head *bh;
475 bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
476 meta_prep_new(bh);
477 return bh;
478}
479
480/**
481 * gfs2_meta_read - Read a block from disk
482 * @gl: The glock covering the block
483 * @blkno: The block number
484 * @flags: flags to gfs2_dreread()
485 * @bhp: the place where the buffer is returned (NULL on failure)
486 *
487 * Returns: errno
488 */
489
490int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags,
491 struct buffer_head **bhp)
492{
493 int error;
494
495 *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
496 error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags);
497 if (error)
498 brelse(*bhp);
499
500 return error;
501}
502
503/**
504 * gfs2_meta_reread - Reread a block from disk
505 * @sdp: the filesystem
506 * @bh: The block to read
507 * @flags: Flags that control the read
508 *
509 * Returns: errno
510 */
511
512int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags)
513{
514 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
515 return -EIO;
516
517 if (flags & DIO_FORCE)
518 clear_buffer_uptodate(bh);
519
520 if ((flags & DIO_START) && !buffer_uptodate(bh))
521 ll_rw_block(READ, 1, &bh);
522
523 if (flags & DIO_WAIT) {
524 wait_on_buffer(bh);
525
526 if (!buffer_uptodate(bh)) {
527 struct gfs2_trans *tr = get_transaction;
528 if (tr && tr->tr_touched)
529 gfs2_io_error_bh(sdp, bh);
530 return -EIO;
531 }
532 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
533 return -EIO;
534 }
535
536 return 0;
537}
538
539/**
540 * gfs2_meta_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
541 * @gl: the glock the buffer belongs to
542 * @bh: The buffer to be attached to
543 *
544 */
545
546void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh)
547{
548 struct gfs2_bufdata *bd;
549
550 lock_page(bh->b_page);
551
552 if (get_v2bd(bh)) {
553 unlock_page(bh->b_page);
554 return;
555 }
556
557 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_KERNEL | __GFP_NOFAIL),
558 atomic_inc(&gl->gl_sbd->sd_bufdata_count);
559
560 memset(bd, 0, sizeof(struct gfs2_bufdata));
561
562 bd->bd_bh = bh;
563 bd->bd_gl = gl;
564
565 INIT_LIST_HEAD(&bd->bd_list_tr);
566 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
567
568 set_v2bd(bh, bd);
569
570 unlock_page(bh->b_page);
571}
572
/**
 * gfs2_meta_pin - Pin a metadata buffer in memory
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to be pinned
 *
 * Sets the pinned bit (asserting that it was not already set and that the
 * journal is live), clears the dirty bit and takes an extra reference on
 * the buffer.
 *
 * NOTE(review): the attached bufdata is dereferenced without a NULL check,
 * so callers presumably attach one first - confirm.
 */

void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd = get_v2bd(bh);

	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));

	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);

	wait_on_buffer(bh);

	/* If this buffer is in the AIL and it has already been written
	   to in-place disk block, remove it from the AIL. */

	gfs2_log_lock(sdp);
	if (bd->bd_ail && !buffer_in_io(bh))
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
	gfs2_log_unlock(sdp);

	clear_buffer_dirty(bh);
	wait_on_buffer(bh);

	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);

	/* This reference is dropped again on the unpin/wipe paths. */
	get_bh(bh);
}
607
/**
 * gfs2_meta_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: the AIL entry whose ail1 list the buffer is put on
 *
 * Marks the buffer dirty, clears its pinned bit and attaches it to @ai.
 * If the buffer was already on an AIL entry it is unlinked from that entry
 * and one buffer reference is dropped; otherwise it is also added to its
 * glock's AIL list and the glock's AIL count is raised.
 */

void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		     struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd = get_v2bd(bh);

	gfs2_assert_withdraw(sdp, buffer_uptodate(bh));

	if (!buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);

	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	gfs2_log_lock(sdp);
	if (bd->bd_ail) {
		/* Already in the AIL: move it over to the new entry. */
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_ail = ai;
	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
	gfs2_log_unlock(sdp);
}
642
/**
 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
 * @ip: the inode who owns the buffers
 * @bstart: the first buffer in the run
 * @blen: the number of buffers in the run
 *
 * Only blocks that already have a cached buffer are touched.  Such a buffer
 * is unpinned and taken off the log element list if it was pinned, removed
 * from the AIL (adding a revoke for its block) if it was there, and finally
 * marked neither dirty nor up to date.
 */

void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct inode *aspace = ip->i_gl->gl_aspace;
	struct buffer_head *bh;

	while (blen) {
		/* NO_CREATE: look the buffer up, never instantiate it. */
		bh = getbuf(sdp, aspace, bstart, NO_CREATE);
		if (bh) {
			struct gfs2_bufdata *bd = get_v2bd(bh);

			/*
			 * NOTE(review): bd is used here without the NULL
			 * check applied below; presumably a pinned buffer
			 * always has a bufdata attached - confirm.
			 */
			if (test_clear_buffer_pinned(bh)) {
				gfs2_log_lock(sdp);
				list_del_init(&bd->bd_le.le_list);
				gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
				sdp->sd_log_num_buf--;
				gfs2_log_unlock(sdp);
				get_transaction->tr_num_buf_rm++;
				/* Drops the reference taken by gfs2_meta_pin(). */
				brelse(bh);
			}
			if (bd) {
				gfs2_log_lock(sdp);
				if (bd->bd_ail) {
					uint64_t blkno = bh->b_blocknr;
					bd->bd_ail = NULL;
					list_del(&bd->bd_ail_st_list);
					list_del(&bd->bd_ail_gl_list);
					atomic_dec(&bd->bd_gl->gl_ail_count);
					brelse(bh);
					/* The revoke is added with the log lock released. */
					gfs2_log_unlock(sdp);
					gfs2_trans_add_revoke(sdp, blkno);
				} else
					gfs2_log_unlock(sdp);
			}

			lock_buffer(bh);
			clear_buffer_dirty(bh);
			clear_buffer_uptodate(bh);
			unlock_buffer(bh);

			/* Drops the reference taken by getbuf(). */
			brelse(bh);
		}

		bstart++;
		blen--;
	}
}
698
699/**
700 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
701 * @ip: The GFS2 inode
702 *
703 * This releases buffers that are in the most-recently-used array of
704 * blocks used for indirect block addressing for this inode.
705 */
706
707void gfs2_meta_cache_flush(struct gfs2_inode *ip)
708{
709 struct buffer_head **bh_slot;
710 unsigned int x;
711
712 spin_lock(&ip->i_spin);
713
714 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
715 bh_slot = &ip->i_cache[x];
716 if (!*bh_slot)
717 break;
718 brelse(*bh_slot);
719 *bh_slot = NULL;
720 }
721
722 spin_unlock(&ip->i_spin);
723}
724
725/**
726 * gfs2_meta_indirect_buffer - Get a metadata buffer
727 * @ip: The GFS2 inode
728 * @height: The level of this buf in the metadata (indir addr) tree (if any)
729 * @num: The block number (device relative) of the buffer
730 * @new: Non-zero if we may create a new buffer
731 * @bhp: the buffer is returned here
732 *
733 * Try to use the gfs2_inode's MRU metadata tree cache.
734 *
735 * Returns: errno
736 */
737
738int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
739 int new, struct buffer_head **bhp)
740{
741 struct buffer_head *bh, **bh_slot = ip->i_cache + height;
742 int error;
743
744 spin_lock(&ip->i_spin);
745 bh = *bh_slot;
746 if (bh) {
747 if (bh->b_blocknr == num)
748 get_bh(bh);
749 else
750 bh = NULL;
751 }
752 spin_unlock(&ip->i_spin);
753
754 if (bh) {
755 if (new)
756 meta_prep_new(bh);
757 else {
758 error = gfs2_meta_reread(ip->i_sbd, bh,
759 DIO_START | DIO_WAIT);
760 if (error) {
761 brelse(bh);
762 return error;
763 }
764 }
765 } else {
766 if (new)
767 bh = gfs2_meta_new(ip->i_gl, num);
768 else {
769 error = gfs2_meta_read(ip->i_gl, num,
770 DIO_START | DIO_WAIT, &bh);
771 if (error)
772 return error;
773 }
774
775 spin_lock(&ip->i_spin);
776 if (*bh_slot != bh) {
777 brelse(*bh_slot);
778 *bh_slot = bh;
779 get_bh(bh);
780 }
781 spin_unlock(&ip->i_spin);
782 }
783
784 if (new) {
785 if (gfs2_assert_warn(ip->i_sbd, height)) {
786 brelse(bh);
787 return -EIO;
788 }
789 gfs2_trans_add_bh(ip->i_gl, bh);
790 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
791 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
792
793 } else if (gfs2_metatype_check(ip->i_sbd, bh,
794 (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) {
795 brelse(bh);
796 return -EIO;
797 }
798
799 *bhp = bh;
800
801 return 0;
802}
803
804/**
805 * gfs2_meta_ra - start readahead on an extent of a file
806 * @gl: the glock the blocks belong to
807 * @dblock: the starting disk block
808 * @extlen: the number of blocks in the extent
809 *
810 */
811
812void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen)
813{
814 struct gfs2_sbd *sdp = gl->gl_sbd;
815 struct inode *aspace = gl->gl_aspace;
816 struct buffer_head *first_bh, *bh;
817 uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift;
818 int error;
819
820 if (!extlen || !max_ra)
821 return;
822 if (extlen > max_ra)
823 extlen = max_ra;
824
825 first_bh = getbuf(sdp, aspace, dblock, CREATE);
826
827 if (buffer_uptodate(first_bh))
828 goto out;
829 if (!buffer_locked(first_bh)) {
830 error = gfs2_meta_reread(sdp, first_bh, DIO_START);
831 if (error)
832 goto out;
833 }
834
835 dblock++;
836 extlen--;
837
838 while (extlen) {
839 bh = getbuf(sdp, aspace, dblock, CREATE);
840
841 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
842 error = gfs2_meta_reread(sdp, bh, DIO_START);
843 brelse(bh);
844 if (error)
845 goto out;
846 } else
847 brelse(bh);
848
849 dblock++;
850 extlen--;
851
852 if (buffer_uptodate(first_bh))
853 break;
854 }
855
856 out:
857 brelse(first_bh);
858}
859
860/**
861 * gfs2_meta_syncfs - sync all the buffers in a filesystem
862 * @sdp: the filesystem
863 *
864 */
865
866void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
867{
868 gfs2_log_flush(sdp);
869 for (;;) {
870 gfs2_ail1_start(sdp, DIO_ALL);
871 if (gfs2_ail1_empty(sdp, DIO_ALL))
872 break;
873 msleep(100);
874 }
875}
876
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
new file mode 100644
index 000000000000..5556df8cc6c9
--- /dev/null
+++ b/fs/gfs2/meta_io.h
@@ -0,0 +1,88 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIO_DOT_H__
11#define __DIO_DOT_H__
12
13static inline void gfs2_buffer_clear(struct buffer_head *bh)
14{
15 memset(bh->b_data, 0, bh->b_size);
16}
17
18static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
19{
20 memset(bh->b_data + head, 0, bh->b_size - head);
21}
22
23static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset,
24 int amount, int journaled)
25{
26 int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0;
27 int z_len1 = offset - z_off1;
28 int z_off2 = offset + amount;
29 int z_len2 = (bh)->b_size - z_off2;
30
31 if (z_len1)
32 memset(bh->b_data + z_off1, 0, z_len1);
33
34 if (z_len2)
35 memset(bh->b_data + z_off2, 0, z_len2);
36}
37
38static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
39 int to_head,
40 struct buffer_head *from_bh,
41 int from_head)
42{
43 memcpy(to_bh->b_data + to_head,
44 from_bh->b_data + from_head,
45 from_bh->b_size - from_head);
46 memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
47 0,
48 from_head - to_head);
49}
50
51struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
52void gfs2_aspace_put(struct inode *aspace);
53
54void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
55int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags);
56void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
57void gfs2_ail_empty_gl(struct gfs2_glock *gl);
58
59void gfs2_meta_inval(struct gfs2_glock *gl);
60void gfs2_meta_sync(struct gfs2_glock *gl, int flags);
61
62struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno);
63int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno,
64 int flags, struct buffer_head **bhp);
65int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags);
66
67void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh);
68void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
69void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
70 struct gfs2_ail *ai);
71
72void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
73
74void gfs2_meta_cache_flush(struct gfs2_inode *ip);
75int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
76 int new, struct buffer_head **bhp);
77
78static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
79 struct buffer_head **bhp)
80{
81 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
82}
83
84void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen);
85void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
86
87#endif /* __DIO_DOT_H__ */
88
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
new file mode 100644
index 000000000000..3e42697aafc7
--- /dev/null
+++ b/fs/gfs2/mount.c
@@ -0,0 +1,211 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "mount.h"
19#include "sys.h"
20
21/**
22 * gfs2_mount_args - Parse mount options
23 * @sdp:
24 * @data:
25 *
26 * Return: errno
27 */
28
29int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
30{
31 struct gfs2_args *args = &sdp->sd_args;
32 char *data = data_arg;
33 char *options, *o, *v;
34 int error = 0;
35
36 if (!remount) {
37 /* If someone preloaded options, use those instead */
38 spin_lock(&gfs2_sys_margs_lock);
39 if (gfs2_sys_margs) {
40 data = gfs2_sys_margs;
41 gfs2_sys_margs = NULL;
42 }
43 spin_unlock(&gfs2_sys_margs_lock);
44
45 /* Set some defaults */
46 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
47 args->ar_quota = GFS2_QUOTA_DEFAULT;
48 args->ar_data = GFS2_DATA_DEFAULT;
49 }
50
51 /* Split the options into tokens with the "," character and
52 process them */
53
54 for (options = data; (o = strsep(&options, ",")); ) {
55 if (!*o)
56 continue;
57
58 v = strchr(o, '=');
59 if (v)
60 *v++ = 0;
61
62 if (!strcmp(o, "lockproto")) {
63 if (!v)
64 goto need_value;
65 if (remount && strcmp(v, args->ar_lockproto))
66 goto cant_remount;
67 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
68 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
69 }
70
71 else if (!strcmp(o, "locktable")) {
72 if (!v)
73 goto need_value;
74 if (remount && strcmp(v, args->ar_locktable))
75 goto cant_remount;
76 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
77 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
78 }
79
80 else if (!strcmp(o, "hostdata")) {
81 if (!v)
82 goto need_value;
83 if (remount && strcmp(v, args->ar_hostdata))
84 goto cant_remount;
85 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
86 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
87 }
88
89 else if (!strcmp(o, "spectator")) {
90 if (remount && !args->ar_spectator)
91 goto cant_remount;
92 args->ar_spectator = 1;
93 sdp->sd_vfs->s_flags |= MS_RDONLY;
94 }
95
96 else if (!strcmp(o, "ignore_local_fs")) {
97 if (remount && !args->ar_ignore_local_fs)
98 goto cant_remount;
99 args->ar_ignore_local_fs = 1;
100 }
101
102 else if (!strcmp(o, "localflocks")) {
103 if (remount && !args->ar_localflocks)
104 goto cant_remount;
105 args->ar_localflocks = 1;
106 }
107
108 else if (!strcmp(o, "localcaching")) {
109 if (remount && !args->ar_localcaching)
110 goto cant_remount;
111 args->ar_localcaching = 1;
112 }
113
114 else if (!strcmp(o, "debug"))
115 args->ar_debug = 1;
116
117 else if (!strcmp(o, "nodebug"))
118 args->ar_debug = 0;
119
120 else if (!strcmp(o, "upgrade")) {
121 if (remount && !args->ar_upgrade)
122 goto cant_remount;
123 args->ar_upgrade = 1;
124 }
125
126 else if (!strcmp(o, "num_glockd")) {
127 unsigned int x;
128 if (!v)
129 goto need_value;
130 sscanf(v, "%u", &x);
131 if (remount && x != args->ar_num_glockd)
132 goto cant_remount;
133 if (!x || x > GFS2_GLOCKD_MAX) {
134 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
135 GFS2_GLOCKD_MAX, x);
136 error = -EINVAL;
137 break;
138 }
139 args->ar_num_glockd = x;
140 }
141
142 else if (!strcmp(o, "acl")) {
143 args->ar_posix_acl = 1;
144 sdp->sd_vfs->s_flags |= MS_POSIXACL;
145 }
146
147 else if (!strcmp(o, "noacl")) {
148 args->ar_posix_acl = 0;
149 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
150 }
151
152 else if (!strcmp(o, "quota")) {
153 if (!v)
154 goto need_value;
155 if (!strcmp(v, "off"))
156 args->ar_quota = GFS2_QUOTA_OFF;
157 else if (!strcmp(v, "account"))
158 args->ar_quota = GFS2_QUOTA_ACCOUNT;
159 else if (!strcmp(v, "on"))
160 args->ar_quota = GFS2_QUOTA_ON;
161 else {
162 fs_info(sdp, "invalid value for quota\n");
163 error = -EINVAL;
164 break;
165 }
166 }
167
168 else if (!strcmp(o, "suiddir"))
169 args->ar_suiddir = 1;
170
171 else if (!strcmp(o, "nosuiddir"))
172 args->ar_suiddir = 0;
173
174 else if (!strcmp(o, "data")) {
175 if (!v)
176 goto need_value;
177 if (!strcmp(v, "writeback"))
178 args->ar_data = GFS2_DATA_WRITEBACK;
179 else if (!strcmp(v, "ordered"))
180 args->ar_data = GFS2_DATA_ORDERED;
181 else {
182 fs_info(sdp, "invalid value for data\n");
183 error = -EINVAL;
184 break;
185 }
186 }
187
188 else {
189 fs_info(sdp, "unknown option: %s\n", o);
190 error = -EINVAL;
191 break;
192 }
193 }
194
195 if (error)
196 fs_info(sdp, "invalid mount option(s)\n");
197
198 if (data != data_arg)
199 kfree(data);
200
201 return error;
202
203 need_value:
204 fs_info(sdp, "need value for option %s\n", o);
205 return -EINVAL;
206
207 cant_remount:
208 fs_info(sdp, "can't remount with option %s\n", o);
209 return -EINVAL;
210}
211
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
new file mode 100644
index 000000000000..bc8331cd7b2c
--- /dev/null
+++ b/fs/gfs2/mount.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
14
15#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
new file mode 100644
index 000000000000..2a1ef5aa7f0c
--- /dev/null
+++ b/fs/gfs2/ondisk.c
@@ -0,0 +1,590 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include <linux/gfs2_ondisk.h>
19
/*
 * pv - print one member of *ptr as " <member> = <value>" using printk
 *      format @fmt.
 * pa - dump @count bytes of an array member of *ptr via print_array().
 *
 * Neither macro ends in a semicolon; callers supply their own.
 */
#define pv(ptr, member, fmt) printk(" "#member" = "fmt"\n", (ptr)->member)
#define pa(ptr, member, count) print_array(#member, (ptr)->member, (count))
22
/**
 * print_array - Print out an array of bytes
 * @title: what to print before the array
 * @buf: the array
 * @count: the number of bytes
 *
 * Bytes are printed as two-digit hex values, sixteen per line.
 */

static void print_array(char *title, char *buf, int count)
{
	int i = 0;

	printk(" %s =\n", title);
	while (i < count) {
		printk("%.2X ", (unsigned char)buf[i]);
		/* End the line after every sixteenth byte */
		if (i % 16 == 15)
			printk("\n");
		i++;
	}
	/* Terminate a final, partially filled line */
	if (i % 16)
		printk("\n");
}
44
45/*
46 * gfs2_xxx_in - read in an xxx struct
47 * first arg: the cpu-order structure
48 * buf: the disk-order buffer
49 *
50 * gfs2_xxx_out - write out an xxx struct
51 * first arg: the cpu-order structure
52 * buf: the disk-order buffer
53 *
54 * gfs2_xxx_print - print out an xxx struct
55 * first arg: the cpu-order structure
56 */
57
58void gfs2_inum_in(struct gfs2_inum *no, char *buf)
59{
60 struct gfs2_inum *str = (struct gfs2_inum *)buf;
61
62 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
63 no->no_addr = be64_to_cpu(str->no_addr);
64}
65
66void gfs2_inum_out(struct gfs2_inum *no, char *buf)
67{
68 struct gfs2_inum *str = (struct gfs2_inum *)buf;
69
70 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
71 str->no_addr = cpu_to_be64(no->no_addr);
72}
73
74void gfs2_inum_print(struct gfs2_inum *no)
75{
76 pv(no, no_formal_ino, "%llu");
77 pv(no, no_addr, "%llu");
78}
79
80void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf)
81{
82 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
83
84 mh->mh_magic = be32_to_cpu(str->mh_magic);
85 mh->mh_type = be16_to_cpu(str->mh_type);
86 mh->mh_format = be16_to_cpu(str->mh_format);
87}
88
89void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf)
90{
91 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
92
93 str->mh_magic = cpu_to_be32(mh->mh_magic);
94 str->mh_type = cpu_to_be16(mh->mh_type);
95 str->mh_format = cpu_to_be16(mh->mh_format);
96}
97
98void gfs2_meta_header_print(struct gfs2_meta_header *mh)
99{
100 pv(mh, mh_magic, "0x%.8X");
101 pv(mh, mh_type, "%u");
102 pv(mh, mh_format, "%u");
103}
104
105void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
106{
107 struct gfs2_sb *str = (struct gfs2_sb *)buf;
108
109 gfs2_meta_header_in(&sb->sb_header, buf);
110
111 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
112 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
113 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
114 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
115
116 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
117 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
118
119 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
120 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
121}
122
123void gfs2_sb_out(struct gfs2_sb *sb, char *buf)
124{
125 struct gfs2_sb *str = (struct gfs2_sb *)buf;
126
127 gfs2_meta_header_out(&sb->sb_header, buf);
128
129 str->sb_fs_format = cpu_to_be32(sb->sb_fs_format);
130 str->sb_multihost_format = cpu_to_be32(sb->sb_multihost_format);
131 str->sb_bsize = cpu_to_be32(sb->sb_bsize);
132 str->sb_bsize_shift = cpu_to_be32(sb->sb_bsize_shift);
133
134 gfs2_inum_out(&sb->sb_master_dir, (char *)&str->sb_master_dir);
135 gfs2_inum_out(&sb->sb_root_dir, (char *)&str->sb_root_dir);
136
137 memcpy(str->sb_lockproto, sb->sb_lockproto, GFS2_LOCKNAME_LEN);
138 memcpy(str->sb_locktable, sb->sb_locktable, GFS2_LOCKNAME_LEN);
139}
140
141void gfs2_sb_print(struct gfs2_sb *sb)
142{
143 gfs2_meta_header_print(&sb->sb_header);
144
145 pv(sb, sb_fs_format, "%u");
146 pv(sb, sb_multihost_format, "%u");
147
148 pv(sb, sb_bsize, "%u");
149 pv(sb, sb_bsize_shift, "%u");
150
151 gfs2_inum_print(&sb->sb_master_dir);
152
153 pv(sb, sb_lockproto, "%s");
154 pv(sb, sb_locktable, "%s");
155}
156
157void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf)
158{
159 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
160
161 ri->ri_addr = be64_to_cpu(str->ri_addr);
162 ri->ri_length = be32_to_cpu(str->ri_length);
163 ri->ri_data0 = be64_to_cpu(str->ri_data0);
164 ri->ri_data = be32_to_cpu(str->ri_data);
165 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
166
167}
168
169void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf)
170{
171 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
172
173 str->ri_addr = cpu_to_be64(ri->ri_addr);
174 str->ri_length = cpu_to_be32(ri->ri_length);
175 str->__pad = 0;
176
177 str->ri_data0 = cpu_to_be64(ri->ri_data0);
178 str->ri_data = cpu_to_be32(ri->ri_data);
179 str->ri_bitbytes = cpu_to_be32(ri->ri_bitbytes);
180 memset(str->ri_reserved, 0, sizeof(str->ri_reserved));
181}
182
183void gfs2_rindex_print(struct gfs2_rindex *ri)
184{
185 pv(ri, ri_addr, "%llu");
186 pv(ri, ri_length, "%u");
187
188 pv(ri, ri_data0, "%llu");
189 pv(ri, ri_data, "%u");
190
191 pv(ri, ri_bitbytes, "%u");
192}
193
194void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
195{
196 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
197
198 gfs2_meta_header_in(&rg->rg_header, buf);
199 rg->rg_flags = be32_to_cpu(str->rg_flags);
200 rg->rg_free = be32_to_cpu(str->rg_free);
201 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
202}
203
204void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf)
205{
206 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
207
208 gfs2_meta_header_out(&rg->rg_header, buf);
209 str->rg_flags = cpu_to_be32(rg->rg_flags);
210 str->rg_free = cpu_to_be32(rg->rg_free);
211 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
212
213 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
214}
215
216void gfs2_rgrp_print(struct gfs2_rgrp *rg)
217{
218 gfs2_meta_header_print(&rg->rg_header);
219 pv(rg, rg_flags, "%u");
220 pv(rg, rg_free, "%u");
221 pv(rg, rg_dinodes, "%u");
222
223 pa(rg, rg_reserved, 36);
224}
225
226void gfs2_quota_in(struct gfs2_quota *qu, char *buf)
227{
228 struct gfs2_quota *str = (struct gfs2_quota *)buf;
229
230 qu->qu_limit = be64_to_cpu(str->qu_limit);
231 qu->qu_warn = be64_to_cpu(str->qu_warn);
232 qu->qu_value = be64_to_cpu(str->qu_value);
233}
234
235void gfs2_quota_out(struct gfs2_quota *qu, char *buf)
236{
237 struct gfs2_quota *str = (struct gfs2_quota *)buf;
238
239 str->qu_limit = cpu_to_be64(qu->qu_limit);
240 str->qu_warn = cpu_to_be64(qu->qu_warn);
241 str->qu_value = cpu_to_be64(qu->qu_value);
242}
243
244void gfs2_quota_print(struct gfs2_quota *qu)
245{
246 pv(qu, qu_limit, "%llu");
247 pv(qu, qu_warn, "%llu");
248 pv(qu, qu_value, "%lld");
249}
250
251void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
252{
253 struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
254
255 gfs2_meta_header_in(&di->di_header, buf);
256 gfs2_inum_in(&di->di_num, (char *)&str->di_num);
257
258 di->di_mode = be32_to_cpu(str->di_mode);
259 di->di_uid = be32_to_cpu(str->di_uid);
260 di->di_gid = be32_to_cpu(str->di_gid);
261 di->di_nlink = be32_to_cpu(str->di_nlink);
262 di->di_size = be64_to_cpu(str->di_size);
263 di->di_blocks = be64_to_cpu(str->di_blocks);
264 di->di_atime = be64_to_cpu(str->di_atime);
265 di->di_mtime = be64_to_cpu(str->di_mtime);
266 di->di_ctime = be64_to_cpu(str->di_ctime);
267 di->di_major = be32_to_cpu(str->di_major);
268 di->di_minor = be32_to_cpu(str->di_minor);
269
270 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
271 di->di_goal_data = be64_to_cpu(str->di_goal_data);
272
273 di->di_flags = be32_to_cpu(str->di_flags);
274 di->di_payload_format = be32_to_cpu(str->di_payload_format);
275 di->di_height = be16_to_cpu(str->di_height);
276
277 di->di_depth = be16_to_cpu(str->di_depth);
278 di->di_entries = be32_to_cpu(str->di_entries);
279
280 di->di_eattr = be64_to_cpu(str->di_eattr);
281
282}
283
284void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
285{
286 struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
287
288 gfs2_meta_header_out(&di->di_header, buf);
289 gfs2_inum_out(&di->di_num, (char *)&str->di_num);
290
291 str->di_mode = cpu_to_be32(di->di_mode);
292 str->di_uid = cpu_to_be32(di->di_uid);
293 str->di_gid = cpu_to_be32(di->di_gid);
294 str->di_nlink = cpu_to_be32(di->di_nlink);
295 str->di_size = cpu_to_be64(di->di_size);
296 str->di_blocks = cpu_to_be64(di->di_blocks);
297 str->di_atime = cpu_to_be64(di->di_atime);
298 str->di_mtime = cpu_to_be64(di->di_mtime);
299 str->di_ctime = cpu_to_be64(di->di_ctime);
300 str->di_major = cpu_to_be32(di->di_major);
301 str->di_minor = cpu_to_be32(di->di_minor);
302
303 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
304 str->di_goal_data = cpu_to_be64(di->di_goal_data);
305
306 str->di_flags = cpu_to_be32(di->di_flags);
307 str->di_payload_format = cpu_to_be32(di->di_payload_format);
308 str->di_height = cpu_to_be16(di->di_height);
309
310 str->di_depth = cpu_to_be16(di->di_depth);
311 str->di_entries = cpu_to_be32(di->di_entries);
312
313 str->di_eattr = cpu_to_be64(di->di_eattr);
314
315}
316
317void gfs2_dinode_print(struct gfs2_dinode *di)
318{
319 gfs2_meta_header_print(&di->di_header);
320 gfs2_inum_print(&di->di_num);
321
322 pv(di, di_mode, "0%o");
323 pv(di, di_uid, "%u");
324 pv(di, di_gid, "%u");
325 pv(di, di_nlink, "%u");
326 pv(di, di_size, "%llu");
327 pv(di, di_blocks, "%llu");
328 pv(di, di_atime, "%lld");
329 pv(di, di_mtime, "%lld");
330 pv(di, di_ctime, "%lld");
331 pv(di, di_major, "%u");
332 pv(di, di_minor, "%u");
333
334 pv(di, di_goal_meta, "%llu");
335 pv(di, di_goal_data, "%llu");
336
337 pv(di, di_flags, "0x%.8X");
338 pv(di, di_payload_format, "%u");
339 pv(di, di_height, "%u");
340
341 pv(di, di_depth, "%u");
342 pv(di, di_entries, "%u");
343
344 pv(di, di_eattr, "%llu");
345}
346
347void gfs2_dirent_in(struct gfs2_dirent *de, char *buf)
348{
349 struct gfs2_dirent *str = (struct gfs2_dirent *)buf;
350
351 gfs2_inum_in(&de->de_inum, buf);
352 de->de_hash = be32_to_cpu(str->de_hash);
353 de->de_rec_len = be32_to_cpu(str->de_rec_len);
354 de->de_name_len = str->de_name_len;
355 de->de_type = str->de_type;
356}
357
358void gfs2_dirent_out(struct gfs2_dirent *de, char *buf)
359{
360 struct gfs2_dirent *str = (struct gfs2_dirent *)buf;
361
362 gfs2_inum_out(&de->de_inum, buf);
363 str->de_hash = cpu_to_be32(de->de_hash);
364 str->de_rec_len = cpu_to_be32(de->de_rec_len);
365 str->de_name_len = de->de_name_len;
366 str->de_type = de->de_type;
367 str->__pad1 = 0;
368 str->__pad2 = 0;
369}
370
371void gfs2_dirent_print(struct gfs2_dirent *de, char *name)
372{
373 char buf[GFS2_FNAMESIZE + 1];
374
375 gfs2_inum_print(&de->de_inum);
376 pv(de, de_hash, "0x%.8X");
377 pv(de, de_rec_len, "%u");
378 pv(de, de_name_len, "%u");
379 pv(de, de_type, "%u");
380
381 memset(buf, 0, GFS2_FNAMESIZE + 1);
382 memcpy(buf, name, de->de_name_len);
383 printk(" name = %s\n", buf);
384}
385
386void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf)
387{
388 struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
389
390 gfs2_meta_header_in(&lf->lf_header, buf);
391 lf->lf_depth = be16_to_cpu(str->lf_depth);
392 lf->lf_entries = be16_to_cpu(str->lf_entries);
393 lf->lf_dirent_format = be32_to_cpu(str->lf_dirent_format);
394 lf->lf_next = be64_to_cpu(str->lf_next);
395}
396
397void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf)
398{
399 struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
400
401 gfs2_meta_header_out(&lf->lf_header, buf);
402 str->lf_depth = cpu_to_be16(lf->lf_depth);
403 str->lf_entries = cpu_to_be16(lf->lf_entries);
404 str->lf_dirent_format = cpu_to_be32(lf->lf_dirent_format);
405 str->lf_next = cpu_to_be64(lf->lf_next);
406 memset(&str->lf_reserved, 0, sizeof(str->lf_reserved));
407}
408
409void gfs2_leaf_print(struct gfs2_leaf *lf)
410{
411 gfs2_meta_header_print(&lf->lf_header);
412 pv(lf, lf_depth, "%u");
413 pv(lf, lf_entries, "%u");
414 pv(lf, lf_dirent_format, "%u");
415 pv(lf, lf_next, "%llu");
416
417 pa(lf, lf_reserved, 32);
418}
419
420void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf)
421{
422 struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
423
424 ea->ea_rec_len = be32_to_cpu(str->ea_rec_len);
425 ea->ea_data_len = be32_to_cpu(str->ea_data_len);
426 ea->ea_name_len = str->ea_name_len;
427 ea->ea_type = str->ea_type;
428 ea->ea_flags = str->ea_flags;
429 ea->ea_num_ptrs = str->ea_num_ptrs;
430}
431
432void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf)
433{
434 struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
435
436 str->ea_rec_len = cpu_to_be32(ea->ea_rec_len);
437 str->ea_data_len = cpu_to_be32(ea->ea_data_len);
438 str->ea_name_len = ea->ea_name_len;
439 str->ea_type = ea->ea_type;
440 str->ea_flags = ea->ea_flags;
441 str->ea_num_ptrs = ea->ea_num_ptrs;
442 str->__pad = 0;
443}
444
445void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name)
446{
447 char buf[GFS2_EA_MAX_NAME_LEN + 1];
448
449 pv(ea, ea_rec_len, "%u");
450 pv(ea, ea_data_len, "%u");
451 pv(ea, ea_name_len, "%u");
452 pv(ea, ea_type, "%u");
453 pv(ea, ea_flags, "%u");
454 pv(ea, ea_num_ptrs, "%u");
455
456 memset(buf, 0, GFS2_EA_MAX_NAME_LEN + 1);
457 memcpy(buf, name, ea->ea_name_len);
458 printk(" name = %s\n", buf);
459}
460
461void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
462{
463 struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
464
465 gfs2_meta_header_in(&lh->lh_header, buf);
466 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
467 lh->lh_flags = be32_to_cpu(str->lh_flags);
468 lh->lh_tail = be32_to_cpu(str->lh_tail);
469 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
470 lh->lh_hash = be32_to_cpu(str->lh_hash);
471}
472
473void gfs2_log_header_print(struct gfs2_log_header *lh)
474{
475 gfs2_meta_header_print(&lh->lh_header);
476 pv(lh, lh_sequence, "%llu");
477 pv(lh, lh_flags, "0x%.8X");
478 pv(lh, lh_tail, "%u");
479 pv(lh, lh_blkno, "%u");
480 pv(lh, lh_hash, "0x%.8X");
481}
482
483void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld)
484{
485 gfs2_meta_header_print(&ld->ld_header);
486 pv(ld, ld_type, "%u");
487 pv(ld, ld_length, "%u");
488 pv(ld, ld_data1, "%u");
489 pv(ld, ld_data2, "%u");
490
491 pa(ld, ld_reserved, 32);
492}
493
494void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf)
495{
496 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
497
498 ir->ir_start = be64_to_cpu(str->ir_start);
499 ir->ir_length = be64_to_cpu(str->ir_length);
500}
501
502void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf)
503{
504 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
505
506 str->ir_start = cpu_to_be64(ir->ir_start);
507 str->ir_length = cpu_to_be64(ir->ir_length);
508}
509
510void gfs2_inum_range_print(struct gfs2_inum_range *ir)
511{
512 pv(ir, ir_start, "%llu");
513 pv(ir, ir_length, "%llu");
514}
515
516void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf)
517{
518 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
519
520 sc->sc_total = be64_to_cpu(str->sc_total);
521 sc->sc_free = be64_to_cpu(str->sc_free);
522 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
523}
524
525void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf)
526{
527 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
528
529 str->sc_total = cpu_to_be64(sc->sc_total);
530 str->sc_free = cpu_to_be64(sc->sc_free);
531 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
532}
533
534void gfs2_statfs_change_print(struct gfs2_statfs_change *sc)
535{
536 pv(sc, sc_total, "%lld");
537 pv(sc, sc_free, "%lld");
538 pv(sc, sc_dinodes, "%lld");
539}
540
541void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf)
542{
543 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
544
545 gfs2_inum_in(&ut->ut_inum, buf);
546 ut->ut_flags = be32_to_cpu(str->ut_flags);
547}
548
549void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf)
550{
551 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
552
553 gfs2_inum_out(&ut->ut_inum, buf);
554 str->ut_flags = cpu_to_be32(ut->ut_flags);
555 str->__pad = 0;
556}
557
558void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut)
559{
560 gfs2_inum_print(&ut->ut_inum);
561 pv(ut, ut_flags, "%u");
562}
563
564void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf)
565{
566 struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
567
568 qc->qc_change = be64_to_cpu(str->qc_change);
569 qc->qc_flags = be32_to_cpu(str->qc_flags);
570 qc->qc_id = be32_to_cpu(str->qc_id);
571}
572
573void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf)
574{
575 struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
576
577 str->qc_change = cpu_to_be64(qc->qc_change);
578 str->qc_flags = cpu_to_be32(qc->qc_flags);
579 str->qc_id = cpu_to_be32(qc->qc_id);
580}
581
582void gfs2_quota_change_print(struct gfs2_quota_change *qc)
583{
584 pv(qc, qc_change, "%lld");
585 pv(qc, qc_flags, "0x%.8X");
586 pv(qc, qc_id, "%u");
587}
588
589
590
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
new file mode 100644
index 000000000000..0aa5f140ddb1
--- /dev/null
+++ b/fs/gfs2/ops_address.c
@@ -0,0 +1,515 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "bmap.h"
20#include "glock.h"
21#include "inode.h"
22#include "jdata.h"
23#include "log.h"
24#include "meta_io.h"
25#include "ops_address.h"
26#include "page.h"
27#include "quota.h"
28#include "trans.h"
29
30/**
31 * get_block - Fills in a buffer head with details about a block
32 * @inode: The inode
33 * @lblock: The block number to look up
34 * @bh_result: The buffer head to return the result in
35 * @create: Non-zero if we may add block to the file
36 *
37 * Returns: errno
38 */
39
40static int get_block(struct inode *inode, sector_t lblock,
41 struct buffer_head *bh_result, int create)
42{
43 struct gfs2_inode *ip = get_v2ip(inode);
44 int new = create;
45 uint64_t dblock;
46 int error;
47
48 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
49 if (error)
50 return error;
51
52 if (!dblock)
53 return 0;
54
55 map_bh(bh_result, inode->i_sb, dblock);
56 if (new)
57 set_buffer_new(bh_result);
58
59 return 0;
60}
61
62/**
63 * get_block_noalloc - Fills in a buffer head with details about a block
64 * @inode: The inode
65 * @lblock: The block number to look up
66 * @bh_result: The buffer head to return the result in
67 * @create: Non-zero if we may add block to the file
68 *
69 * Returns: errno
70 */
71
72static int get_block_noalloc(struct inode *inode, sector_t lblock,
73 struct buffer_head *bh_result, int create)
74{
75 struct gfs2_inode *ip = get_v2ip(inode);
76 int new = 0;
77 uint64_t dblock;
78 int error;
79
80 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
81 if (error)
82 return error;
83
84 if (dblock)
85 map_bh(bh_result, inode->i_sb, dblock);
86 else if (gfs2_assert_withdraw(ip->i_sbd, !create))
87 error = -EIO;
88
89 return error;
90}
91
92static int get_blocks(struct inode *inode, sector_t lblock,
93 unsigned long max_blocks, struct buffer_head *bh_result,
94 int create)
95{
96 struct gfs2_inode *ip = get_v2ip(inode);
97 int new = create;
98 uint64_t dblock;
99 uint32_t extlen;
100 int error;
101
102 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
103 if (error)
104 return error;
105
106 if (!dblock)
107 return 0;
108
109 map_bh(bh_result, inode->i_sb, dblock);
110 if (new)
111 set_buffer_new(bh_result);
112
113 if (extlen > max_blocks)
114 extlen = max_blocks;
115 bh_result->b_size = extlen << inode->i_blkbits;
116
117 return 0;
118}
119
120static int get_blocks_noalloc(struct inode *inode, sector_t lblock,
121 unsigned long max_blocks,
122 struct buffer_head *bh_result, int create)
123{
124 struct gfs2_inode *ip = get_v2ip(inode);
125 int new = 0;
126 uint64_t dblock;
127 uint32_t extlen;
128 int error;
129
130 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
131 if (error)
132 return error;
133
134 if (dblock) {
135 map_bh(bh_result, inode->i_sb, dblock);
136 if (extlen > max_blocks)
137 extlen = max_blocks;
138 bh_result->b_size = extlen << inode->i_blkbits;
139 } else if (gfs2_assert_withdraw(ip->i_sbd, !create))
140 error = -EIO;
141
142 return error;
143}
144
145/**
146 * gfs2_writepage - Write complete page
147 * @page: Page to write
148 *
149 * Returns: errno
150 *
151 * Use Linux VFS block_write_full_page() to write one page,
152 * using GFS2's get_block_noalloc to find which blocks to write.
153 */
154
155static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
156{
157 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
158 struct gfs2_sbd *sdp = ip->i_sbd;
159 int error;
160
161 atomic_inc(&sdp->sd_ops_address);
162
163 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
164 unlock_page(page);
165 return -EIO;
166 }
167 if (get_transaction) {
168 redirty_page_for_writepage(wbc, page);
169 unlock_page(page);
170 return 0;
171 }
172
173 error = block_write_full_page(page, get_block_noalloc, wbc);
174
175 gfs2_meta_cache_flush(ip);
176
177 return error;
178}
179
180/**
181 * stuffed_readpage - Fill in a Linux page with stuffed file data
182 * @ip: the inode
183 * @page: the page
184 *
185 * Returns: errno
186 */
187
188static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
189{
190 struct buffer_head *dibh;
191 void *kaddr;
192 int error;
193
194 error = gfs2_meta_inode_buffer(ip, &dibh);
195 if (error)
196 return error;
197
198 kaddr = kmap(page);
199 memcpy((char *)kaddr,
200 dibh->b_data + sizeof(struct gfs2_dinode),
201 ip->i_di.di_size);
202 memset((char *)kaddr + ip->i_di.di_size,
203 0,
204 PAGE_CACHE_SIZE - ip->i_di.di_size);
205 kunmap(page);
206
207 brelse(dibh);
208
209 SetPageUptodate(page);
210
211 return 0;
212}
213
214static int zero_readpage(struct page *page)
215{
216 void *kaddr;
217
218 kaddr = kmap(page);
219 memset(kaddr, 0, PAGE_CACHE_SIZE);
220 kunmap(page);
221
222 SetPageUptodate(page);
223 unlock_page(page);
224
225 return 0;
226}
227
228/**
229 * jdata_readpage - readpage that goes through gfs2_jdata_read_mem()
230 * @ip:
231 * @page: The page to read
232 *
233 * Returns: errno
234 */
235
236static int jdata_readpage(struct gfs2_inode *ip, struct page *page)
237{
238 void *kaddr;
239 int ret;
240
241 kaddr = kmap(page);
242
243 ret = gfs2_jdata_read_mem(ip, kaddr,
244 (uint64_t)page->index << PAGE_CACHE_SHIFT,
245 PAGE_CACHE_SIZE);
246 if (ret >= 0) {
247 if (ret < PAGE_CACHE_SIZE)
248 memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
249 SetPageUptodate(page);
250 ret = 0;
251 }
252
253 kunmap(page);
254
255 unlock_page(page);
256
257 return ret;
258}
259
260/**
261 * gfs2_readpage - readpage with locking
262 * @file: The file to read a page for
263 * @page: The page to read
264 *
265 * Returns: errno
266 */
267
268static int gfs2_readpage(struct file *file, struct page *page)
269{
270 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
271 struct gfs2_sbd *sdp = ip->i_sbd;
272 int error;
273
274 atomic_inc(&sdp->sd_ops_address);
275
276 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) {
277 unlock_page(page);
278 return -EOPNOTSUPP;
279 }
280
281 if (!gfs2_is_jdata(ip)) {
282 if (gfs2_is_stuffed(ip)) {
283 if (!page->index) {
284 error = stuffed_readpage(ip, page);
285 unlock_page(page);
286 } else
287 error = zero_readpage(page);
288 } else
289 error = block_read_full_page(page, get_block);
290 } else
291 error = jdata_readpage(ip, page);
292
293 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
294 error = -EIO;
295
296 return error;
297}
298
299/**
300 * gfs2_prepare_write - Prepare to write a page to a file
301 * @file: The file to write to
302 * @page: The page which is to be prepared for writing
303 * @from: From (byte range within page)
304 * @to: To (byte range within page)
305 *
306 * Returns: errno
307 */
308
309static int gfs2_prepare_write(struct file *file, struct page *page,
310 unsigned from, unsigned to)
311{
312 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
313 struct gfs2_sbd *sdp = ip->i_sbd;
314 int error = 0;
315
316 atomic_inc(&sdp->sd_ops_address);
317
318 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
319 return -EOPNOTSUPP;
320
321 if (gfs2_is_stuffed(ip)) {
322 uint64_t file_size;
323 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
324
325 if (file_size > sdp->sd_sb.sb_bsize -
326 sizeof(struct gfs2_dinode)) {
327 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
328 page);
329 if (!error)
330 error = block_prepare_write(page, from, to,
331 get_block);
332 } else if (!PageUptodate(page))
333 error = stuffed_readpage(ip, page);
334 } else
335 error = block_prepare_write(page, from, to, get_block);
336
337 return error;
338}
339
340/**
341 * gfs2_commit_write - Commit write to a file
342 * @file: The file to write to
343 * @page: The page containing the data
344 * @from: From (byte range within page)
345 * @to: To (byte range within page)
346 *
347 * Returns: errno
348 */
349
350static int gfs2_commit_write(struct file *file, struct page *page,
351 unsigned from, unsigned to)
352{
353 struct inode *inode = page->mapping->host;
354 struct gfs2_inode *ip = get_v2ip(inode);
355 struct gfs2_sbd *sdp = ip->i_sbd;
356 int error;
357
358 atomic_inc(&sdp->sd_ops_address);
359
360 if (gfs2_is_stuffed(ip)) {
361 struct buffer_head *dibh;
362 uint64_t file_size;
363 void *kaddr;
364
365 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
366
367 error = gfs2_meta_inode_buffer(ip, &dibh);
368 if (error)
369 goto fail;
370
371 gfs2_trans_add_bh(ip->i_gl, dibh);
372
373 kaddr = kmap(page);
374 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
375 (char *)kaddr + from,
376 to - from);
377 kunmap(page);
378
379 brelse(dibh);
380
381 SetPageUptodate(page);
382
383 if (inode->i_size < file_size)
384 i_size_write(inode, file_size);
385 } else {
386 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
387 gfs2_page_add_databufs(sdp, page, from, to);
388 error = generic_commit_write(file, page, from, to);
389 if (error)
390 goto fail;
391 }
392
393 return 0;
394
395 fail:
396 ClearPageUptodate(page);
397
398 return error;
399}
400
401/**
402 * gfs2_bmap - Block map function
403 * @mapping: Address space info
404 * @lblock: The block to map
405 *
406 * Returns: The disk address for the block or 0 on hole or error
407 */
408
409static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
410{
411 struct gfs2_inode *ip = get_v2ip(mapping->host);
412 struct gfs2_holder i_gh;
413 sector_t dblock = 0;
414 int error;
415
416 atomic_inc(&ip->i_sbd->sd_ops_address);
417
418 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
419 if (error)
420 return 0;
421
422 if (!gfs2_is_stuffed(ip))
423 dblock = generic_block_bmap(mapping, lblock, get_block);
424
425 gfs2_glock_dq_uninit(&i_gh);
426
427 return dblock;
428}
429
430static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
431{
432 struct gfs2_databuf *db;
433
434 gfs2_log_lock(sdp);
435 db = get_v2db(bh);
436 if (db) {
437 db->db_bh = NULL;
438 set_v2db(bh, NULL);
439 gfs2_log_unlock(sdp);
440 brelse(bh);
441 } else
442 gfs2_log_unlock(sdp);
443
444 lock_buffer(bh);
445 clear_buffer_dirty(bh);
446 bh->b_bdev = NULL;
447 clear_buffer_mapped(bh);
448 clear_buffer_req(bh);
449 clear_buffer_new(bh);
450 clear_buffer_delay(bh);
451 unlock_buffer(bh);
452}
453
454static int gfs2_invalidatepage(struct page *page, unsigned long offset)
455{
456 struct gfs2_sbd *sdp = get_v2sdp(page->mapping->host->i_sb);
457 struct buffer_head *head, *bh, *next;
458 unsigned int curr_off = 0;
459 int ret = 1;
460
461 BUG_ON(!PageLocked(page));
462 if (!page_has_buffers(page))
463 return 1;
464
465 bh = head = page_buffers(page);
466 do {
467 unsigned int next_off = curr_off + bh->b_size;
468 next = bh->b_this_page;
469
470 if (offset <= curr_off)
471 discard_buffer(sdp, bh);
472
473 curr_off = next_off;
474 bh = next;
475 } while (bh != head);
476
477 if (!offset)
478 ret = try_to_release_page(page, 0);
479
480 return ret;
481}
482
483static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
484 loff_t offset, unsigned long nr_segs)
485{
486 struct file *file = iocb->ki_filp;
487 struct inode *inode = file->f_mapping->host;
488 struct gfs2_inode *ip = get_v2ip(inode);
489 struct gfs2_sbd *sdp = ip->i_sbd;
490 get_blocks_t *gb = get_blocks;
491
492 atomic_inc(&sdp->sd_ops_address);
493
494 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
495 gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
496 return -EINVAL;
497
498 if (rw == WRITE && !get_transaction)
499 gb = get_blocks_noalloc;
500
501 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
502 offset, nr_segs, gb, NULL);
503}
504
505struct address_space_operations gfs2_file_aops = {
506 .writepage = gfs2_writepage,
507 .readpage = gfs2_readpage,
508 .sync_page = block_sync_page,
509 .prepare_write = gfs2_prepare_write,
510 .commit_write = gfs2_commit_write,
511 .bmap = gfs2_bmap,
512 .invalidatepage = gfs2_invalidatepage,
513 .direct_IO = gfs2_direct_IO,
514};
515
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
new file mode 100644
index 000000000000..ced9ea0fdd31
--- /dev/null
+++ b/fs/gfs2/ops_address.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_ADDRESS_DOT_H__
11#define __OPS_ADDRESS_DOT_H__
12
13extern struct address_space_operations gfs2_file_aops;
14
15#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
new file mode 100644
index 000000000000..5c618611c11b
--- /dev/null
+++ b/fs/gfs2/ops_dentry.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/smp_lock.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "dir.h"
20#include "glock.h"
21#include "ops_dentry.h"
22
23/**
24 * gfs2_drevalidate - Check directory lookup consistency
25 * @dentry: the mapping to check
26 * @nd:
27 *
28 * Check to make sure the lookup necessary to arrive at this inode from its
29 * parent is still good.
30 *
31 * Returns: 1 if the dentry is ok, 0 if it isn't
32 */
33
34static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
35{
36 struct dentry *parent = dget_parent(dentry);
37 struct gfs2_inode *dip = get_v2ip(parent->d_inode);
38 struct gfs2_sbd *sdp = dip->i_sbd;
39 struct inode *inode;
40 struct gfs2_holder d_gh;
41 struct gfs2_inode *ip;
42 struct gfs2_inum inum;
43 unsigned int type;
44 int error;
45
46 lock_kernel();
47
48 atomic_inc(&sdp->sd_ops_dentry);
49
50 inode = dentry->d_inode;
51 if (inode && is_bad_inode(inode))
52 goto invalid;
53
54 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
55 if (error)
56 goto fail;
57
58 error = gfs2_dir_search(dip, &dentry->d_name, &inum, &type);
59 switch (error) {
60 case 0:
61 if (!inode)
62 goto invalid_gunlock;
63 break;
64 case -ENOENT:
65 if (!inode)
66 goto valid_gunlock;
67 goto invalid_gunlock;
68 default:
69 goto fail_gunlock;
70 }
71
72 ip = get_v2ip(inode);
73
74 if (!gfs2_inum_equal(&ip->i_num, &inum))
75 goto invalid_gunlock;
76
77 if (IF2DT(ip->i_di.di_mode) != type) {
78 gfs2_consist_inode(dip);
79 goto fail_gunlock;
80 }
81
82 valid_gunlock:
83 gfs2_glock_dq_uninit(&d_gh);
84
85 valid:
86 unlock_kernel();
87 dput(parent);
88 return 1;
89
90 invalid_gunlock:
91 gfs2_glock_dq_uninit(&d_gh);
92
93 invalid:
94 if (inode && S_ISDIR(inode->i_mode)) {
95 if (have_submounts(dentry))
96 goto valid;
97 shrink_dcache_parent(dentry);
98 }
99 d_drop(dentry);
100
101 unlock_kernel();
102 dput(parent);
103 return 0;
104
105 fail_gunlock:
106 gfs2_glock_dq_uninit(&d_gh);
107
108 fail:
109 unlock_kernel();
110 dput(parent);
111 return 0;
112}
113
/* Dentry operations for GFS2: only revalidation is overridden */
struct dentry_operations gfs2_dops = {
	.d_revalidate = gfs2_drevalidate,
};
117
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
new file mode 100644
index 000000000000..94e3ee170165
--- /dev/null
+++ b/fs/gfs2/ops_dentry.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_DENTRY_DOT_H__
11#define __OPS_DENTRY_DOT_H__
12
13extern struct dentry_operations gfs2_dops;
14
15#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
new file mode 100644
index 000000000000..0ae3a0af192d
--- /dev/null
+++ b/fs/gfs2/ops_export.c
@@ -0,0 +1,310 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "dir.h"
19#include "glock.h"
20#include "glops.h"
21#include "inode.h"
22#include "ops_export.h"
23#include "rgrp.h"
24
25static struct dentry *gfs2_decode_fh(struct super_block *sb,
26 __u32 *fh,
27 int fh_len,
28 int fh_type,
29 int (*acceptable)(void *context,
30 struct dentry *dentry),
31 void *context)
32{
33 struct gfs2_inum this, parent;
34
35 atomic_inc(&get_v2sdp(sb)->sd_ops_export);
36
37 if (fh_type != fh_len)
38 return NULL;
39
40 memset(&parent, 0, sizeof(struct gfs2_inum));
41
42 switch (fh_type) {
43 case 8:
44 parent.no_formal_ino = ((uint64_t)be32_to_cpu(fh[4])) << 32;
45 parent.no_formal_ino |= be32_to_cpu(fh[5]);
46 parent.no_addr = ((uint64_t)be32_to_cpu(fh[6])) << 32;
47 parent.no_addr |= be32_to_cpu(fh[7]);
48 case 4:
49 this.no_formal_ino = ((uint64_t)be32_to_cpu(fh[0])) << 32;
50 this.no_formal_ino |= be32_to_cpu(fh[1]);
51 this.no_addr = ((uint64_t)be32_to_cpu(fh[2])) << 32;
52 this.no_addr |= be32_to_cpu(fh[3]);
53 break;
54 default:
55 return NULL;
56 }
57
58 return gfs2_export_ops.find_exported_dentry(sb, &this, &parent,
59 acceptable, context);
60}
61
62static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
63 int connectable)
64{
65 struct inode *inode = dentry->d_inode;
66 struct gfs2_inode *ip = get_v2ip(inode);
67 struct gfs2_sbd *sdp = ip->i_sbd;
68
69 atomic_inc(&sdp->sd_ops_export);
70
71 if (*len < 4 || (connectable && *len < 8))
72 return 255;
73
74 fh[0] = ip->i_num.no_formal_ino >> 32;
75 fh[0] = cpu_to_be32(fh[0]);
76 fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
77 fh[1] = cpu_to_be32(fh[1]);
78 fh[2] = ip->i_num.no_addr >> 32;
79 fh[2] = cpu_to_be32(fh[2]);
80 fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
81 fh[3] = cpu_to_be32(fh[3]);
82 *len = 4;
83
84 if (!connectable || ip == sdp->sd_root_dir)
85 return *len;
86
87 spin_lock(&dentry->d_lock);
88 inode = dentry->d_parent->d_inode;
89 ip = get_v2ip(inode);
90 gfs2_inode_hold(ip);
91 spin_unlock(&dentry->d_lock);
92
93 fh[4] = ip->i_num.no_formal_ino >> 32;
94 fh[4] = cpu_to_be32(fh[4]);
95 fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
96 fh[5] = cpu_to_be32(fh[5]);
97 fh[6] = ip->i_num.no_addr >> 32;
98 fh[6] = cpu_to_be32(fh[6]);
99 fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
100 fh[7] = cpu_to_be32(fh[7]);
101 *len = 8;
102
103 gfs2_inode_put(ip);
104
105 return *len;
106}
107
/* Search state handed to the get_name_filldir() callback */
struct get_name_filldir {
	struct gfs2_inum inum;	/* inode number being searched for */
	char *name;		/* output buffer that receives the matching name */
};
112
113static int get_name_filldir(void *opaque, const char *name, unsigned int length,
114 uint64_t offset, struct gfs2_inum *inum,
115 unsigned int type)
116{
117 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
118
119 if (!gfs2_inum_equal(inum, &gnfd->inum))
120 return 0;
121
122 memcpy(gnfd->name, name, length);
123 gnfd->name[length] = 0;
124
125 return 1;
126}
127
128static int gfs2_get_name(struct dentry *parent, char *name,
129 struct dentry *child)
130{
131 struct inode *dir = parent->d_inode;
132 struct inode *inode = child->d_inode;
133 struct gfs2_inode *dip, *ip;
134 struct get_name_filldir gnfd;
135 struct gfs2_holder gh;
136 uint64_t offset = 0;
137 int error;
138
139 if (!dir)
140 return -EINVAL;
141
142 atomic_inc(&get_v2sdp(dir->i_sb)->sd_ops_export);
143
144 if (!S_ISDIR(dir->i_mode) || !inode)
145 return -EINVAL;
146
147 dip = get_v2ip(dir);
148 ip = get_v2ip(inode);
149
150 *name = 0;
151 gnfd.inum = ip->i_num;
152 gnfd.name = name;
153
154 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
155 if (error)
156 return error;
157
158 error = gfs2_dir_read(dip, &offset, &gnfd, get_name_filldir);
159
160 gfs2_glock_dq_uninit(&gh);
161
162 if (!error && !*name)
163 error = -ENOENT;
164
165 return error;
166}
167
168static struct dentry *gfs2_get_parent(struct dentry *child)
169{
170 struct gfs2_inode *dip = get_v2ip(child->d_inode);
171 struct qstr dotdot = { .name = "..", .len = 2 };
172 struct gfs2_inode *ip;
173 struct inode *inode;
174 struct dentry *dentry;
175 int error;
176
177 atomic_inc(&dip->i_sbd->sd_ops_export);
178
179 error = gfs2_lookupi(dip, &dotdot, 1, &ip);
180 if (error)
181 return ERR_PTR(error);
182
183 inode = gfs2_ip2v(ip);
184 gfs2_inode_put(ip);
185
186 if (!inode)
187 return ERR_PTR(-ENOMEM);
188
189 dentry = d_alloc_anon(inode);
190 if (!dentry) {
191 iput(inode);
192 return ERR_PTR(-ENOMEM);
193 }
194
195 return dentry;
196}
197
/**
 * gfs2_get_dentry - Find or build a dentry for an inode number
 * @sb: the superblock
 * @inum_p: pointer to the struct gfs2_inum to look up
 *
 * Three stages are tried in turn:
 *   1. gfs2_iget(): if it yields a VFS inode, its formal inode number must
 *      match, otherwise -ESTALE.
 *   2. gfs2_inode_get() with NO_CREATE under a shared inode glock.
 *   3. The block's resource group is consulted; unless the block is marked
 *      as a dinode the handle is stale.  Otherwise the inode is created
 *      with CREATE and refreshed, and the fh2dentry miss counter is bumped.
 * Inodes flagged GFS2_DIF_SYSTEM are refused with -EIO.
 *
 * Returns: an anonymous dentry for the inode, or an ERR_PTR()
 */
static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p)
{
	struct gfs2_sbd *sdp = get_v2sdp(sb);
	struct gfs2_inum *inum = (struct gfs2_inum *)inum_p;
	struct gfs2_holder i_gh, ri_gh, rgd_gh;
	struct gfs2_rgrpd *rgd;
	struct gfs2_inode *ip;
	struct inode *inode;
	struct dentry *dentry;
	int error;

	atomic_inc(&sdp->sd_ops_export);

	/* System files? */

	/* Stage 1: ask gfs2_iget() for an existing VFS inode */
	inode = gfs2_iget(sb, inum);
	if (inode) {
		ip = get_v2ip(inode);
		if (ip->i_num.no_formal_ino != inum->no_formal_ino) {
			iput(inode);
			return ERR_PTR(-ESTALE);
		}
		goto out_inode;
	}

	/* Stage 2: take the inode glock and look without creating */
	error = gfs2_glock_nq_num(sdp,
				  inum->no_addr, &gfs2_inode_glops,
				  LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
				  &i_gh);
	if (error)
		return ERR_PTR(error);

	error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip);
	if (error)
		goto fail;
	if (ip)
		goto out_ip;

	/* Stage 3: verify in the resource group that the block is a dinode */
	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		goto fail;

	error = -EINVAL;
	rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
	if (!rgd)
		goto fail_rindex;

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
	if (error)
		goto fail_rindex;

	error = -ESTALE;
	if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
		goto fail_rgd;

	/* Block type confirmed: the rgrp and rindex locks are no longer needed */
	gfs2_glock_dq_uninit(&rgd_gh);
	gfs2_glock_dq_uninit(&ri_gh);

	error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip);
	if (error)
		goto fail;

	error = gfs2_inode_refresh(ip);
	if (error) {
		gfs2_inode_put(ip);
		goto fail;
	}

	atomic_inc(&sdp->sd_fh2dentry_misses);

 out_ip:
	/* Inodes flagged as system files are refused */
	error = -EIO;
	if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) {
		gfs2_inode_put(ip);
		goto fail;
	}

	gfs2_glock_dq_uninit(&i_gh);

	inode = gfs2_ip2v(ip);
	gfs2_inode_put(ip);

	if (!inode)
		return ERR_PTR(-ENOMEM);

 out_inode:
	dentry = d_alloc_anon(inode);
	if (!dentry) {
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}

	return dentry;

	/* Error unwinding: release locks in reverse order of acquisition */
 fail_rgd:
	gfs2_glock_dq_uninit(&rgd_gh);

 fail_rindex:
	gfs2_glock_dq_uninit(&ri_gh);

 fail:
	gfs2_glock_dq_uninit(&i_gh);
	return ERR_PTR(error);
}
302
303struct export_operations gfs2_export_ops = {
304 .decode_fh = gfs2_decode_fh,
305 .encode_fh = gfs2_encode_fh,
306 .get_name = gfs2_get_name,
307 .get_parent = gfs2_get_parent,
308 .get_dentry = gfs2_get_dentry,
309};
310
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
new file mode 100644
index 000000000000..2f342f3d8755
--- /dev/null
+++ b/fs/gfs2/ops_export.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13extern struct export_operations gfs2_export_ops;
14
15#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
new file mode 100644
index 000000000000..1e6f51b74f44
--- /dev/null
+++ b/fs/gfs2/ops_file.c
@@ -0,0 +1,1597 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/uio.h>
17#include <linux/blkdev.h>
18#include <linux/mm.h>
19#include <linux/smp_lock.h>
20#include <linux/gfs2_ioctl.h>
21#include <asm/semaphore.h>
22#include <asm/uaccess.h>
23
24#include "gfs2.h"
25#include "bmap.h"
26#include "dir.h"
27#include "glock.h"
28#include "glops.h"
29#include "inode.h"
30#include "jdata.h"
31#include "lm.h"
32#include "log.h"
33#include "meta_io.h"
34#include "ops_file.h"
35#include "ops_vm.h"
36#include "quota.h"
37#include "rgrp.h"
38#include "trans.h"
39
/*
 * "bad" is for NFS support.
 *
 * One saved directory entry.  The fields mirror the per-entry arguments of
 * the GFS2 filldir callback (name, length, offset, inode number, type).
 */
struct filldir_bad_entry {
	char *fbe_name;			/* entry name; presumably points into filldir_bad.fdb_name - TODO confirm */
	unsigned int fbe_length;	/* length of fbe_name */
	uint64_t fbe_offset;		/* directory offset of the entry */
	struct gfs2_inum fbe_inum;	/* inode number of the entry */
	unsigned int fbe_type;		/* entry type */
};
48
/*
 * Accumulator for the NFS ("bad") readdir path: an array of saved entries
 * plus a name buffer.  The exact use of the *_off fields is not visible in
 * this part of the file; the notes below are inferred from the names.
 */
struct filldir_bad {
	struct gfs2_sbd *fdb_sbd;		/* the filesystem */

	struct filldir_bad_entry *fdb_entry;	/* array of saved entries */
	unsigned int fdb_entry_num;		/* presumably the capacity of fdb_entry */
	unsigned int fdb_entry_off;		/* presumably the next free slot in fdb_entry */

	char *fdb_name;				/* buffer holding entry names */
	unsigned int fdb_name_size;		/* presumably the size of fdb_name in bytes */
	unsigned int fdb_name_off;		/* presumably the next free byte in fdb_name */
};
60
/*
 * For regular, non-NFS readdir: carries the VFS filldir callback and its
 * argument through the GFS2 directory reader.
 */
struct filldir_reg {
	struct gfs2_sbd *fdr_sbd;	/* the filesystem */
	int fdr_prefetch;		/* presumably enables lock prefetching per entry - TODO confirm */

	filldir_t fdr_filldir;		/* the VFS filldir callback */
	void *fdr_opaque;		/* opaque argument for fdr_filldir */
};
69
70typedef ssize_t(*do_rw_t) (struct file *file,
71 char __user *buf,
72 size_t size, loff_t *offset,
73 unsigned int num_gh, struct gfs2_holder *ghs);
74
75/**
76 * gfs2_llseek - seek to a location in a file
77 * @file: the file
78 * @offset: the offset
79 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
80 *
81 * SEEK_END requires the glock for the file because it references the
82 * file's size.
83 *
84 * Returns: The new offset, or errno
85 */
86
87static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
88{
89 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
90 struct gfs2_holder i_gh;
91 loff_t error;
92
93 atomic_inc(&ip->i_sbd->sd_ops_file);
94
95 if (origin == 2) {
96 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
97 &i_gh);
98 if (!error) {
99 error = remote_llseek(file, offset, origin);
100 gfs2_glock_dq_uninit(&i_gh);
101 }
102 } else
103 error = remote_llseek(file, offset, origin);
104
105 return error;
106}
107
108static inline unsigned int vma2state(struct vm_area_struct *vma)
109{
110 if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
111 (VM_MAYWRITE | VM_MAYSHARE))
112 return LM_ST_EXCLUSIVE;
113 return LM_ST_SHARED;
114}
115
116static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size,
117 loff_t *offset, do_rw_t operation)
118{
119 struct gfs2_holder *ghs;
120 unsigned int num_gh = 0;
121 ssize_t count;
122 struct super_block *sb = file->f_dentry->d_inode->i_sb;
123 struct mm_struct *mm = current->mm;
124 struct vm_area_struct *vma;
125 unsigned long start = (unsigned long)buf;
126 unsigned long end = start + size;
127 int dumping = (current->flags & PF_DUMPCORE);
128 unsigned int x = 0;
129
130 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
131 if (end <= vma->vm_start)
132 break;
133 if (vma->vm_file &&
134 vma->vm_file->f_dentry->d_inode->i_sb == sb) {
135 num_gh++;
136 }
137 }
138
139 ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
140 if (!ghs) {
141 if (!dumping)
142 up_read(&mm->mmap_sem);
143 return -ENOMEM;
144 }
145
146 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
147 if (end <= vma->vm_start)
148 break;
149 if (vma->vm_file) {
150 struct inode *inode = vma->vm_file->f_dentry->d_inode;
151 if (inode->i_sb == sb)
152 gfs2_holder_init(get_v2ip(inode)->i_gl,
153 vma2state(vma), 0, &ghs[x++]);
154 }
155 }
156
157 if (!dumping)
158 up_read(&mm->mmap_sem);
159
160 gfs2_assert(get_v2sdp(sb), x == num_gh);
161
162 count = operation(file, buf, size, offset, num_gh, ghs);
163
164 while (num_gh--)
165 gfs2_holder_uninit(&ghs[num_gh]);
166 kfree(ghs);
167
168 return count;
169}
170
171/**
172 * walk_vm - Walk the vmas associated with a buffer for read or write.
173 * If any of them are gfs2, pass the gfs2 inode down to the read/write
174 * worker function so that locks can be acquired in the correct order.
175 * @file: The file to read/write from/to
176 * @buf: The buffer to copy to/from
177 * @size: The amount of data requested
178 * @offset: The current file offset
179 * @operation: The read or write worker function
180 *
181 * Outputs: Offset - updated according to number of bytes written
182 *
183 * Returns: The number of bytes written, errno on failure
184 */
185
186static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size,
187 loff_t *offset, do_rw_t operation)
188{
189 struct gfs2_holder gh;
190
191 if (current->mm) {
192 struct super_block *sb = file->f_dentry->d_inode->i_sb;
193 struct mm_struct *mm = current->mm;
194 struct vm_area_struct *vma;
195 unsigned long start = (unsigned long)buf;
196 unsigned long end = start + size;
197 int dumping = (current->flags & PF_DUMPCORE);
198
199 if (!dumping)
200 down_read(&mm->mmap_sem);
201
202 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
203 if (end <= vma->vm_start)
204 break;
205 if (vma->vm_file &&
206 vma->vm_file->f_dentry->d_inode->i_sb == sb)
207 goto do_locks;
208 }
209
210 if (!dumping)
211 up_read(&mm->mmap_sem);
212 }
213
214 return operation(file, buf, size, offset, 0, &gh);
215
216do_locks:
217 return walk_vm_hard(file, buf, size, offset, operation);
218}
219
220static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size,
221 loff_t *offset)
222{
223 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
224 ssize_t count = 0;
225
226 if (*offset < 0)
227 return -EINVAL;
228 if (!access_ok(VERIFY_WRITE, buf, size))
229 return -EFAULT;
230
231 if (!(file->f_flags & O_LARGEFILE)) {
232 if (*offset >= MAX_NON_LFS)
233 return -EFBIG;
234 if (*offset + size > MAX_NON_LFS)
235 size = MAX_NON_LFS - *offset;
236 }
237
238 count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
239
240 if (count > 0)
241 *offset += count;
242
243 return count;
244}
245
246/**
247 * do_read_direct - Read bytes from a file
248 * @file: The file to read from
249 * @buf: The buffer to copy into
250 * @size: The amount of data requested
251 * @offset: The current file offset
252 * @num_gh: The number of other locks we need to do the read
253 * @ghs: the locks we need plus one for our lock
254 *
255 * Outputs: Offset - updated according to number of bytes read
256 *
257 * Returns: The number of bytes read, errno on failure
258 */
259
260static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size,
261 loff_t *offset, unsigned int num_gh,
262 struct gfs2_holder *ghs)
263{
264 struct inode *inode = file->f_mapping->host;
265 struct gfs2_inode *ip = get_v2ip(inode);
266 unsigned int state = LM_ST_DEFERRED;
267 int flags = 0;
268 unsigned int x;
269 ssize_t count = 0;
270 int error;
271
272 for (x = 0; x < num_gh; x++)
273 if (ghs[x].gh_gl == ip->i_gl) {
274 state = LM_ST_SHARED;
275 flags |= GL_LOCAL_EXCL;
276 break;
277 }
278
279 gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
280
281 error = gfs2_glock_nq_m(num_gh + 1, ghs);
282 if (error)
283 goto out;
284
285 error = -EINVAL;
286 if (gfs2_is_jdata(ip))
287 goto out_gunlock;
288
289 if (gfs2_is_stuffed(ip)) {
290 size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
291
292 if (((*offset) & mask) || (((unsigned long)buf) & mask))
293 goto out_gunlock;
294
295 count = do_jdata_read(file, buf, size & ~mask, offset);
296 } else
297 count = generic_file_read(file, buf, size, offset);
298
299 error = 0;
300
301 out_gunlock:
302 gfs2_glock_dq_m(num_gh + 1, ghs);
303
304 out:
305 gfs2_holder_uninit(&ghs[num_gh]);
306
307 return (count) ? count : error;
308}
309
310/**
311 * do_read_buf - Read bytes from a file
312 * @file: The file to read from
313 * @buf: The buffer to copy into
314 * @size: The amount of data requested
315 * @offset: The current file offset
316 * @num_gh: The number of other locks we need to do the read
317 * @ghs: the locks we need plus one for our lock
318 *
319 * Outputs: Offset - updated according to number of bytes read
320 *
321 * Returns: The number of bytes read, errno on failure
322 */
323
324static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
325 loff_t *offset, unsigned int num_gh,
326 struct gfs2_holder *ghs)
327{
328 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
329 ssize_t count = 0;
330 int error;
331
332 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
333
334 error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
335 if (error)
336 goto out;
337
338 if (gfs2_is_jdata(ip))
339 count = do_jdata_read(file, buf, size, offset);
340 else
341 count = generic_file_read(file, buf, size, offset);
342
343 gfs2_glock_dq_m(num_gh + 1, ghs);
344
345 out:
346 gfs2_holder_uninit(&ghs[num_gh]);
347
348 return (count) ? count : error;
349}
350
351/**
352 * gfs2_read - Read bytes from a file
353 * @file: The file to read from
354 * @buf: The buffer to copy into
355 * @size: The amount of data requested
356 * @offset: The current file offset
357 *
358 * Outputs: Offset - updated according to number of bytes read
359 *
360 * Returns: The number of bytes read, errno on failure
361 */
362
363static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size,
364 loff_t *offset)
365{
366 atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
367
368 if (file->f_flags & O_DIRECT)
369 return walk_vm(file, buf, size, offset, do_read_direct);
370 else
371 return walk_vm(file, buf, size, offset, do_read_buf);
372}
373
374/**
375 * grope_mapping - feel up a mapping that needs to be written
376 * @buf: the start of the memory to be written
377 * @size: the size of the memory to be written
378 *
379 * We do this after acquiring the locks on the mapping,
380 * but before starting the write transaction. We need to make
381 * sure that we don't cause recursive transactions if blocks
382 * need to be allocated to the file backing the mapping.
383 *
384 * Returns: errno
385 */
386
387static int grope_mapping(const char __user *buf, size_t size)
388{
389 const char __user *stop = buf + size;
390 char c;
391
392 while (buf < stop) {
393 if (copy_from_user(&c, buf, 1))
394 return -EFAULT;
395 buf += PAGE_CACHE_SIZE;
396 buf = (const char __user *)PAGE_ALIGN((unsigned long)buf);
397 }
398
399 return 0;
400}
401
402/**
403 * do_write_direct_alloc - Write bytes to a file
404 * @file: The file to write to
405 * @buf: The buffer to copy from
406 * @size: The amount of data requested
407 * @offset: The current file offset
408 *
409 * Outputs: Offset - updated according to number of bytes written
410 *
411 * Returns: The number of bytes written, errno on failure
412 */
413
414static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size,
415 loff_t *offset)
416{
417 struct inode *inode = file->f_mapping->host;
418 struct gfs2_inode *ip = get_v2ip(inode);
419 struct gfs2_sbd *sdp = ip->i_sbd;
420 struct gfs2_alloc *al = NULL;
421 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
422 struct buffer_head *dibh;
423 unsigned int data_blocks, ind_blocks;
424 ssize_t count;
425 int error;
426
427 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
428
429 al = gfs2_alloc_get(ip);
430
431 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
432 if (error)
433 goto fail;
434
435 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
436 if (error)
437 goto fail_gunlock_q;
438
439 al->al_requested = data_blocks + ind_blocks;
440
441 error = gfs2_inplace_reserve(ip);
442 if (error)
443 goto fail_gunlock_q;
444
445 error = gfs2_trans_begin(sdp,
446 al->al_rgd->rd_ri.ri_length + ind_blocks +
447 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
448 if (error)
449 goto fail_ipres;
450
451 if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
452 error = gfs2_meta_inode_buffer(ip, &dibh);
453 if (error)
454 goto fail_end_trans;
455
456 ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
457 (~(S_ISUID | S_ISGID)) : (~S_ISUID);
458
459 gfs2_trans_add_bh(ip->i_gl, dibh);
460 gfs2_dinode_out(&ip->i_di, dibh->b_data);
461 brelse(dibh);
462 }
463
464 if (gfs2_is_stuffed(ip)) {
465 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
466 if (error)
467 goto fail_end_trans;
468 }
469
470 count = generic_file_write_nolock(file, &local_iov, 1, offset);
471 if (count < 0) {
472 error = count;
473 goto fail_end_trans;
474 }
475
476 error = gfs2_meta_inode_buffer(ip, &dibh);
477 if (error)
478 goto fail_end_trans;
479
480 if (ip->i_di.di_size < inode->i_size)
481 ip->i_di.di_size = inode->i_size;
482 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
483
484 gfs2_trans_add_bh(ip->i_gl, dibh);
485 gfs2_dinode_out(&ip->i_di, dibh->b_data);
486 brelse(dibh);
487
488 gfs2_trans_end(sdp);
489
490 if (file->f_flags & O_SYNC)
491 gfs2_log_flush_glock(ip->i_gl);
492
493 gfs2_inplace_release(ip);
494 gfs2_quota_unlock(ip);
495 gfs2_alloc_put(ip);
496
497 if (file->f_mapping->nrpages) {
498 error = filemap_fdatawrite(file->f_mapping);
499 if (!error)
500 error = filemap_fdatawait(file->f_mapping);
501 }
502 if (error)
503 return error;
504
505 return count;
506
507 fail_end_trans:
508 gfs2_trans_end(sdp);
509
510 fail_ipres:
511 gfs2_inplace_release(ip);
512
513 fail_gunlock_q:
514 gfs2_quota_unlock(ip);
515
516 fail:
517 gfs2_alloc_put(ip);
518
519 return error;
520}
521
522/**
523 * do_write_direct - Write bytes to a file
524 * @file: The file to write to
525 * @buf: The buffer to copy from
526 * @size: The amount of data requested
527 * @offset: The current file offset
528 * @num_gh: The number of other locks we need to do the read
529 * @gh: the locks we need plus one for our lock
530 *
531 * Outputs: Offset - updated according to number of bytes written
532 *
533 * Returns: The number of bytes written, errno on failure
534 */
535
536static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size,
537 loff_t *offset, unsigned int num_gh,
538 struct gfs2_holder *ghs)
539{
540 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
541 struct gfs2_sbd *sdp = ip->i_sbd;
542 struct gfs2_file *fp = get_v2fp(file);
543 unsigned int state = LM_ST_DEFERRED;
544 int alloc_required;
545 unsigned int x;
546 size_t s;
547 ssize_t count = 0;
548 int error;
549
550 if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
551 state = LM_ST_EXCLUSIVE;
552 else
553 for (x = 0; x < num_gh; x++)
554 if (ghs[x].gh_gl == ip->i_gl) {
555 state = LM_ST_EXCLUSIVE;
556 break;
557 }
558
559 restart:
560 gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
561
562 error = gfs2_glock_nq_m(num_gh + 1, ghs);
563 if (error)
564 goto out;
565
566 error = -EINVAL;
567 if (gfs2_is_jdata(ip))
568 goto out_gunlock;
569
570 if (num_gh) {
571 error = grope_mapping(buf, size);
572 if (error)
573 goto out_gunlock;
574 }
575
576 if (file->f_flags & O_APPEND)
577 *offset = ip->i_di.di_size;
578
579 if (!(file->f_flags & O_LARGEFILE)) {
580 error = -EFBIG;
581 if (*offset >= MAX_NON_LFS)
582 goto out_gunlock;
583 if (*offset + size > MAX_NON_LFS)
584 size = MAX_NON_LFS - *offset;
585 }
586
587 if (gfs2_is_stuffed(ip) ||
588 *offset + size > ip->i_di.di_size ||
589 ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
590 alloc_required = 1;
591 else {
592 error = gfs2_write_alloc_required(ip, *offset, size,
593 &alloc_required);
594 if (error)
595 goto out_gunlock;
596 }
597
598 if (alloc_required && state != LM_ST_EXCLUSIVE) {
599 gfs2_glock_dq_m(num_gh + 1, ghs);
600 gfs2_holder_uninit(&ghs[num_gh]);
601 state = LM_ST_EXCLUSIVE;
602 goto restart;
603 }
604
605 if (alloc_required) {
606 set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
607
608 /* split large writes into smaller atomic transactions */
609 while (size) {
610 s = gfs2_tune_get(sdp, gt_max_atomic_write);
611 if (s > size)
612 s = size;
613
614 error = do_write_direct_alloc(file, buf, s, offset);
615 if (error < 0)
616 goto out_gunlock;
617
618 buf += error;
619 size -= error;
620 count += error;
621 }
622 } else {
623 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
624 struct gfs2_holder t_gh;
625
626 clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
627
628 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
629 GL_NEVER_RECURSE, &t_gh);
630 if (error)
631 goto out_gunlock;
632
633 count = generic_file_write_nolock(file, &local_iov, 1, offset);
634
635 gfs2_glock_dq_uninit(&t_gh);
636 }
637
638 error = 0;
639
640 out_gunlock:
641 gfs2_glock_dq_m(num_gh + 1, ghs);
642
643 out:
644 gfs2_holder_uninit(&ghs[num_gh]);
645
646 return (count) ? count : error;
647}
648
649/**
650 * do_do_write_buf - Write bytes to a file
651 * @file: The file to write to
652 * @buf: The buffer to copy from
653 * @size: The amount of data requested
654 * @offset: The current file offset
655 *
656 * Outputs: Offset - updated according to number of bytes written
657 *
658 * Returns: The number of bytes written, errno on failure
659 */
660
661static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size,
662 loff_t *offset)
663{
664 struct inode *inode = file->f_mapping->host;
665 struct gfs2_inode *ip = get_v2ip(inode);
666 struct gfs2_sbd *sdp = ip->i_sbd;
667 struct gfs2_alloc *al = NULL;
668 struct buffer_head *dibh;
669 unsigned int data_blocks, ind_blocks;
670 int alloc_required, journaled;
671 ssize_t count;
672 int error;
673
674 journaled = gfs2_is_jdata(ip);
675
676 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
677
678 error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
679 if (error)
680 return error;
681
682 if (alloc_required) {
683 al = gfs2_alloc_get(ip);
684
685 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
686 if (error)
687 goto fail;
688
689 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
690 if (error)
691 goto fail_gunlock_q;
692
693 al->al_requested = data_blocks + ind_blocks;
694
695 error = gfs2_inplace_reserve(ip);
696 if (error)
697 goto fail_gunlock_q;
698
699 error = gfs2_trans_begin(sdp,
700 al->al_rgd->rd_ri.ri_length +
701 ind_blocks +
702 ((journaled) ? data_blocks : 0) +
703 RES_DINODE + RES_STATFS + RES_QUOTA,
704 0);
705 if (error)
706 goto fail_ipres;
707 } else {
708 error = gfs2_trans_begin(sdp,
709 ((journaled) ? data_blocks : 0) +
710 RES_DINODE,
711 0);
712 if (error)
713 goto fail_ipres;
714 }
715
716 if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
717 error = gfs2_meta_inode_buffer(ip, &dibh);
718 if (error)
719 goto fail_end_trans;
720
721 ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
722 (~(S_ISUID | S_ISGID)) : (~S_ISUID);
723
724 gfs2_trans_add_bh(ip->i_gl, dibh);
725 gfs2_dinode_out(&ip->i_di, dibh->b_data);
726 brelse(dibh);
727 }
728
729 if (journaled) {
730 count = gfs2_jdata_write(ip, buf, *offset, size,
731 gfs2_copy_from_user);
732 if (count < 0) {
733 error = count;
734 goto fail_end_trans;
735 }
736
737 *offset += count;
738 } else {
739 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
740
741 count = generic_file_write_nolock(file, &local_iov, 1, offset);
742 if (count < 0) {
743 error = count;
744 goto fail_end_trans;
745 }
746
747 error = gfs2_meta_inode_buffer(ip, &dibh);
748 if (error)
749 goto fail_end_trans;
750
751 if (ip->i_di.di_size < inode->i_size)
752 ip->i_di.di_size = inode->i_size;
753 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
754
755 gfs2_trans_add_bh(ip->i_gl, dibh);
756 gfs2_dinode_out(&ip->i_di, dibh->b_data);
757 brelse(dibh);
758 }
759
760 gfs2_trans_end(sdp);
761
762 if (file->f_flags & O_SYNC || IS_SYNC(inode)) {
763 gfs2_log_flush_glock(ip->i_gl);
764 error = filemap_fdatawrite(file->f_mapping);
765 if (error == 0)
766 error = filemap_fdatawait(file->f_mapping);
767 if (error)
768 goto fail_ipres;
769 }
770
771 if (alloc_required) {
772 gfs2_assert_warn(sdp, count != size ||
773 al->al_alloced);
774 gfs2_inplace_release(ip);
775 gfs2_quota_unlock(ip);
776 gfs2_alloc_put(ip);
777 }
778
779 return count;
780
781 fail_end_trans:
782 gfs2_trans_end(sdp);
783
784 fail_ipres:
785 if (alloc_required)
786 gfs2_inplace_release(ip);
787
788 fail_gunlock_q:
789 if (alloc_required)
790 gfs2_quota_unlock(ip);
791
792 fail:
793 if (alloc_required)
794 gfs2_alloc_put(ip);
795
796 return error;
797}
798
799/**
800 * do_write_buf - Write bytes to a file
801 * @file: The file to write to
802 * @buf: The buffer to copy from
803 * @size: The amount of data requested
804 * @offset: The current file offset
805 * @num_gh: The number of other locks we need to do the read
806 * @gh: the locks we need plus one for our lock
807 *
808 * Outputs: Offset - updated according to number of bytes written
809 *
810 * Returns: The number of bytes written, errno on failure
811 */
812
813static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size,
814 loff_t *offset, unsigned int num_gh,
815 struct gfs2_holder *ghs)
816{
817 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
818 struct gfs2_sbd *sdp = ip->i_sbd;
819 size_t s;
820 ssize_t count = 0;
821 int error;
822
823 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
824
825 error = gfs2_glock_nq_m(num_gh + 1, ghs);
826 if (error)
827 goto out;
828
829 if (num_gh) {
830 error = grope_mapping(buf, size);
831 if (error)
832 goto out_gunlock;
833 }
834
835 if (file->f_flags & O_APPEND)
836 *offset = ip->i_di.di_size;
837
838 if (!(file->f_flags & O_LARGEFILE)) {
839 error = -EFBIG;
840 if (*offset >= MAX_NON_LFS)
841 goto out_gunlock;
842 if (*offset + size > MAX_NON_LFS)
843 size = MAX_NON_LFS - *offset;
844 }
845
846 /* split large writes into smaller atomic transactions */
847 while (size) {
848 s = gfs2_tune_get(sdp, gt_max_atomic_write);
849 if (s > size)
850 s = size;
851
852 error = do_do_write_buf(file, buf, s, offset);
853 if (error < 0)
854 goto out_gunlock;
855
856 buf += error;
857 size -= error;
858 count += error;
859 }
860
861 error = 0;
862
863 out_gunlock:
864 gfs2_glock_dq_m(num_gh + 1, ghs);
865
866 out:
867 gfs2_holder_uninit(&ghs[num_gh]);
868
869 return (count) ? count : error;
870}
871
872/**
873 * gfs2_write - Write bytes to a file
874 * @file: The file to write to
875 * @buf: The buffer to copy from
876 * @size: The amount of data requested
877 * @offset: The current file offset
878 *
879 * Outputs: Offset - updated according to number of bytes written
880 *
881 * Returns: The number of bytes written, errno on failure
882 */
883
884static ssize_t gfs2_write(struct file *file, const char __user *buf,
885 size_t size, loff_t *offset)
886{
887 struct inode *inode = file->f_mapping->host;
888 ssize_t count;
889
890 atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
891
892 if (*offset < 0)
893 return -EINVAL;
894 if (!access_ok(VERIFY_READ, buf, size))
895 return -EFAULT;
896
897 mutex_lock(&inode->i_mutex);
898 if (file->f_flags & O_DIRECT)
899 count = walk_vm(file, buf, size, offset,
900 do_write_direct);
901 else
902 count = walk_vm(file, buf, size, offset, do_write_buf);
903 mutex_unlock(&inode->i_mutex);
904
905 return count;
906}
907
908/**
909 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
910 * @opaque: opaque data used by the function
911 * @name: the name of the directory entry
912 * @length: the length of the name
913 * @offset: the entry's offset in the directory
914 * @inum: the inode number the entry points to
915 * @type: the type of inode the entry points to
916 *
917 * Returns: 0 on success, 1 if buffer full
918 */
919
920static int filldir_reg_func(void *opaque, const char *name, unsigned int length,
921 uint64_t offset, struct gfs2_inum *inum,
922 unsigned int type)
923{
924 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
925 struct gfs2_sbd *sdp = fdr->fdr_sbd;
926 int error;
927
928 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
929 inum->no_formal_ino, type);
930 if (error)
931 return 1;
932
933 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
934 gfs2_glock_prefetch_num(sdp,
935 inum->no_addr, &gfs2_inode_glops,
936 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
937 gfs2_glock_prefetch_num(sdp,
938 inum->no_addr, &gfs2_iopen_glops,
939 LM_ST_SHARED, LM_FLAG_TRY);
940 }
941
942 return 0;
943}
944
945/**
946 * readdir_reg - Read directory entries from a directory
947 * @file: The directory to read from
948 * @dirent: Buffer for dirents
949 * @filldir: Function used to do the copying
950 *
951 * Returns: errno
952 */
953
954static int readdir_reg(struct file *file, void *dirent, filldir_t filldir)
955{
956 struct gfs2_inode *dip = get_v2ip(file->f_mapping->host);
957 struct filldir_reg fdr;
958 struct gfs2_holder d_gh;
959 uint64_t offset = file->f_pos;
960 int error;
961
962 fdr.fdr_sbd = dip->i_sbd;
963 fdr.fdr_prefetch = 1;
964 fdr.fdr_filldir = filldir;
965 fdr.fdr_opaque = dirent;
966
967 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
968 error = gfs2_glock_nq_atime(&d_gh);
969 if (error) {
970 gfs2_holder_uninit(&d_gh);
971 return error;
972 }
973
974 error = gfs2_dir_read(dip, &offset, &fdr, filldir_reg_func);
975
976 gfs2_glock_dq_uninit(&d_gh);
977
978 file->f_pos = offset;
979
980 return error;
981}
982
983/**
984 * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read()
985 * @opaque: opaque data used by the function
986 * @name: the name of the directory entry
987 * @length: the length of the name
988 * @offset: the entry's offset in the directory
989 * @inum: the inode number the entry points to
990 * @type: the type of inode the entry points to
991 *
992 * For supporting NFS.
993 *
994 * Returns: 0 on success, 1 if buffer full
995 */
996
997static int filldir_bad_func(void *opaque, const char *name, unsigned int length,
998 uint64_t offset, struct gfs2_inum *inum,
999 unsigned int type)
1000{
1001 struct filldir_bad *fdb = (struct filldir_bad *)opaque;
1002 struct gfs2_sbd *sdp = fdb->fdb_sbd;
1003 struct filldir_bad_entry *fbe;
1004
1005 if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
1006 fdb->fdb_name_off + length > fdb->fdb_name_size)
1007 return 1;
1008
1009 fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
1010 fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
1011 memcpy(fbe->fbe_name, name, length);
1012 fbe->fbe_length = length;
1013 fbe->fbe_offset = offset;
1014 fbe->fbe_inum = *inum;
1015 fbe->fbe_type = type;
1016
1017 fdb->fdb_entry_off++;
1018 fdb->fdb_name_off += length;
1019
1020 if (!(length == 1 && *name == '.')) {
1021 gfs2_glock_prefetch_num(sdp,
1022 inum->no_addr, &gfs2_inode_glops,
1023 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
1024 gfs2_glock_prefetch_num(sdp,
1025 inum->no_addr, &gfs2_iopen_glops,
1026 LM_ST_SHARED, LM_FLAG_TRY);
1027 }
1028
1029 return 0;
1030}
1031
1032/**
1033 * readdir_bad - Read directory entries from a directory
1034 * @file: The directory to read from
1035 * @dirent: Buffer for dirents
1036 * @filldir: Function used to do the copying
1037 *
1038 * For supporting NFS.
1039 *
1040 * Returns: errno
1041 */
1042
1043static int readdir_bad(struct file *file, void *dirent, filldir_t filldir)
1044{
1045 struct gfs2_inode *dip = get_v2ip(file->f_mapping->host);
1046 struct gfs2_sbd *sdp = dip->i_sbd;
1047 struct filldir_reg fdr;
1048 unsigned int entries, size;
1049 struct filldir_bad *fdb;
1050 struct gfs2_holder d_gh;
1051 uint64_t offset = file->f_pos;
1052 unsigned int x;
1053 struct filldir_bad_entry *fbe;
1054 int error;
1055
1056 entries = gfs2_tune_get(sdp, gt_entries_per_readdir);
1057 size = sizeof(struct filldir_bad) +
1058 entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE);
1059
1060 fdb = kzalloc(size, GFP_KERNEL);
1061 if (!fdb)
1062 return -ENOMEM;
1063
1064 fdb->fdb_sbd = sdp;
1065 fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
1066 fdb->fdb_entry_num = entries;
1067 fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
1068 entries * sizeof(struct filldir_bad_entry);
1069 fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE;
1070
1071 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
1072 error = gfs2_glock_nq_atime(&d_gh);
1073 if (error) {
1074 gfs2_holder_uninit(&d_gh);
1075 goto out;
1076 }
1077
1078 error = gfs2_dir_read(dip, &offset, fdb, filldir_bad_func);
1079
1080 gfs2_glock_dq_uninit(&d_gh);
1081
1082 fdr.fdr_sbd = sdp;
1083 fdr.fdr_prefetch = 0;
1084 fdr.fdr_filldir = filldir;
1085 fdr.fdr_opaque = dirent;
1086
1087 for (x = 0; x < fdb->fdb_entry_off; x++) {
1088 fbe = &fdb->fdb_entry[x];
1089
1090 error = filldir_reg_func(&fdr,
1091 fbe->fbe_name, fbe->fbe_length,
1092 fbe->fbe_offset,
1093 &fbe->fbe_inum, fbe->fbe_type);
1094 if (error) {
1095 file->f_pos = fbe->fbe_offset;
1096 error = 0;
1097 goto out;
1098 }
1099 }
1100
1101 file->f_pos = offset;
1102
1103 out:
1104 kfree(fdb);
1105
1106 return error;
1107}
1108
1109/**
1110 * gfs2_readdir - Read directory entries from a directory
1111 * @file: The directory to read from
1112 * @dirent: Buffer for dirents
1113 * @filldir: Function used to do the copying
1114 *
1115 * Returns: errno
1116 */
1117
1118static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
1119{
1120 int error;
1121
1122 atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
1123
1124 if (strcmp(current->comm, "nfsd") != 0)
1125 error = readdir_reg(file, dirent, filldir);
1126 else
1127 error = readdir_bad(file, dirent, filldir);
1128
1129 return error;
1130}
1131
1132static int gfs2_ioctl_flags(struct gfs2_inode *ip, unsigned int cmd, unsigned long arg)
1133{
1134 unsigned int lmode = (cmd == GFS2_IOCTL_SETFLAGS) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
1135 struct buffer_head *dibh;
1136 struct gfs2_holder i_gh;
1137 int error;
1138 __u32 flags = 0, change;
1139
1140 if (cmd == GFS2_IOCTL_SETFLAGS) {
1141 error = get_user(flags, (__u32 __user *)arg);
1142 if (error)
1143 return -EFAULT;
1144 }
1145
1146 error = gfs2_glock_nq_init(ip->i_gl, lmode, 0, &i_gh);
1147 if (error)
1148 return error;
1149
1150 if (cmd == GFS2_IOCTL_SETFLAGS) {
1151 change = flags ^ ip->i_di.di_flags;
1152 error = -EPERM;
1153 if (change & (GFS2_DIF_IMMUTABLE|GFS2_DIF_APPENDONLY)) {
1154 if (!capable(CAP_LINUX_IMMUTABLE))
1155 goto out;
1156 }
1157 error = -EINVAL;
1158 if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
1159 if (!S_ISREG(ip->i_di.di_mode))
1160 goto out;
1161 /* FIXME: Would be nice not to require the following test */
1162 if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size)
1163 goto out;
1164 }
1165 if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
1166 if (!S_ISDIR(ip->i_di.di_mode))
1167 goto out;
1168 }
1169
1170 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
1171 if (error)
1172 goto out;
1173
1174 error = gfs2_meta_inode_buffer(ip, &dibh);
1175 if (error)
1176 goto out_trans_end;
1177
1178 ip->i_di.di_flags = flags;
1179
1180 gfs2_trans_add_bh(ip->i_gl, dibh);
1181 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1182
1183 brelse(dibh);
1184
1185out_trans_end:
1186 gfs2_trans_end(ip->i_sbd);
1187 } else {
1188 flags = ip->i_di.di_flags;
1189 }
1190out:
1191 gfs2_glock_dq_uninit(&i_gh);
1192 if (cmd == GFS2_IOCTL_GETFLAGS) {
1193 if (put_user(flags, (__u32 __user *)arg))
1194 return -EFAULT;
1195 }
1196 return error;
1197}
1198
1199/**
1200 * gfs2_ioctl - do an ioctl on a file
1201 * @inode: the inode
1202 * @file: the file pointer
1203 * @cmd: the ioctl command
1204 * @arg: the argument
1205 *
1206 * Returns: errno
1207 */
1208
1209static int gfs2_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1210 unsigned long arg)
1211{
1212 struct gfs2_inode *ip = get_v2ip(inode);
1213
1214 atomic_inc(&ip->i_sbd->sd_ops_file);
1215
1216 switch (cmd) {
1217 case GFS2_IOCTL_IDENTIFY: {
1218 unsigned int x = GFS2_MAGIC;
1219 if (copy_to_user((unsigned int __user *)arg, &x, sizeof(unsigned int)))
1220 return -EFAULT;
1221 return 0;
1222
1223 case GFS2_IOCTL_SETFLAGS:
1224 case GFS2_IOCTL_GETFLAGS:
1225 return gfs2_ioctl_flags(ip, cmd, arg);
1226 }
1227
1228 default:
1229 return -ENOTTY;
1230 }
1231}
1232
1233/**
1234 * gfs2_mmap -
1235 * @file: The file to map
1236 * @vma: The VMA which described the mapping
1237 *
1238 * Returns: 0 or error code
1239 */
1240
1241static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
1242{
1243 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
1244 struct gfs2_holder i_gh;
1245 int error;
1246
1247 atomic_inc(&ip->i_sbd->sd_ops_file);
1248
1249 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1250 error = gfs2_glock_nq_atime(&i_gh);
1251 if (error) {
1252 gfs2_holder_uninit(&i_gh);
1253 return error;
1254 }
1255
1256 if (gfs2_is_jdata(ip)) {
1257 if (vma->vm_flags & VM_MAYSHARE)
1258 error = -EOPNOTSUPP;
1259 else
1260 vma->vm_ops = &gfs2_vm_ops_private;
1261 } else {
1262 /* This is VM_MAYWRITE instead of VM_WRITE because a call
1263 to mprotect() can turn on VM_WRITE later. */
1264
1265 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
1266 (VM_MAYSHARE | VM_MAYWRITE))
1267 vma->vm_ops = &gfs2_vm_ops_sharewrite;
1268 else
1269 vma->vm_ops = &gfs2_vm_ops_private;
1270 }
1271
1272 gfs2_glock_dq_uninit(&i_gh);
1273
1274 return error;
1275}
1276
1277/**
1278 * gfs2_open - open a file
1279 * @inode: the inode to open
1280 * @file: the struct file for this opening
1281 *
1282 * Returns: errno
1283 */
1284
1285static int gfs2_open(struct inode *inode, struct file *file)
1286{
1287 struct gfs2_inode *ip = get_v2ip(inode);
1288 struct gfs2_holder i_gh;
1289 struct gfs2_file *fp;
1290 int error;
1291
1292 atomic_inc(&ip->i_sbd->sd_ops_file);
1293
1294 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
1295 if (!fp)
1296 return -ENOMEM;
1297
1298 init_MUTEX(&fp->f_fl_mutex);
1299
1300 fp->f_inode = ip;
1301 fp->f_vfile = file;
1302
1303 gfs2_assert_warn(ip->i_sbd, !get_v2fp(file));
1304 set_v2fp(file, fp);
1305
1306 if (S_ISREG(ip->i_di.di_mode)) {
1307 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
1308 &i_gh);
1309 if (error)
1310 goto fail;
1311
1312 if (!(file->f_flags & O_LARGEFILE) &&
1313 ip->i_di.di_size > MAX_NON_LFS) {
1314 error = -EFBIG;
1315 goto fail_gunlock;
1316 }
1317
1318 /* Listen to the Direct I/O flag */
1319
1320 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
1321 file->f_flags |= O_DIRECT;
1322
1323 /* Don't let the user open O_DIRECT on a jdata file */
1324
1325 if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
1326 error = -EINVAL;
1327 goto fail_gunlock;
1328 }
1329
1330 gfs2_glock_dq_uninit(&i_gh);
1331 }
1332
1333 return 0;
1334
1335 fail_gunlock:
1336 gfs2_glock_dq_uninit(&i_gh);
1337
1338 fail:
1339 set_v2fp(file, NULL);
1340 kfree(fp);
1341
1342 return error;
1343}
1344
1345/**
1346 * gfs2_close - called to close a struct file
1347 * @inode: the inode the struct file belongs to
1348 * @file: the struct file being closed
1349 *
1350 * Returns: errno
1351 */
1352
1353static int gfs2_close(struct inode *inode, struct file *file)
1354{
1355 struct gfs2_sbd *sdp = get_v2sdp(inode->i_sb);
1356 struct gfs2_file *fp;
1357
1358 atomic_inc(&sdp->sd_ops_file);
1359
1360 fp = get_v2fp(file);
1361 set_v2fp(file, NULL);
1362
1363 if (gfs2_assert_warn(sdp, fp))
1364 return -EIO;
1365
1366 kfree(fp);
1367
1368 return 0;
1369}
1370
1371/**
1372 * gfs2_fsync - sync the dirty data for a file (across the cluster)
1373 * @file: the file that points to the dentry (we ignore this)
1374 * @dentry: the dentry that points to the inode to sync
1375 *
1376 * Returns: errno
1377 */
1378
1379static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
1380{
1381 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
1382
1383 atomic_inc(&ip->i_sbd->sd_ops_file);
1384 gfs2_log_flush_glock(ip->i_gl);
1385
1386 return 0;
1387}
1388
1389/**
1390 * gfs2_lock - acquire/release a posix lock on a file
1391 * @file: the file pointer
1392 * @cmd: either modify or retrieve lock state, possibly wait
1393 * @fl: type and range of lock
1394 *
1395 * Returns: errno
1396 */
1397
1398static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
1399{
1400 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
1401 struct gfs2_sbd *sdp = ip->i_sbd;
1402 struct lm_lockname name =
1403 { .ln_number = ip->i_num.no_addr,
1404 .ln_type = LM_TYPE_PLOCK };
1405
1406 atomic_inc(&sdp->sd_ops_file);
1407
1408 if (!(fl->fl_flags & FL_POSIX))
1409 return -ENOLCK;
1410 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
1411 return -ENOLCK;
1412
1413 if (sdp->sd_args.ar_localflocks) {
1414 if (IS_GETLK(cmd)) {
1415 struct file_lock *tmp;
1416 lock_kernel();
1417 tmp = posix_test_lock(file, fl);
1418 fl->fl_type = F_UNLCK;
1419 if (tmp)
1420 memcpy(fl, tmp, sizeof(struct file_lock));
1421 unlock_kernel();
1422 return 0;
1423 } else {
1424 int error;
1425 lock_kernel();
1426 error = posix_lock_file_wait(file, fl);
1427 unlock_kernel();
1428 return error;
1429 }
1430 }
1431
1432 if (IS_GETLK(cmd))
1433 return gfs2_lm_plock_get(sdp, &name, file, fl);
1434 else if (fl->fl_type == F_UNLCK)
1435 return gfs2_lm_punlock(sdp, &name, file, fl);
1436 else
1437 return gfs2_lm_plock(sdp, &name, file, cmd, fl);
1438}
1439
1440/**
1441 * gfs2_sendfile - Send bytes to a file or socket
1442 * @in_file: The file to read from
1443 * @out_file: The file to write to
1444 * @count: The amount of data
1445 * @offset: The beginning file offset
1446 *
1447 * Outputs: offset - updated according to number of bytes read
1448 *
1449 * Returns: The number of bytes sent, errno on failure
1450 */
1451
1452static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
1453 read_actor_t actor, void *target)
1454{
1455 struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
1456 struct gfs2_holder gh;
1457 ssize_t retval;
1458
1459 atomic_inc(&ip->i_sbd->sd_ops_file);
1460
1461 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
1462
1463 retval = gfs2_glock_nq_atime(&gh);
1464 if (retval)
1465 goto out;
1466
1467 if (gfs2_is_jdata(ip))
1468 retval = -EOPNOTSUPP;
1469 else
1470 retval = generic_file_sendfile(in_file, offset, count, actor,
1471 target);
1472
1473 gfs2_glock_dq(&gh);
1474
1475 out:
1476 gfs2_holder_uninit(&gh);
1477
1478 return retval;
1479}
1480
1481static int do_flock(struct file *file, int cmd, struct file_lock *fl)
1482{
1483 struct gfs2_file *fp = get_v2fp(file);
1484 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
1485 struct gfs2_inode *ip = fp->f_inode;
1486 struct gfs2_glock *gl;
1487 unsigned int state;
1488 int flags;
1489 int error = 0;
1490
1491 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
1492 flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
1493
1494 down(&fp->f_fl_mutex);
1495
1496 gl = fl_gh->gh_gl;
1497 if (gl) {
1498 if (fl_gh->gh_state == state)
1499 goto out;
1500 gfs2_glock_hold(gl);
1501 flock_lock_file_wait(file,
1502 &(struct file_lock){.fl_type = F_UNLCK});
1503 gfs2_glock_dq_uninit(fl_gh);
1504 } else {
1505 error = gfs2_glock_get(ip->i_sbd,
1506 ip->i_num.no_addr, &gfs2_flock_glops,
1507 CREATE, &gl);
1508 if (error)
1509 goto out;
1510 }
1511
1512 gfs2_holder_init(gl, state, flags, fl_gh);
1513 gfs2_glock_put(gl);
1514
1515 error = gfs2_glock_nq(fl_gh);
1516 if (error) {
1517 gfs2_holder_uninit(fl_gh);
1518 if (error == GLR_TRYFAILED)
1519 error = -EAGAIN;
1520 } else {
1521 error = flock_lock_file_wait(file, fl);
1522 gfs2_assert_warn(ip->i_sbd, !error);
1523 }
1524
1525 out:
1526 up(&fp->f_fl_mutex);
1527
1528 return error;
1529}
1530
1531static void do_unflock(struct file *file, struct file_lock *fl)
1532{
1533 struct gfs2_file *fp = get_v2fp(file);
1534 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
1535
1536 down(&fp->f_fl_mutex);
1537 flock_lock_file_wait(file, fl);
1538 if (fl_gh->gh_gl)
1539 gfs2_glock_dq_uninit(fl_gh);
1540 up(&fp->f_fl_mutex);
1541}
1542
1543/**
1544 * gfs2_flock - acquire/release a flock lock on a file
1545 * @file: the file pointer
1546 * @cmd: either modify or retrieve lock state, possibly wait
1547 * @fl: type and range of lock
1548 *
1549 * Returns: errno
1550 */
1551
1552static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1553{
1554 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
1555 struct gfs2_sbd *sdp = ip->i_sbd;
1556
1557 atomic_inc(&ip->i_sbd->sd_ops_file);
1558
1559 if (!(fl->fl_flags & FL_FLOCK))
1560 return -ENOLCK;
1561 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
1562 return -ENOLCK;
1563
1564 if (sdp->sd_args.ar_localflocks)
1565 return flock_lock_file_wait(file, fl);
1566
1567 if (fl->fl_type == F_UNLCK) {
1568 do_unflock(file, fl);
1569 return 0;
1570 } else
1571 return do_flock(file, cmd, fl);
1572}
1573
1574struct file_operations gfs2_file_fops = {
1575 .llseek = gfs2_llseek,
1576 .read = gfs2_read,
1577 .write = gfs2_write,
1578 .ioctl = gfs2_ioctl,
1579 .mmap = gfs2_mmap,
1580 .open = gfs2_open,
1581 .release = gfs2_close,
1582 .fsync = gfs2_fsync,
1583 .lock = gfs2_lock,
1584 .sendfile = gfs2_sendfile,
1585 .flock = gfs2_flock,
1586};
1587
1588struct file_operations gfs2_dir_fops = {
1589 .readdir = gfs2_readdir,
1590 .ioctl = gfs2_ioctl,
1591 .open = gfs2_open,
1592 .release = gfs2_close,
1593 .fsync = gfs2_fsync,
1594 .lock = gfs2_lock,
1595 .flock = gfs2_flock,
1596};
1597
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
new file mode 100644
index 000000000000..95123d7bbcdf
--- /dev/null
+++ b/fs/gfs2/ops_file.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12
13extern struct file_operations gfs2_file_fops;
14extern struct file_operations gfs2_dir_fops;
15
16#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
new file mode 100644
index 000000000000..c61a80c439a6
--- /dev/null
+++ b/fs/gfs2/ops_fstype.c
@@ -0,0 +1,879 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/vmalloc.h>
16#include <linux/blkdev.h>
17#include <linux/kthread.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "daemon.h"
22#include "glock.h"
23#include "glops.h"
24#include "inode.h"
25#include "lm.h"
26#include "mount.h"
27#include "ops_export.h"
28#include "ops_fstype.h"
29#include "ops_super.h"
30#include "recovery.h"
31#include "rgrp.h"
32#include "super.h"
33#include "unlinked.h"
34#include "sys.h"
35
36#define DO 0
37#define UNDO 1
38
39static struct gfs2_sbd *init_sbd(struct super_block *sb)
40{
41 struct gfs2_sbd *sdp;
42 unsigned int x;
43
44 sdp = vmalloc(sizeof(struct gfs2_sbd));
45 if (!sdp)
46 return NULL;
47
48 memset(sdp, 0, sizeof(struct gfs2_sbd));
49
50 set_v2sdp(sb, sdp);
51 sdp->sd_vfs = sb;
52
53 gfs2_tune_init(&sdp->sd_tune);
54
55 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
56 sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
57 INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
58 }
59 INIT_LIST_HEAD(&sdp->sd_reclaim_list);
60 spin_lock_init(&sdp->sd_reclaim_lock);
61 init_waitqueue_head(&sdp->sd_reclaim_wq);
62 init_MUTEX(&sdp->sd_invalidate_inodes_mutex);
63
64 init_MUTEX(&sdp->sd_inum_mutex);
65 spin_lock_init(&sdp->sd_statfs_spin);
66 init_MUTEX(&sdp->sd_statfs_mutex);
67
68 spin_lock_init(&sdp->sd_rindex_spin);
69 init_MUTEX(&sdp->sd_rindex_mutex);
70 INIT_LIST_HEAD(&sdp->sd_rindex_list);
71 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
72 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
73
74 INIT_LIST_HEAD(&sdp->sd_jindex_list);
75 spin_lock_init(&sdp->sd_jindex_spin);
76 init_MUTEX(&sdp->sd_jindex_mutex);
77
78 INIT_LIST_HEAD(&sdp->sd_unlinked_list);
79 spin_lock_init(&sdp->sd_unlinked_spin);
80 init_MUTEX(&sdp->sd_unlinked_mutex);
81
82 INIT_LIST_HEAD(&sdp->sd_quota_list);
83 spin_lock_init(&sdp->sd_quota_spin);
84 init_MUTEX(&sdp->sd_quota_mutex);
85
86 spin_lock_init(&sdp->sd_log_lock);
87 init_waitqueue_head(&sdp->sd_log_trans_wq);
88 init_waitqueue_head(&sdp->sd_log_flush_wq);
89
90 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
91 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
92 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
93 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
94 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
95
96 INIT_LIST_HEAD(&sdp->sd_log_blks_list);
97 init_waitqueue_head(&sdp->sd_log_blks_wait);
98
99 INIT_LIST_HEAD(&sdp->sd_ail1_list);
100 INIT_LIST_HEAD(&sdp->sd_ail2_list);
101
102 init_MUTEX(&sdp->sd_log_flush_lock);
103 INIT_LIST_HEAD(&sdp->sd_log_flush_list);
104
105 INIT_LIST_HEAD(&sdp->sd_revoke_list);
106
107 init_MUTEX(&sdp->sd_freeze_lock);
108
109 return sdp;
110}
111
112static void init_vfs(struct gfs2_sbd *sdp)
113{
114 struct super_block *sb = sdp->sd_vfs;
115
116 sb->s_magic = GFS2_MAGIC;
117 sb->s_op = &gfs2_super_ops;
118 sb->s_export_op = &gfs2_export_ops;
119 sb->s_maxbytes = MAX_LFS_FILESIZE;
120
121 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
122 set_bit(SDF_NOATIME, &sdp->sd_flags);
123
124 /* Don't let the VFS update atimes. GFS2 handles this itself. */
125 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
126
127 /* Set up the buffer cache and fill in some fake block size values
128 to allow us to read-in the on-disk superblock. */
129 sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
130 sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
131 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT;
132 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
133}
134
135static int init_names(struct gfs2_sbd *sdp, int silent)
136{
137 struct gfs2_sb *sb = NULL;
138 char *proto, *table;
139 int error = 0;
140
141 proto = sdp->sd_args.ar_lockproto;
142 table = sdp->sd_args.ar_locktable;
143
144 /* Try to autodetect */
145
146 if (!proto[0] || !table[0]) {
147 struct buffer_head *bh;
148 bh = sb_getblk(sdp->sd_vfs,
149 GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
150 lock_buffer(bh);
151 clear_buffer_uptodate(bh);
152 clear_buffer_dirty(bh);
153 unlock_buffer(bh);
154 ll_rw_block(READ, 1, &bh);
155 wait_on_buffer(bh);
156
157 if (!buffer_uptodate(bh)) {
158 brelse(bh);
159 return -EIO;
160 }
161
162 sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
163 if (!sb) {
164 brelse(bh);
165 return -ENOMEM;
166 }
167 gfs2_sb_in(sb, bh->b_data);
168 brelse(bh);
169
170 error = gfs2_check_sb(sdp, sb, silent);
171 if (error)
172 goto out;
173
174 if (!proto[0])
175 proto = sb->sb_lockproto;
176 if (!table[0])
177 table = sb->sb_locktable;
178 }
179
180 if (!table[0])
181 table = sdp->sd_vfs->s_id;
182
183 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
184 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
185
186 out:
187 kfree(sb);
188
189 return error;
190}
191
192static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
193 int undo)
194{
195 struct task_struct *p;
196 int error = 0;
197
198 if (undo)
199 goto fail_trans;
200
201 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
202 error = IS_ERR(p);
203 if (error) {
204 fs_err(sdp, "can't start scand thread: %d\n", error);
205 return error;
206 }
207 sdp->sd_scand_process = p;
208
209 for (sdp->sd_glockd_num = 0;
210 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
211 sdp->sd_glockd_num++) {
212 p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
213 error = IS_ERR(p);
214 if (error) {
215 fs_err(sdp, "can't start glockd thread: %d\n", error);
216 goto fail;
217 }
218 sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
219 }
220
221 error = gfs2_glock_nq_num(sdp,
222 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
223 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
224 mount_gh);
225 if (error) {
226 fs_err(sdp, "can't acquire mount glock: %d\n", error);
227 goto fail;
228 }
229
230 error = gfs2_glock_nq_num(sdp,
231 GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
232 LM_ST_SHARED,
233 LM_FLAG_NOEXP | GL_EXACT | GL_NEVER_RECURSE,
234 &sdp->sd_live_gh);
235 if (error) {
236 fs_err(sdp, "can't acquire live glock: %d\n", error);
237 goto fail_mount;
238 }
239
240 error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
241 CREATE, &sdp->sd_rename_gl);
242 if (error) {
243 fs_err(sdp, "can't create rename glock: %d\n", error);
244 goto fail_live;
245 }
246
247 error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
248 CREATE, &sdp->sd_trans_gl);
249 if (error) {
250 fs_err(sdp, "can't create transaction glock: %d\n", error);
251 goto fail_rename;
252 }
253 set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
254
255 return 0;
256
257 fail_trans:
258 gfs2_glock_put(sdp->sd_trans_gl);
259
260 fail_rename:
261 gfs2_glock_put(sdp->sd_rename_gl);
262
263 fail_live:
264 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
265
266 fail_mount:
267 gfs2_glock_dq_uninit(mount_gh);
268
269 fail:
270 while (sdp->sd_glockd_num--)
271 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
272
273 kthread_stop(sdp->sd_scand_process);
274
275 return error;
276}
277
278static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
279{
280 struct super_block *sb = sdp->sd_vfs;
281 struct gfs2_holder sb_gh;
282 int error = 0;
283
284 if (undo) {
285 gfs2_inode_put(sdp->sd_master_dir);
286 return 0;
287 }
288
289 error = gfs2_glock_nq_num(sdp,
290 GFS2_SB_LOCK, &gfs2_meta_glops,
291 LM_ST_SHARED, 0, &sb_gh);
292 if (error) {
293 fs_err(sdp, "can't acquire superblock glock: %d\n", error);
294 return error;
295 }
296
297 error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
298 if (error) {
299 fs_err(sdp, "can't read superblock: %d\n", error);
300 goto out;
301 }
302
303 /* Set up the buffer cache and SB for real */
304 error = -EINVAL;
305 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
306 fs_err(sdp, "FS block size (%u) is too small for device "
307 "block size (%u)\n",
308 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
309 goto out;
310 }
311 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
312 fs_err(sdp, "FS block size (%u) is too big for machine "
313 "page size (%u)\n",
314 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
315 goto out;
316 }
317
318 /* Get rid of buffers from the original block size */
319 sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
320 sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;
321
322 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
323
324 error = gfs2_lookup_master_dir(sdp);
325 if (error)
326 fs_err(sdp, "can't read in master directory: %d\n", error);
327
328 out:
329 gfs2_glock_dq_uninit(&sb_gh);
330
331 return error;
332}
333
334static int init_journal(struct gfs2_sbd *sdp, int undo)
335{
336 struct gfs2_holder ji_gh;
337 struct task_struct *p;
338 int jindex = 1;
339 int error = 0;
340
341 if (undo) {
342 jindex = 0;
343 goto fail_recoverd;
344 }
345
346 error = gfs2_lookup_simple(sdp->sd_master_dir, "jindex",
347 &sdp->sd_jindex);
348 if (error) {
349 fs_err(sdp, "can't lookup journal index: %d\n", error);
350 return error;
351 }
352 set_bit(GLF_STICKY, &sdp->sd_jindex->i_gl->gl_flags);
353
354 /* Load in the journal index special file */
355
356 error = gfs2_jindex_hold(sdp, &ji_gh);
357 if (error) {
358 fs_err(sdp, "can't read journal index: %d\n", error);
359 goto fail;
360 }
361
362 error = -EINVAL;
363 if (!gfs2_jindex_size(sdp)) {
364 fs_err(sdp, "no journals!\n");
365 goto fail_jindex;
366 }
367
368 if (sdp->sd_args.ar_spectator) {
369 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
370 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
371 } else {
372 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
373 fs_err(sdp, "can't mount journal #%u\n",
374 sdp->sd_lockstruct.ls_jid);
375 fs_err(sdp, "there are only %u journals (0 - %u)\n",
376 gfs2_jindex_size(sdp),
377 gfs2_jindex_size(sdp) - 1);
378 goto fail_jindex;
379 }
380 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
381
382 error = gfs2_glock_nq_num(sdp,
383 sdp->sd_lockstruct.ls_jid,
384 &gfs2_journal_glops,
385 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
386 &sdp->sd_journal_gh);
387 if (error) {
388 fs_err(sdp, "can't acquire journal glock: %d\n", error);
389 goto fail_jindex;
390 }
391
392 error = gfs2_glock_nq_init(sdp->sd_jdesc->jd_inode->i_gl,
393 LM_ST_SHARED,
394 LM_FLAG_NOEXP | GL_EXACT,
395 &sdp->sd_jinode_gh);
396 if (error) {
397 fs_err(sdp, "can't acquire journal inode glock: %d\n",
398 error);
399 goto fail_journal_gh;
400 }
401
402 error = gfs2_jdesc_check(sdp->sd_jdesc);
403 if (error) {
404 fs_err(sdp, "my journal (%u) is bad: %d\n",
405 sdp->sd_jdesc->jd_jid, error);
406 goto fail_jinode_gh;
407 }
408 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
409 }
410
411 if (sdp->sd_lockstruct.ls_first) {
412 unsigned int x;
413 for (x = 0; x < sdp->sd_journals; x++) {
414 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
415 WAIT);
416 if (error) {
417 fs_err(sdp, "error recovering journal %u: %d\n",
418 x, error);
419 goto fail_jinode_gh;
420 }
421 }
422
423 gfs2_lm_others_may_mount(sdp);
424 } else if (!sdp->sd_args.ar_spectator) {
425 error = gfs2_recover_journal(sdp->sd_jdesc, WAIT);
426 if (error) {
427 fs_err(sdp, "error recovering my journal: %d\n", error);
428 goto fail_jinode_gh;
429 }
430 }
431
432 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
433 gfs2_glock_dq_uninit(&ji_gh);
434 jindex = 0;
435
436 /* Disown my Journal glock */
437
438 sdp->sd_journal_gh.gh_owner = NULL;
439 sdp->sd_jinode_gh.gh_owner = NULL;
440
441 p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
442 error = IS_ERR(p);
443 if (error) {
444 fs_err(sdp, "can't start recoverd thread: %d\n", error);
445 goto fail_jinode_gh;
446 }
447 sdp->sd_recoverd_process = p;
448
449 return 0;
450
451 fail_recoverd:
452 kthread_stop(sdp->sd_recoverd_process);
453
454 fail_jinode_gh:
455 if (!sdp->sd_args.ar_spectator)
456 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
457
458 fail_journal_gh:
459 if (!sdp->sd_args.ar_spectator)
460 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
461
462 fail_jindex:
463 gfs2_jindex_free(sdp);
464 if (jindex)
465 gfs2_glock_dq_uninit(&ji_gh);
466
467 fail:
468 gfs2_inode_put(sdp->sd_jindex);
469
470 return error;
471}
472
473int gfs2_lookup_root(struct gfs2_sbd *sdp)
474{
475 int error;
476 struct gfs2_glock *gl;
477
478 error = gfs2_glock_get(sdp, sdp->sd_sb.sb_root_dir.no_addr,
479 &gfs2_inode_glops, CREATE, &gl);
480 if (!error) {
481 error = gfs2_inode_get(gl, &sdp->sd_sb.sb_root_dir,
482 CREATE, &sdp->sd_root_dir);
483 if (!error)
484 gfs2_inode_min_init(sdp->sd_root_dir, DT_DIR);
485 gfs2_glock_put(gl);
486 }
487
488 return error;
489}
490
491
492static int init_inodes(struct gfs2_sbd *sdp, int undo)
493{
494 struct inode *inode;
495 struct dentry **dentry = &sdp->sd_vfs->s_root;
496 int error = 0;
497
498 if (undo)
499 goto fail_dput;
500
501 /* Read in the master inode number inode */
502 error = gfs2_lookup_simple(sdp->sd_master_dir, "inum",
503 &sdp->sd_inum_inode);
504 if (error) {
505 fs_err(sdp, "can't read in inum inode: %d\n", error);
506 return error;
507 }
508
509 /* Read in the master statfs inode */
510 error = gfs2_lookup_simple(sdp->sd_master_dir, "statfs",
511 &sdp->sd_statfs_inode);
512 if (error) {
513 fs_err(sdp, "can't read in statfs inode: %d\n", error);
514 goto fail;
515 }
516
517 /* Read in the resource index inode */
518 error = gfs2_lookup_simple(sdp->sd_master_dir, "rindex",
519 &sdp->sd_rindex);
520 if (error) {
521 fs_err(sdp, "can't get resource index inode: %d\n", error);
522 goto fail_statfs;
523 }
524 set_bit(GLF_STICKY, &sdp->sd_rindex->i_gl->gl_flags);
525 sdp->sd_rindex_vn = sdp->sd_rindex->i_gl->gl_vn - 1;
526
527 /* Read in the quota inode */
528 error = gfs2_lookup_simple(sdp->sd_master_dir, "quota",
529 &sdp->sd_quota_inode);
530 if (error) {
531 fs_err(sdp, "can't get quota file inode: %d\n", error);
532 goto fail_rindex;
533 }
534
535 /* Get the root inode */
536 error = gfs2_lookup_root(sdp);
537 if (error) {
538 fs_err(sdp, "can't read in root inode: %d\n", error);
539 goto fail_qinode;
540 }
541
542 /* Get the root inode/dentry */
543 inode = gfs2_ip2v(sdp->sd_root_dir);
544 if (!inode) {
545 fs_err(sdp, "can't get root inode\n");
546 error = -ENOMEM;
547 goto fail_rooti;
548 }
549
550 *dentry = d_alloc_root(inode);
551 if (!*dentry) {
552 iput(inode);
553 fs_err(sdp, "can't get root dentry\n");
554 error = -ENOMEM;
555 goto fail_rooti;
556 }
557
558 return 0;
559
560 fail_dput:
561 dput(*dentry);
562 *dentry = NULL;
563
564 fail_rooti:
565 gfs2_inode_put(sdp->sd_root_dir);
566
567 fail_qinode:
568 gfs2_inode_put(sdp->sd_quota_inode);
569
570 fail_rindex:
571 gfs2_clear_rgrpd(sdp);
572 gfs2_inode_put(sdp->sd_rindex);
573
574 fail_statfs:
575 gfs2_inode_put(sdp->sd_statfs_inode);
576
577 fail:
578 gfs2_inode_put(sdp->sd_inum_inode);
579
580 return error;
581}
582
583static int init_per_node(struct gfs2_sbd *sdp, int undo)
584{
585 struct gfs2_inode *pn = NULL;
586 char buf[30];
587 int error = 0;
588
589 if (sdp->sd_args.ar_spectator)
590 return 0;
591
592 if (undo)
593 goto fail_qc_gh;
594
595 error = gfs2_lookup_simple(sdp->sd_master_dir, "per_node", &pn);
596 if (error) {
597 fs_err(sdp, "can't find per_node directory: %d\n", error);
598 return error;
599 }
600
601 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
602 error = gfs2_lookup_simple(pn, buf, &sdp->sd_ir_inode);
603 if (error) {
604 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
605 goto fail;
606 }
607
608 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
609 error = gfs2_lookup_simple(pn, buf, &sdp->sd_sc_inode);
610 if (error) {
611 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
612 goto fail_ir_i;
613 }
614
615 sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid);
616 error = gfs2_lookup_simple(pn, buf, &sdp->sd_ut_inode);
617 if (error) {
618 fs_err(sdp, "can't find local \"ut\" file: %d\n", error);
619 goto fail_sc_i;
620 }
621
622 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
623 error = gfs2_lookup_simple(pn, buf, &sdp->sd_qc_inode);
624 if (error) {
625 fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
626 goto fail_ut_i;
627 }
628
629 gfs2_inode_put(pn);
630 pn = NULL;
631
632 error = gfs2_glock_nq_init(sdp->sd_ir_inode->i_gl,
633 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
634 &sdp->sd_ir_gh);
635 if (error) {
636 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
637 goto fail_qc_i;
638 }
639
640 error = gfs2_glock_nq_init(sdp->sd_sc_inode->i_gl,
641 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
642 &sdp->sd_sc_gh);
643 if (error) {
644 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
645 goto fail_ir_gh;
646 }
647
648 error = gfs2_glock_nq_init(sdp->sd_ut_inode->i_gl,
649 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
650 &sdp->sd_ut_gh);
651 if (error) {
652 fs_err(sdp, "can't lock local \"ut\" file: %d\n", error);
653 goto fail_sc_gh;
654 }
655
656 error = gfs2_glock_nq_init(sdp->sd_qc_inode->i_gl,
657 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
658 &sdp->sd_qc_gh);
659 if (error) {
660 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
661 goto fail_ut_gh;
662 }
663
664 return 0;
665
666 fail_qc_gh:
667 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
668
669 fail_ut_gh:
670 gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
671
672 fail_sc_gh:
673 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
674
675 fail_ir_gh:
676 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
677
678 fail_qc_i:
679 gfs2_inode_put(sdp->sd_qc_inode);
680
681 fail_ut_i:
682 gfs2_inode_put(sdp->sd_ut_inode);
683
684 fail_sc_i:
685 gfs2_inode_put(sdp->sd_sc_inode);
686
687 fail_ir_i:
688 gfs2_inode_put(sdp->sd_ir_inode);
689
690 fail:
691 if (pn)
692 gfs2_inode_put(pn);
693 return error;
694}
695
696static int init_threads(struct gfs2_sbd *sdp, int undo)
697{
698 struct task_struct *p;
699 int error = 0;
700
701 if (undo)
702 goto fail_inoded;
703
704 sdp->sd_log_flush_time = jiffies;
705 sdp->sd_jindex_refresh_time = jiffies;
706
707 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
708 error = IS_ERR(p);
709 if (error) {
710 fs_err(sdp, "can't start logd thread: %d\n", error);
711 return error;
712 }
713 sdp->sd_logd_process = p;
714
715 sdp->sd_statfs_sync_time = jiffies;
716 sdp->sd_quota_sync_time = jiffies;
717
718 p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
719 error = IS_ERR(p);
720 if (error) {
721 fs_err(sdp, "can't start quotad thread: %d\n", error);
722 goto fail;
723 }
724 sdp->sd_quotad_process = p;
725
726 p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded");
727 error = IS_ERR(p);
728 if (error) {
729 fs_err(sdp, "can't start inoded thread: %d\n", error);
730 goto fail_quotad;
731 }
732 sdp->sd_inoded_process = p;
733
734 return 0;
735
736 fail_inoded:
737 kthread_stop(sdp->sd_inoded_process);
738
739 fail_quotad:
740 kthread_stop(sdp->sd_quotad_process);
741
742 fail:
743 kthread_stop(sdp->sd_logd_process);
744
745 return error;
746}
747
748/**
749 * fill_super - Read in superblock
750 * @sb: The VFS superblock
751 * @data: Mount options
752 * @silent: Don't complain if it's not a GFS2 filesystem
753 *
754 * Returns: errno
755 */
756
757static int fill_super(struct super_block *sb, void *data, int silent)
758{
759 struct gfs2_sbd *sdp;
760 struct gfs2_holder mount_gh;
761 int error;
762
763 sdp = init_sbd(sb);
764 if (!sdp) {
765 printk("GFS2: can't alloc struct gfs2_sbd\n");
766 return -ENOMEM;
767 }
768
769 error = gfs2_mount_args(sdp, (char *)data, 0);
770 if (error) {
771 printk("GFS2: can't parse mount arguments\n");
772 goto fail;
773 }
774
775 init_vfs(sdp);
776
777 error = init_names(sdp, silent);
778 if (error)
779 goto fail;
780
781 error = gfs2_sys_fs_add(sdp);
782 if (error)
783 goto fail;
784
785 error = gfs2_lm_mount(sdp, silent);
786 if (error)
787 goto fail_sys;
788
789 error = init_locking(sdp, &mount_gh, DO);
790 if (error)
791 goto fail_lm;
792
793 error = init_sb(sdp, silent, DO);
794 if (error)
795 goto fail_locking;
796
797 error = init_journal(sdp, DO);
798 if (error)
799 goto fail_sb;
800
801 error = init_inodes(sdp, DO);
802 if (error)
803 goto fail_journals;
804
805 error = init_per_node(sdp, DO);
806 if (error)
807 goto fail_inodes;
808
809 error = gfs2_statfs_init(sdp);
810 if (error) {
811 fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
812 goto fail_per_node;
813 }
814
815 error = init_threads(sdp, DO);
816 if (error)
817 goto fail_per_node;
818
819 if (!(sb->s_flags & MS_RDONLY)) {
820 error = gfs2_make_fs_rw(sdp);
821 if (error) {
822 fs_err(sdp, "can't make FS RW: %d\n", error);
823 goto fail_threads;
824 }
825 }
826
827 gfs2_glock_dq_uninit(&mount_gh);
828
829 return 0;
830
831 fail_threads:
832 init_threads(sdp, UNDO);
833
834 fail_per_node:
835 init_per_node(sdp, UNDO);
836
837 fail_inodes:
838 init_inodes(sdp, UNDO);
839
840 fail_journals:
841 init_journal(sdp, UNDO);
842
843 fail_sb:
844 init_sb(sdp, 0, UNDO);
845
846 fail_locking:
847 init_locking(sdp, &mount_gh, UNDO);
848
849 fail_lm:
850 gfs2_gl_hash_clear(sdp, WAIT);
851 gfs2_lm_unmount(sdp);
852 while (invalidate_inodes(sb))
853 yield();
854
855 fail_sys:
856 gfs2_sys_fs_del(sdp);
857
858 fail:
859 vfree(sdp);
860 set_v2sdp(sb, NULL);
861
862 return error;
863}
864
865static struct super_block *gfs2_get_sb(struct file_system_type *fs_type,
866 int flags, const char *dev_name,
867 void *data)
868{
869 return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
870}
871
872struct file_system_type gfs2_fs_type = {
873 .name = "gfs2",
874 .fs_flags = FS_REQUIRES_DEV,
875 .get_sb = gfs2_get_sb,
876 .kill_sb = kill_block_super,
877 .owner = THIS_MODULE,
878};
879
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
new file mode 100644
index 000000000000..7008364e76ea
--- /dev/null
+++ b/fs/gfs2/ops_fstype.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FSTYPE_DOT_H__
11#define __OPS_FSTYPE_DOT_H__
12
13extern struct file_system_type gfs2_fs_type;
14
15#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
new file mode 100644
index 000000000000..d0f90b88380c
--- /dev/null
+++ b/fs/gfs2/ops_inode.c
@@ -0,0 +1,1265 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/namei.h>
16#include <linux/utsname.h>
17#include <linux/mm.h>
18#include <linux/xattr.h>
19#include <linux/posix_acl.h>
20#include <asm/semaphore.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "acl.h"
25#include "bmap.h"
26#include "dir.h"
27#include "eaops.h"
28#include "eattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "ops_dentry.h"
33#include "ops_inode.h"
34#include "page.h"
35#include "quota.h"
36#include "rgrp.h"
37#include "trans.h"
38#include "unlinked.h"
39
40/**
41 * gfs2_create - Create a file
42 * @dir: The directory in which to create the file
43 * @dentry: The dentry of the new file
44 * @mode: The mode of the new file
45 *
46 * Returns: errno
47 */
48
49static int gfs2_create(struct inode *dir, struct dentry *dentry,
50 int mode, struct nameidata *nd)
51{
52 struct gfs2_inode *dip = get_v2ip(dir), *ip;
53 struct gfs2_sbd *sdp = dip->i_sbd;
54 struct gfs2_holder ghs[2];
55 struct inode *inode;
56 int new = 1;
57 int error;
58
59 atomic_inc(&sdp->sd_ops_inode);
60
61 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
62
63 for (;;) {
64 error = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
65 if (!error) {
66 ip = get_gl2ip(ghs[1].gh_gl);
67 gfs2_trans_end(sdp);
68 if (dip->i_alloc.al_rgd)
69 gfs2_inplace_release(dip);
70 gfs2_quota_unlock(dip);
71 gfs2_alloc_put(dip);
72 gfs2_glock_dq_uninit_m(2, ghs);
73 break;
74 } else if (error != -EEXIST ||
75 (nd->intent.open.flags & O_EXCL)) {
76 gfs2_holder_uninit(ghs);
77 return error;
78 }
79
80 error = gfs2_lookupi(dip, &dentry->d_name, 0, &ip);
81 if (!error) {
82 new = 0;
83 gfs2_holder_uninit(ghs);
84 break;
85 } else if (error != -ENOENT) {
86 gfs2_holder_uninit(ghs);
87 return error;
88 }
89 }
90
91 inode = gfs2_ip2v(ip);
92 gfs2_inode_put(ip);
93
94 if (!inode)
95 return -ENOMEM;
96
97 d_instantiate(dentry, inode);
98 if (new)
99 mark_inode_dirty(inode);
100
101 return 0;
102}
103
104/**
105 * gfs2_lookup - Look up a filename in a directory and return its inode
106 * @dir: The directory inode
107 * @dentry: The dentry of the new inode
108 * @nd: passed from Linux VFS, ignored by us
109 *
110 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
111 *
112 * Returns: errno
113 */
114
115static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
116 struct nameidata *nd)
117{
118 struct gfs2_inode *dip = get_v2ip(dir), *ip;
119 struct gfs2_sbd *sdp = dip->i_sbd;
120 struct inode *inode = NULL;
121 int error;
122
123 atomic_inc(&sdp->sd_ops_inode);
124
125 if (!sdp->sd_args.ar_localcaching)
126 dentry->d_op = &gfs2_dops;
127
128 error = gfs2_lookupi(dip, &dentry->d_name, 0, &ip);
129 if (!error) {
130 inode = gfs2_ip2v(ip);
131 gfs2_inode_put(ip);
132 if (!inode)
133 return ERR_PTR(-ENOMEM);
134
135 } else if (error != -ENOENT)
136 return ERR_PTR(error);
137
138 if (inode)
139 return d_splice_alias(inode, dentry);
140 d_add(dentry, inode);
141
142 return NULL;
143}
144
145/**
146 * gfs2_link - Link to a file
147 * @old_dentry: The inode to link
148 * @dir: Add link to this directory
149 * @dentry: The name of the link
150 *
151 * Link the inode in "old_dentry" into the directory "dir" with the
152 * name in "dentry".
153 *
154 * Returns: errno
155 */
156
157static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
158 struct dentry *dentry)
159{
160 struct gfs2_inode *dip = get_v2ip(dir);
161 struct gfs2_sbd *sdp = dip->i_sbd;
162 struct inode *inode = old_dentry->d_inode;
163 struct gfs2_inode *ip = get_v2ip(inode);
164 struct gfs2_holder ghs[2];
165 int alloc_required;
166 int error;
167
168 atomic_inc(&sdp->sd_ops_inode);
169
170 if (S_ISDIR(ip->i_di.di_mode))
171 return -EPERM;
172
173 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
174 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
175
176 error = gfs2_glock_nq_m(2, ghs);
177 if (error)
178 goto out;
179
180 error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL);
181 if (error)
182 goto out_gunlock;
183
184 error = gfs2_dir_search(dip, &dentry->d_name, NULL, NULL);
185 switch (error) {
186 case -ENOENT:
187 break;
188 case 0:
189 error = -EEXIST;
190 default:
191 goto out_gunlock;
192 }
193
194 error = -EINVAL;
195 if (!dip->i_di.di_nlink)
196 goto out_gunlock;
197 error = -EFBIG;
198 if (dip->i_di.di_entries == (uint32_t)-1)
199 goto out_gunlock;
200 error = -EPERM;
201 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
202 goto out_gunlock;
203 error = -EINVAL;
204 if (!ip->i_di.di_nlink)
205 goto out_gunlock;
206 error = -EMLINK;
207 if (ip->i_di.di_nlink == (uint32_t)-1)
208 goto out_gunlock;
209
210 error = gfs2_diradd_alloc_required(dip, &dentry->d_name,
211 &alloc_required);
212 if (error)
213 goto out_gunlock;
214
215 if (alloc_required) {
216 struct gfs2_alloc *al = gfs2_alloc_get(dip);
217
218 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
219 if (error)
220 goto out_alloc;
221
222 error = gfs2_quota_check(dip, dip->i_di.di_uid,
223 dip->i_di.di_gid);
224 if (error)
225 goto out_gunlock_q;
226
227 al->al_requested = sdp->sd_max_dirres;
228
229 error = gfs2_inplace_reserve(dip);
230 if (error)
231 goto out_gunlock_q;
232
233 error = gfs2_trans_begin(sdp,
234 sdp->sd_max_dirres +
235 al->al_rgd->rd_ri.ri_length +
236 2 * RES_DINODE + RES_STATFS +
237 RES_QUOTA, 0);
238 if (error)
239 goto out_ipres;
240 } else {
241 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
242 if (error)
243 goto out_ipres;
244 }
245
246 error = gfs2_dir_add(dip, &dentry->d_name, &ip->i_num,
247 IF2DT(ip->i_di.di_mode));
248 if (error)
249 goto out_end_trans;
250
251 error = gfs2_change_nlink(ip, +1);
252
253 out_end_trans:
254 gfs2_trans_end(sdp);
255
256 out_ipres:
257 if (alloc_required)
258 gfs2_inplace_release(dip);
259
260 out_gunlock_q:
261 if (alloc_required)
262 gfs2_quota_unlock(dip);
263
264 out_alloc:
265 if (alloc_required)
266 gfs2_alloc_put(dip);
267
268 out_gunlock:
269 gfs2_glock_dq_m(2, ghs);
270
271 out:
272 gfs2_holder_uninit(ghs);
273 gfs2_holder_uninit(ghs + 1);
274
275 if (!error) {
276 atomic_inc(&inode->i_count);
277 d_instantiate(dentry, inode);
278 mark_inode_dirty(inode);
279 }
280
281 return error;
282}
283
284/**
285 * gfs2_unlink - Unlink a file
286 * @dir: The inode of the directory containing the file to unlink
287 * @dentry: The file itself
288 *
289 * Unlink a file. Call gfs2_unlinki()
290 *
291 * Returns: errno
292 */
293
294static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
295{
296 struct gfs2_inode *dip = get_v2ip(dir);
297 struct gfs2_sbd *sdp = dip->i_sbd;
298 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
299 struct gfs2_unlinked *ul;
300 struct gfs2_holder ghs[2];
301 int error;
302
303 atomic_inc(&sdp->sd_ops_inode);
304
305 error = gfs2_unlinked_get(sdp, &ul);
306 if (error)
307 return error;
308
309 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
310 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
311
312 error = gfs2_glock_nq_m(2, ghs);
313 if (error)
314 goto out;
315
316 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
317 if (error)
318 goto out_gunlock;
319
320 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF +
321 RES_UNLINKED, 0);
322 if (error)
323 goto out_gunlock;
324
325 error = gfs2_unlinki(dip, &dentry->d_name, ip,ul);
326
327 gfs2_trans_end(sdp);
328
329 out_gunlock:
330 gfs2_glock_dq_m(2, ghs);
331
332 out:
333 gfs2_holder_uninit(ghs);
334 gfs2_holder_uninit(ghs + 1);
335
336 gfs2_unlinked_put(sdp, ul);
337
338 return error;
339}
340
341/**
342 * gfs2_symlink - Create a symlink
343 * @dir: The directory to create the symlink in
344 * @dentry: The dentry to put the symlink in
345 * @symname: The thing which the link points to
346 *
347 * Returns: errno
348 */
349
350static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
351 const char *symname)
352{
353 struct gfs2_inode *dip = get_v2ip(dir), *ip;
354 struct gfs2_sbd *sdp = dip->i_sbd;
355 struct gfs2_holder ghs[2];
356 struct inode *inode;
357 struct buffer_head *dibh;
358 int size;
359 int error;
360
361 atomic_inc(&sdp->sd_ops_inode);
362
363 /* Must be stuffed with a null terminator for gfs2_follow_link() */
364 size = strlen(symname);
365 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
366 return -ENAMETOOLONG;
367
368 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
369
370 error = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
371 if (error) {
372 gfs2_holder_uninit(ghs);
373 return error;
374 }
375
376 ip = get_gl2ip(ghs[1].gh_gl);
377
378 ip->i_di.di_size = size;
379
380 error = gfs2_meta_inode_buffer(ip, &dibh);
381
382 if (!gfs2_assert_withdraw(sdp, !error)) {
383 gfs2_dinode_out(&ip->i_di, dibh->b_data);
384 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
385 size);
386 brelse(dibh);
387 }
388
389 gfs2_trans_end(sdp);
390 if (dip->i_alloc.al_rgd)
391 gfs2_inplace_release(dip);
392 gfs2_quota_unlock(dip);
393 gfs2_alloc_put(dip);
394
395 gfs2_glock_dq_uninit_m(2, ghs);
396
397 inode = gfs2_ip2v(ip);
398 gfs2_inode_put(ip);
399
400 if (!inode)
401 return -ENOMEM;
402
403 d_instantiate(dentry, inode);
404 mark_inode_dirty(inode);
405
406 return 0;
407}
408
409/**
410 * gfs2_mkdir - Make a directory
411 * @dir: The parent directory of the new one
412 * @dentry: The dentry of the new directory
413 * @mode: The mode of the new directory
414 *
415 * Returns: errno
416 */
417
418static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
419{
420 struct gfs2_inode *dip = get_v2ip(dir), *ip;
421 struct gfs2_sbd *sdp = dip->i_sbd;
422 struct gfs2_holder ghs[2];
423 struct inode *inode;
424 struct buffer_head *dibh;
425 int error;
426
427 atomic_inc(&sdp->sd_ops_inode);
428
429 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
430
431 error = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
432 if (error) {
433 gfs2_holder_uninit(ghs);
434 return error;
435 }
436
437 ip = get_gl2ip(ghs[1].gh_gl);
438
439 ip->i_di.di_nlink = 2;
440 ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
441 ip->i_di.di_flags |= GFS2_DIF_JDATA;
442 ip->i_di.di_payload_format = GFS2_FORMAT_DE;
443 ip->i_di.di_entries = 2;
444
445 error = gfs2_meta_inode_buffer(ip, &dibh);
446
447 if (!gfs2_assert_withdraw(sdp, !error)) {
448 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
449 struct gfs2_dirent *dent;
450
451 gfs2_dirent_alloc(ip, dibh, 1, &dent);
452
453 dent->de_inum = di->di_num; /* already GFS2 endian */
454 dent->de_hash = gfs2_disk_hash(".", 1);
455 dent->de_hash = cpu_to_be32(dent->de_hash);
456 dent->de_type = DT_DIR;
457 memcpy((char *) (dent + 1), ".", 1);
458 di->di_entries = cpu_to_be32(1);
459
460 gfs2_dirent_alloc(ip, dibh, 2, &dent);
461
462 gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum);
463 dent->de_hash = gfs2_disk_hash("..", 2);
464 dent->de_hash = cpu_to_be32(dent->de_hash);
465 dent->de_type = DT_DIR;
466 memcpy((char *) (dent + 1), "..", 2);
467
468 gfs2_dinode_out(&ip->i_di, (char *)di);
469
470 brelse(dibh);
471 }
472
473 error = gfs2_change_nlink(dip, +1);
474 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
475
476 gfs2_trans_end(sdp);
477 if (dip->i_alloc.al_rgd)
478 gfs2_inplace_release(dip);
479 gfs2_quota_unlock(dip);
480 gfs2_alloc_put(dip);
481
482 gfs2_glock_dq_uninit_m(2, ghs);
483
484 inode = gfs2_ip2v(ip);
485 gfs2_inode_put(ip);
486
487 if (!inode)
488 return -ENOMEM;
489
490 d_instantiate(dentry, inode);
491 mark_inode_dirty(inode);
492
493 return 0;
494}
495
496/**
497 * gfs2_rmdir - Remove a directory
498 * @dir: The parent directory of the directory to be removed
499 * @dentry: The dentry of the directory to remove
500 *
501 * Remove a directory. Call gfs2_rmdiri()
502 *
503 * Returns: errno
504 */
505
506static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
507{
508 struct gfs2_inode *dip = get_v2ip(dir);
509 struct gfs2_sbd *sdp = dip->i_sbd;
510 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
511 struct gfs2_unlinked *ul;
512 struct gfs2_holder ghs[2];
513 int error;
514
515 atomic_inc(&sdp->sd_ops_inode);
516
517 error = gfs2_unlinked_get(sdp, &ul);
518 if (error)
519 return error;
520
521 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
522 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
523
524 error = gfs2_glock_nq_m(2, ghs);
525 if (error)
526 goto out;
527
528 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
529 if (error)
530 goto out_gunlock;
531
532 if (ip->i_di.di_entries < 2) {
533 if (gfs2_consist_inode(ip))
534 gfs2_dinode_print(&ip->i_di);
535 error = -EIO;
536 goto out_gunlock;
537 }
538 if (ip->i_di.di_entries > 2) {
539 error = -ENOTEMPTY;
540 goto out_gunlock;
541 }
542
543 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF +
544 RES_UNLINKED, 0);
545 if (error)
546 goto out_gunlock;
547
548 error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul);
549
550 gfs2_trans_end(sdp);
551
552 out_gunlock:
553 gfs2_glock_dq_m(2, ghs);
554
555 out:
556 gfs2_holder_uninit(ghs);
557 gfs2_holder_uninit(ghs + 1);
558
559 gfs2_unlinked_put(sdp, ul);
560
561 return error;
562}
563
564/**
565 * gfs2_mknod - Make a special file
566 * @dir: The directory in which the special file will reside
567 * @dentry: The dentry of the special file
568 * @mode: The mode of the special file
569 * @rdev: The device specification of the special file
570 *
571 */
572
573static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
574 dev_t dev)
575{
576 struct gfs2_inode *dip = get_v2ip(dir), *ip;
577 struct gfs2_sbd *sdp = dip->i_sbd;
578 struct gfs2_holder ghs[2];
579 struct inode *inode;
580 struct buffer_head *dibh;
581 uint32_t major = 0, minor = 0;
582 int error;
583
584 atomic_inc(&sdp->sd_ops_inode);
585
586 switch (mode & S_IFMT) {
587 case S_IFBLK:
588 case S_IFCHR:
589 major = MAJOR(dev);
590 minor = MINOR(dev);
591 break;
592 case S_IFIFO:
593 case S_IFSOCK:
594 break;
595 default:
596 return -EOPNOTSUPP;
597 };
598
599 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
600
601 error = gfs2_createi(ghs, &dentry->d_name, mode);
602 if (error) {
603 gfs2_holder_uninit(ghs);
604 return error;
605 }
606
607 ip = get_gl2ip(ghs[1].gh_gl);
608
609 ip->i_di.di_major = major;
610 ip->i_di.di_minor = minor;
611
612 error = gfs2_meta_inode_buffer(ip, &dibh);
613
614 if (!gfs2_assert_withdraw(sdp, !error)) {
615 gfs2_dinode_out(&ip->i_di, dibh->b_data);
616 brelse(dibh);
617 }
618
619 gfs2_trans_end(sdp);
620 if (dip->i_alloc.al_rgd)
621 gfs2_inplace_release(dip);
622 gfs2_quota_unlock(dip);
623 gfs2_alloc_put(dip);
624
625 gfs2_glock_dq_uninit_m(2, ghs);
626
627 inode = gfs2_ip2v(ip);
628 gfs2_inode_put(ip);
629
630 if (!inode)
631 return -ENOMEM;
632
633 d_instantiate(dentry, inode);
634 mark_inode_dirty(inode);
635
636 return 0;
637}
638
639/**
640 * gfs2_rename - Rename a file
641 * @odir: Parent directory of old file name
642 * @odentry: The old dentry of the file
643 * @ndir: Parent directory of new file name
644 * @ndentry: The new dentry of the file
645 *
646 * Returns: errno
647 */
648
649static int gfs2_rename(struct inode *odir, struct dentry *odentry,
650 struct inode *ndir, struct dentry *ndentry)
651{
652 struct gfs2_inode *odip = get_v2ip(odir);
653 struct gfs2_inode *ndip = get_v2ip(ndir);
654 struct gfs2_inode *ip = get_v2ip(odentry->d_inode);
655 struct gfs2_inode *nip = NULL;
656 struct gfs2_sbd *sdp = odip->i_sbd;
657 struct gfs2_unlinked *ul;
658 struct gfs2_holder ghs[4], r_gh;
659 unsigned int num_gh;
660 int dir_rename = 0;
661 int alloc_required;
662 unsigned int x;
663 int error;
664
665 atomic_inc(&sdp->sd_ops_inode);
666
667 if (ndentry->d_inode) {
668 nip = get_v2ip(ndentry->d_inode);
669 if (ip == nip)
670 return 0;
671 }
672
673 error = gfs2_unlinked_get(sdp, &ul);
674 if (error)
675 return error;
676
677 /* Make sure we aren't trying to move a dirctory into it's subdir */
678
679 if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
680 dir_rename = 1;
681
682 error = gfs2_glock_nq_init(sdp->sd_rename_gl,
683 LM_ST_EXCLUSIVE, 0,
684 &r_gh);
685 if (error)
686 goto out;
687
688 error = gfs2_ok_to_move(ip, ndip);
689 if (error)
690 goto out_gunlock_r;
691 }
692
693 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
694 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
695 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
696 num_gh = 3;
697
698 if (nip)
699 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
700
701 error = gfs2_glock_nq_m(num_gh, ghs);
702 if (error)
703 goto out_uninit;
704
705 /* Check out the old directory */
706
707 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
708 if (error)
709 goto out_gunlock;
710
711 /* Check out the new directory */
712
713 if (nip) {
714 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
715 if (error)
716 goto out_gunlock;
717
718 if (S_ISDIR(nip->i_di.di_mode)) {
719 if (nip->i_di.di_entries < 2) {
720 if (gfs2_consist_inode(nip))
721 gfs2_dinode_print(&nip->i_di);
722 error = -EIO;
723 goto out_gunlock;
724 }
725 if (nip->i_di.di_entries > 2) {
726 error = -ENOTEMPTY;
727 goto out_gunlock;
728 }
729 }
730 } else {
731 error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL);
732 if (error)
733 goto out_gunlock;
734
735 error = gfs2_dir_search(ndip, &ndentry->d_name, NULL, NULL);
736 switch (error) {
737 case -ENOENT:
738 error = 0;
739 break;
740 case 0:
741 error = -EEXIST;
742 default:
743 goto out_gunlock;
744 };
745
746 if (odip != ndip) {
747 if (!ndip->i_di.di_nlink) {
748 error = -EINVAL;
749 goto out_gunlock;
750 }
751 if (ndip->i_di.di_entries == (uint32_t)-1) {
752 error = -EFBIG;
753 goto out_gunlock;
754 }
755 if (S_ISDIR(ip->i_di.di_mode) &&
756 ndip->i_di.di_nlink == (uint32_t)-1) {
757 error = -EMLINK;
758 goto out_gunlock;
759 }
760 }
761 }
762
763 /* Check out the dir to be renamed */
764
765 if (dir_rename) {
766 error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL);
767 if (error)
768 goto out_gunlock;
769 }
770
771 error = gfs2_diradd_alloc_required(ndip, &ndentry->d_name,
772 &alloc_required);
773 if (error)
774 goto out_gunlock;
775
776 if (alloc_required) {
777 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
778
779 error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
780 if (error)
781 goto out_alloc;
782
783 error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
784 ndip->i_di.di_gid);
785 if (error)
786 goto out_gunlock_q;
787
788 al->al_requested = sdp->sd_max_dirres;
789
790 error = gfs2_inplace_reserve(ndip);
791 if (error)
792 goto out_gunlock_q;
793
794 error = gfs2_trans_begin(sdp,
795 sdp->sd_max_dirres +
796 al->al_rgd->rd_ri.ri_length +
797 4 * RES_DINODE + 4 * RES_LEAF +
798 RES_UNLINKED + RES_STATFS +
799 RES_QUOTA, 0);
800 if (error)
801 goto out_ipreserv;
802 } else {
803 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
804 5 * RES_LEAF +
805 RES_UNLINKED, 0);
806 if (error)
807 goto out_gunlock;
808 }
809
810 /* Remove the target file, if it exists */
811
812 if (nip) {
813 if (S_ISDIR(nip->i_di.di_mode))
814 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul);
815 else
816 error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul);
817 if (error)
818 goto out_end_trans;
819 }
820
821 if (dir_rename) {
822 struct qstr name;
823 name.len = 2;
824 name.name = "..";
825
826 error = gfs2_change_nlink(ndip, +1);
827 if (error)
828 goto out_end_trans;
829 error = gfs2_change_nlink(odip, -1);
830 if (error)
831 goto out_end_trans;
832
833 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
834 if (error)
835 goto out_end_trans;
836 } else {
837 struct buffer_head *dibh;
838 error = gfs2_meta_inode_buffer(ip, &dibh);
839 if (error)
840 goto out_end_trans;
841 ip->i_di.di_ctime = get_seconds();
842 gfs2_trans_add_bh(ip->i_gl, dibh);
843 gfs2_dinode_out(&ip->i_di, dibh->b_data);
844 brelse(dibh);
845 }
846
847 error = gfs2_dir_del(odip, &odentry->d_name);
848 if (error)
849 goto out_end_trans;
850
851 error = gfs2_dir_add(ndip, &ndentry->d_name, &ip->i_num,
852 IF2DT(ip->i_di.di_mode));
853 if (error)
854 goto out_end_trans;
855
856 out_end_trans:
857 gfs2_trans_end(sdp);
858
859 out_ipreserv:
860 if (alloc_required)
861 gfs2_inplace_release(ndip);
862
863 out_gunlock_q:
864 if (alloc_required)
865 gfs2_quota_unlock(ndip);
866
867 out_alloc:
868 if (alloc_required)
869 gfs2_alloc_put(ndip);
870
871 out_gunlock:
872 gfs2_glock_dq_m(num_gh, ghs);
873
874 out_uninit:
875 for (x = 0; x < num_gh; x++)
876 gfs2_holder_uninit(ghs + x);
877
878 out_gunlock_r:
879 if (dir_rename)
880 gfs2_glock_dq_uninit(&r_gh);
881
882 out:
883 gfs2_unlinked_put(sdp, ul);
884
885 return error;
886}
887
888/**
889 * gfs2_readlink - Read the value of a symlink
890 * @dentry: the symlink
891 * @buf: the buffer to read the symlink data into
892 * @size: the size of the buffer
893 *
894 * Returns: errno
895 */
896
897static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
898 int user_size)
899{
900 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
901 char array[GFS2_FAST_NAME_SIZE], *buf = array;
902 unsigned int len = GFS2_FAST_NAME_SIZE;
903 int error;
904
905 atomic_inc(&ip->i_sbd->sd_ops_inode);
906
907 error = gfs2_readlinki(ip, &buf, &len);
908 if (error)
909 return error;
910
911 if (user_size > len - 1)
912 user_size = len - 1;
913
914 if (copy_to_user(user_buf, buf, user_size))
915 error = -EFAULT;
916 else
917 error = user_size;
918
919 if (buf != array)
920 kfree(buf);
921
922 return error;
923}
924
925/**
926 * gfs2_follow_link - Follow a symbolic link
927 * @dentry: The dentry of the link
928 * @nd: Data that we pass to vfs_follow_link()
929 *
930 * This can handle symlinks of any size. It is optimised for symlinks
931 * under GFS2_FAST_NAME_SIZE.
932 *
933 * Returns: 0 on success or error code
934 */
935
936static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
937{
938 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
939 char array[GFS2_FAST_NAME_SIZE], *buf = array;
940 unsigned int len = GFS2_FAST_NAME_SIZE;
941 int error;
942
943 atomic_inc(&ip->i_sbd->sd_ops_inode);
944
945 error = gfs2_readlinki(ip, &buf, &len);
946 if (!error) {
947 error = vfs_follow_link(nd, buf);
948 if (buf != array)
949 kfree(buf);
950 }
951
952 return ERR_PTR(error);
953}
954
955/**
956 * gfs2_permission -
957 * @inode:
958 * @mask:
959 * @nd: passed from Linux VFS, ignored by us
960 *
961 * Returns: errno
962 */
963
964static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
965{
966 struct gfs2_inode *ip = get_v2ip(inode);
967 struct gfs2_holder i_gh;
968 int error;
969
970 atomic_inc(&ip->i_sbd->sd_ops_inode);
971
972 if (ip->i_vn == ip->i_gl->gl_vn)
973 return generic_permission(inode, mask, gfs2_check_acl);
974
975 error = gfs2_glock_nq_init(ip->i_gl,
976 LM_ST_SHARED, LM_FLAG_ANY,
977 &i_gh);
978 if (!error) {
979 error = generic_permission(inode, mask, gfs2_check_acl_locked);
980 gfs2_glock_dq_uninit(&i_gh);
981 }
982
983 return error;
984}
985
986static int setattr_size(struct inode *inode, struct iattr *attr)
987{
988 struct gfs2_inode *ip = get_v2ip(inode);
989 int error;
990
991 if (attr->ia_size != ip->i_di.di_size) {
992 error = vmtruncate(inode, attr->ia_size);
993 if (error)
994 return error;
995 }
996
997 error = gfs2_truncatei(ip, attr->ia_size, gfs2_truncator_page);
998 if (error)
999 return error;
1000
1001 return error;
1002}
1003
1004static int setattr_chown(struct inode *inode, struct iattr *attr)
1005{
1006 struct gfs2_inode *ip = get_v2ip(inode);
1007 struct gfs2_sbd *sdp = ip->i_sbd;
1008 struct buffer_head *dibh;
1009 uint32_t ouid, ogid, nuid, ngid;
1010 int error;
1011
1012 ouid = ip->i_di.di_uid;
1013 ogid = ip->i_di.di_gid;
1014 nuid = attr->ia_uid;
1015 ngid = attr->ia_gid;
1016
1017 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1018 ouid = nuid = NO_QUOTA_CHANGE;
1019 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1020 ogid = ngid = NO_QUOTA_CHANGE;
1021
1022 gfs2_alloc_get(ip);
1023
1024 error = gfs2_quota_lock(ip, nuid, ngid);
1025 if (error)
1026 goto out_alloc;
1027
1028 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1029 error = gfs2_quota_check(ip, nuid, ngid);
1030 if (error)
1031 goto out_gunlock_q;
1032 }
1033
1034 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1035 if (error)
1036 goto out_gunlock_q;
1037
1038 error = gfs2_meta_inode_buffer(ip, &dibh);
1039 if (error)
1040 goto out_end_trans;
1041
1042 error = inode_setattr(inode, attr);
1043 gfs2_assert_warn(sdp, !error);
1044 gfs2_inode_attr_out(ip);
1045
1046 gfs2_trans_add_bh(ip->i_gl, dibh);
1047 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1048 brelse(dibh);
1049
1050 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1051 gfs2_quota_change(ip, -ip->i_di.di_blocks,
1052 ouid, ogid);
1053 gfs2_quota_change(ip, ip->i_di.di_blocks,
1054 nuid, ngid);
1055 }
1056
1057 out_end_trans:
1058 gfs2_trans_end(sdp);
1059
1060 out_gunlock_q:
1061 gfs2_quota_unlock(ip);
1062
1063 out_alloc:
1064 gfs2_alloc_put(ip);
1065
1066 return error;
1067}
1068
1069/**
1070 * gfs2_setattr - Change attributes on an inode
1071 * @dentry: The dentry which is changing
1072 * @attr: The structure describing the change
1073 *
1074 * The VFS layer wants to change one or more of an inodes attributes. Write
1075 * that change out to disk.
1076 *
1077 * Returns: errno
1078 */
1079
1080static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1081{
1082 struct inode *inode = dentry->d_inode;
1083 struct gfs2_inode *ip = get_v2ip(inode);
1084 struct gfs2_holder i_gh;
1085 int error;
1086
1087 atomic_inc(&ip->i_sbd->sd_ops_inode);
1088
1089 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1090 if (error)
1091 return error;
1092
1093 error = -EPERM;
1094 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1095 goto out;
1096
1097 error = inode_change_ok(inode, attr);
1098 if (error)
1099 goto out;
1100
1101 if (attr->ia_valid & ATTR_SIZE)
1102 error = setattr_size(inode, attr);
1103 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1104 error = setattr_chown(inode, attr);
1105 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1106 error = gfs2_acl_chmod(ip, attr);
1107 else
1108 error = gfs2_setattr_simple(ip, attr);
1109
1110 out:
1111 gfs2_glock_dq_uninit(&i_gh);
1112
1113 if (!error)
1114 mark_inode_dirty(inode);
1115
1116 return error;
1117}
1118
1119/**
1120 * gfs2_getattr - Read out an inode's attributes
1121 * @mnt: ?
1122 * @dentry: The dentry to stat
1123 * @stat: The inode's stats
1124 *
1125 * Returns: errno
1126 */
1127
1128static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1129 struct kstat *stat)
1130{
1131 struct inode *inode = dentry->d_inode;
1132 struct gfs2_inode *ip = get_v2ip(inode);
1133 struct gfs2_holder gh;
1134 int error;
1135
1136 atomic_inc(&ip->i_sbd->sd_ops_inode);
1137
1138 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1139 if (!error) {
1140 generic_fillattr(inode, stat);
1141 gfs2_glock_dq_uninit(&gh);
1142 }
1143
1144 return error;
1145}
1146
1147static int gfs2_setxattr(struct dentry *dentry, const char *name,
1148 const void *data, size_t size, int flags)
1149{
1150 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
1151 struct gfs2_ea_request er;
1152
1153 atomic_inc(&ip->i_sbd->sd_ops_inode);
1154
1155 memset(&er, 0, sizeof(struct gfs2_ea_request));
1156 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1157 if (er.er_type == GFS2_EATYPE_UNUSED)
1158 return -EOPNOTSUPP;
1159 er.er_data = (char *)data;
1160 er.er_name_len = strlen(er.er_name);
1161 er.er_data_len = size;
1162 er.er_flags = flags;
1163
1164 gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE));
1165
1166 return gfs2_ea_set(ip, &er);
1167}
1168
1169static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1170 void *data, size_t size)
1171{
1172 struct gfs2_ea_request er;
1173
1174 atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
1175
1176 memset(&er, 0, sizeof(struct gfs2_ea_request));
1177 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1178 if (er.er_type == GFS2_EATYPE_UNUSED)
1179 return -EOPNOTSUPP;
1180 er.er_data = data;
1181 er.er_name_len = strlen(er.er_name);
1182 er.er_data_len = size;
1183
1184 return gfs2_ea_get(get_v2ip(dentry->d_inode), &er);
1185}
1186
1187static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1188{
1189 struct gfs2_ea_request er;
1190
1191 atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
1192
1193 memset(&er, 0, sizeof(struct gfs2_ea_request));
1194 er.er_data = (size) ? buffer : NULL;
1195 er.er_data_len = size;
1196
1197 return gfs2_ea_list(get_v2ip(dentry->d_inode), &er);
1198}
1199
1200static int gfs2_removexattr(struct dentry *dentry, const char *name)
1201{
1202 struct gfs2_ea_request er;
1203
1204 atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
1205
1206 memset(&er, 0, sizeof(struct gfs2_ea_request));
1207 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1208 if (er.er_type == GFS2_EATYPE_UNUSED)
1209 return -EOPNOTSUPP;
1210 er.er_name_len = strlen(er.er_name);
1211
1212 return gfs2_ea_remove(get_v2ip(dentry->d_inode), &er);
1213}
1214
1215struct inode_operations gfs2_file_iops = {
1216 .permission = gfs2_permission,
1217 .setattr = gfs2_setattr,
1218 .getattr = gfs2_getattr,
1219 .setxattr = gfs2_setxattr,
1220 .getxattr = gfs2_getxattr,
1221 .listxattr = gfs2_listxattr,
1222 .removexattr = gfs2_removexattr,
1223};
1224
1225struct inode_operations gfs2_dev_iops = {
1226 .permission = gfs2_permission,
1227 .setattr = gfs2_setattr,
1228 .getattr = gfs2_getattr,
1229 .setxattr = gfs2_setxattr,
1230 .getxattr = gfs2_getxattr,
1231 .listxattr = gfs2_listxattr,
1232 .removexattr = gfs2_removexattr,
1233};
1234
1235struct inode_operations gfs2_dir_iops = {
1236 .create = gfs2_create,
1237 .lookup = gfs2_lookup,
1238 .link = gfs2_link,
1239 .unlink = gfs2_unlink,
1240 .symlink = gfs2_symlink,
1241 .mkdir = gfs2_mkdir,
1242 .rmdir = gfs2_rmdir,
1243 .mknod = gfs2_mknod,
1244 .rename = gfs2_rename,
1245 .permission = gfs2_permission,
1246 .setattr = gfs2_setattr,
1247 .getattr = gfs2_getattr,
1248 .setxattr = gfs2_setxattr,
1249 .getxattr = gfs2_getxattr,
1250 .listxattr = gfs2_listxattr,
1251 .removexattr = gfs2_removexattr,
1252};
1253
1254struct inode_operations gfs2_symlink_iops = {
1255 .readlink = gfs2_readlink,
1256 .follow_link = gfs2_follow_link,
1257 .permission = gfs2_permission,
1258 .setattr = gfs2_setattr,
1259 .getattr = gfs2_getattr,
1260 .setxattr = gfs2_setxattr,
1261 .getxattr = gfs2_getxattr,
1262 .listxattr = gfs2_listxattr,
1263 .removexattr = gfs2_removexattr,
1264};
1265
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
new file mode 100644
index 000000000000..5fafd87c8d7b
--- /dev/null
+++ b/fs/gfs2/ops_inode.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_INODE_DOT_H__
11#define __OPS_INODE_DOT_H__
12
13extern struct inode_operations gfs2_file_iops;
14extern struct inode_operations gfs2_dir_iops;
15extern struct inode_operations gfs2_symlink_iops;
16extern struct inode_operations gfs2_dev_iops;
17
18#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
new file mode 100644
index 000000000000..ca6a4d81bc26
--- /dev/null
+++ b/fs/gfs2/ops_super.c
@@ -0,0 +1,401 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/vmalloc.h>
16#include <linux/statfs.h>
17#include <linux/seq_file.h>
18#include <linux/mount.h>
19#include <linux/kthread.h>
20#include <linux/delay.h>
21#include <asm/semaphore.h>
22
23#include "gfs2.h"
24#include "glock.h"
25#include "inode.h"
26#include "lm.h"
27#include "log.h"
28#include "mount.h"
29#include "ops_super.h"
30#include "page.h"
31#include "quota.h"
32#include "recovery.h"
33#include "rgrp.h"
34#include "super.h"
35#include "sys.h"
36
37/**
38 * gfs2_write_inode - Make sure the inode is stable on the disk
39 * @inode: The inode
40 * @sync: synchronous write flag
41 *
42 * Returns: errno
43 */
44
45static int gfs2_write_inode(struct inode *inode, int sync)
46{
47 struct gfs2_inode *ip = get_v2ip(inode);
48
49 atomic_inc(&ip->i_sbd->sd_ops_super);
50
51 if (current->flags & PF_MEMALLOC)
52 return 0;
53 if (ip && sync)
54 gfs2_log_flush_glock(ip->i_gl);
55
56 return 0;
57}
58
59/**
60 * gfs2_put_super - Unmount the filesystem
61 * @sb: The VFS superblock
62 *
63 */
64
65static void gfs2_put_super(struct super_block *sb)
66{
67 struct gfs2_sbd *sdp = get_v2sdp(sb);
68 int error;
69
70 if (!sdp)
71 return;
72
73 atomic_inc(&sdp->sd_ops_super);
74
75 /* Unfreeze the filesystem, if we need to */
76
77 down(&sdp->sd_freeze_lock);
78 if (sdp->sd_freeze_count)
79 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
80 up(&sdp->sd_freeze_lock);
81
82 kthread_stop(sdp->sd_inoded_process);
83 kthread_stop(sdp->sd_quotad_process);
84 kthread_stop(sdp->sd_logd_process);
85 kthread_stop(sdp->sd_recoverd_process);
86 while (sdp->sd_glockd_num--)
87 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
88 kthread_stop(sdp->sd_scand_process);
89
90 if (!(sb->s_flags & MS_RDONLY)) {
91 error = gfs2_make_fs_ro(sdp);
92 if (error)
93 gfs2_io_error(sdp);
94 }
95
96 /* At this point, we're through modifying the disk */
97
98 /* Release stuff */
99
100 gfs2_inode_put(sdp->sd_master_dir);
101 gfs2_inode_put(sdp->sd_jindex);
102 gfs2_inode_put(sdp->sd_inum_inode);
103 gfs2_inode_put(sdp->sd_statfs_inode);
104 gfs2_inode_put(sdp->sd_rindex);
105 gfs2_inode_put(sdp->sd_quota_inode);
106 gfs2_inode_put(sdp->sd_root_dir);
107
108 gfs2_glock_put(sdp->sd_rename_gl);
109 gfs2_glock_put(sdp->sd_trans_gl);
110
111 if (!sdp->sd_args.ar_spectator) {
112 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
113 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
114 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
115 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
116 gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
117 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
118 gfs2_inode_put(sdp->sd_ir_inode);
119 gfs2_inode_put(sdp->sd_sc_inode);
120 gfs2_inode_put(sdp->sd_ut_inode);
121 gfs2_inode_put(sdp->sd_qc_inode);
122 }
123
124 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
125
126 gfs2_clear_rgrpd(sdp);
127 gfs2_jindex_free(sdp);
128
129 /* Take apart glock structures and buffer lists */
130 gfs2_gl_hash_clear(sdp, WAIT);
131
132 /* Unmount the locking protocol */
133 gfs2_lm_unmount(sdp);
134
135 /* At this point, we're through participating in the lockspace */
136
137 gfs2_sys_fs_del(sdp);
138
139 /* Get rid of any extra inodes */
140 while (invalidate_inodes(sb))
141 yield();
142
143 vfree(sdp);
144
145 set_v2sdp(sb, NULL);
146}
147
148/**
149 * gfs2_write_super - disk commit all incore transactions
150 * @sb: the filesystem
151 *
152 * This function is called every time sync(2) is called.
153 * After this exits, all dirty buffers and synced.
154 */
155
156static void gfs2_write_super(struct super_block *sb)
157{
158 struct gfs2_sbd *sdp = get_v2sdp(sb);
159 atomic_inc(&sdp->sd_ops_super);
160 gfs2_log_flush(sdp);
161}
162
163/**
164 * gfs2_write_super_lockfs - prevent further writes to the filesystem
165 * @sb: the VFS structure for the filesystem
166 *
167 */
168
169static void gfs2_write_super_lockfs(struct super_block *sb)
170{
171 struct gfs2_sbd *sdp = get_v2sdp(sb);
172 int error;
173
174 atomic_inc(&sdp->sd_ops_super);
175
176 for (;;) {
177 error = gfs2_freeze_fs(sdp);
178 if (!error)
179 break;
180
181 switch (error) {
182 case -EBUSY:
183 fs_err(sdp, "waiting for recovery before freeze\n");
184 break;
185
186 default:
187 fs_err(sdp, "error freezing FS: %d\n", error);
188 break;
189 }
190
191 fs_err(sdp, "retrying...\n");
192 msleep(1000);
193 }
194}
195
196/**
197 * gfs2_unlockfs - reallow writes to the filesystem
198 * @sb: the VFS structure for the filesystem
199 *
200 */
201
202static void gfs2_unlockfs(struct super_block *sb)
203{
204 struct gfs2_sbd *sdp = get_v2sdp(sb);
205
206 atomic_inc(&sdp->sd_ops_super);
207 gfs2_unfreeze_fs(sdp);
208}
209
210/**
211 * gfs2_statfs - Gather and return stats about the filesystem
212 * @sb: The superblock
213 * @statfsbuf: The buffer
214 *
215 * Returns: 0 on success or error code
216 */
217
218static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf)
219{
220 struct gfs2_sbd *sdp = get_v2sdp(sb);
221 struct gfs2_statfs_change sc;
222 int error;
223
224 atomic_inc(&sdp->sd_ops_super);
225
226 if (gfs2_tune_get(sdp, gt_statfs_slow))
227 error = gfs2_statfs_slow(sdp, &sc);
228 else
229 error = gfs2_statfs_i(sdp, &sc);
230
231 if (error)
232 return error;
233
234 memset(buf, 0, sizeof(struct kstatfs));
235
236 buf->f_type = GFS2_MAGIC;
237 buf->f_bsize = sdp->sd_sb.sb_bsize;
238 buf->f_blocks = sc.sc_total;
239 buf->f_bfree = sc.sc_free;
240 buf->f_bavail = sc.sc_free;
241 buf->f_files = sc.sc_dinodes + sc.sc_free;
242 buf->f_ffree = sc.sc_free;
243 buf->f_namelen = GFS2_FNAMESIZE;
244
245 return 0;
246}
247
248/**
249 * gfs2_remount_fs - called when the FS is remounted
250 * @sb: the filesystem
251 * @flags: the remount flags
252 * @data: extra data passed in (not used right now)
253 *
254 * Returns: errno
255 */
256
257static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
258{
259 struct gfs2_sbd *sdp = get_v2sdp(sb);
260 int error;
261
262 atomic_inc(&sdp->sd_ops_super);
263
264 error = gfs2_mount_args(sdp, data, 1);
265 if (error)
266 return error;
267
268 if (sdp->sd_args.ar_spectator)
269 *flags |= MS_RDONLY;
270 else {
271 if (*flags & MS_RDONLY) {
272 if (!(sb->s_flags & MS_RDONLY))
273 error = gfs2_make_fs_ro(sdp);
274 } else if (!(*flags & MS_RDONLY) &&
275 (sb->s_flags & MS_RDONLY)) {
276 error = gfs2_make_fs_rw(sdp);
277 }
278 }
279
280 if (*flags & (MS_NOATIME | MS_NODIRATIME))
281 set_bit(SDF_NOATIME, &sdp->sd_flags);
282 else
283 clear_bit(SDF_NOATIME, &sdp->sd_flags);
284
285 /* Don't let the VFS update atimes. GFS2 handles this itself. */
286 *flags |= MS_NOATIME | MS_NODIRATIME;
287
288 return error;
289}
290
291/**
292 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
293 * @inode: The VFS inode
294 *
295 */
296
297static void gfs2_clear_inode(struct inode *inode)
298{
299 struct gfs2_inode *ip = get_v2ip(inode);
300
301 atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_super);
302
303 if (ip) {
304 spin_lock(&ip->i_spin);
305 ip->i_vnode = NULL;
306 set_v2ip(inode, NULL);
307 spin_unlock(&ip->i_spin);
308
309 gfs2_glock_schedule_for_reclaim(ip->i_gl);
310 gfs2_inode_put(ip);
311 }
312}
313
314/**
315 * gfs2_show_options - Show mount options for /proc/mounts
316 * @s: seq_file structure
317 * @mnt: vfsmount
318 *
319 * Returns: 0 on success or error code
320 */
321
322static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
323{
324 struct gfs2_sbd *sdp = get_v2sdp(mnt->mnt_sb);
325 struct gfs2_args *args = &sdp->sd_args;
326
327 atomic_inc(&sdp->sd_ops_super);
328
329 if (args->ar_lockproto[0])
330 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
331 if (args->ar_locktable[0])
332 seq_printf(s, ",locktable=%s", args->ar_locktable);
333 if (args->ar_hostdata[0])
334 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
335 if (args->ar_spectator)
336 seq_printf(s, ",spectator");
337 if (args->ar_ignore_local_fs)
338 seq_printf(s, ",ignore_local_fs");
339 if (args->ar_localflocks)
340 seq_printf(s, ",localflocks");
341 if (args->ar_localcaching)
342 seq_printf(s, ",localcaching");
343 if (args->ar_debug)
344 seq_printf(s, ",debug");
345 if (args->ar_upgrade)
346 seq_printf(s, ",upgrade");
347 if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
348 seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
349 if (args->ar_posix_acl)
350 seq_printf(s, ",acl");
351 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
352 char *state;
353 switch (args->ar_quota) {
354 case GFS2_QUOTA_OFF:
355 state = "off";
356 break;
357 case GFS2_QUOTA_ACCOUNT:
358 state = "account";
359 break;
360 case GFS2_QUOTA_ON:
361 state = "on";
362 break;
363 default:
364 state = "unknown";
365 break;
366 }
367 seq_printf(s, ",quota=%s", state);
368 }
369 if (args->ar_suiddir)
370 seq_printf(s, ",suiddir");
371 if (args->ar_data != GFS2_DATA_DEFAULT) {
372 char *state;
373 switch (args->ar_data) {
374 case GFS2_DATA_WRITEBACK:
375 state = "writeback";
376 break;
377 case GFS2_DATA_ORDERED:
378 state = "ordered";
379 break;
380 default:
381 state = "unknown";
382 break;
383 }
384 seq_printf(s, ",data=%s", state);
385 }
386
387 return 0;
388}
389
390struct super_operations gfs2_super_ops = {
391 .write_inode = gfs2_write_inode,
392 .put_super = gfs2_put_super,
393 .write_super = gfs2_write_super,
394 .write_super_lockfs = gfs2_write_super_lockfs,
395 .unlockfs = gfs2_unlockfs,
396 .statfs = gfs2_statfs,
397 .remount_fs = gfs2_remount_fs,
398 .clear_inode = gfs2_clear_inode,
399 .show_options = gfs2_show_options,
400};
401
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
new file mode 100644
index 000000000000..a41d208dc558
--- /dev/null
+++ b/fs/gfs2/ops_super.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_SUPER_DOT_H__
11#define __OPS_SUPER_DOT_H__
12
13extern struct super_operations gfs2_super_ops;
14
15#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
new file mode 100644
index 000000000000..a1b409ce75e1
--- /dev/null
+++ b/fs/gfs2/ops_vm.c
@@ -0,0 +1,199 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
23#include "ops_vm.h"
24#include "page.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "trans.h"
28
29static void pfault_be_greedy(struct gfs2_inode *ip)
30{
31 unsigned int time;
32
33 spin_lock(&ip->i_spin);
34 time = ip->i_greedy;
35 ip->i_last_pfault = jiffies;
36 spin_unlock(&ip->i_spin);
37
38 gfs2_inode_hold(ip);
39 if (gfs2_glock_be_greedy(ip->i_gl, time))
40 gfs2_inode_put(ip);
41}
42
43static struct page *gfs2_private_nopage(struct vm_area_struct *area,
44 unsigned long address, int *type)
45{
46 struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host);
47 struct gfs2_holder i_gh;
48 struct page *result;
49 int error;
50
51 atomic_inc(&ip->i_sbd->sd_ops_vm);
52
53 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
54 if (error)
55 return NULL;
56
57 set_bit(GIF_PAGED, &ip->i_flags);
58
59 result = filemap_nopage(area, address, type);
60
61 if (result && result != NOPAGE_OOM)
62 pfault_be_greedy(ip);
63
64 gfs2_glock_dq_uninit(&i_gh);
65
66 return result;
67}
68
69static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
70{
71 struct gfs2_sbd *sdp = ip->i_sbd;
72 unsigned long index = page->index;
73 uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift);
74 unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
75 struct gfs2_alloc *al;
76 unsigned int data_blocks, ind_blocks;
77 unsigned int x;
78 int error;
79
80 al = gfs2_alloc_get(ip);
81
82 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
83 if (error)
84 goto out;
85
86 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
87 if (error)
88 goto out_gunlock_q;
89
90 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE,
91 &data_blocks, &ind_blocks);
92
93 al->al_requested = data_blocks + ind_blocks;
94
95 error = gfs2_inplace_reserve(ip);
96 if (error)
97 goto out_gunlock_q;
98
99 error = gfs2_trans_begin(sdp,
100 al->al_rgd->rd_ri.ri_length +
101 ind_blocks + RES_DINODE +
102 RES_STATFS + RES_QUOTA, 0);
103 if (error)
104 goto out_ipres;
105
106 if (gfs2_is_stuffed(ip)) {
107 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL);
108 if (error)
109 goto out_trans;
110 }
111
112 for (x = 0; x < blocks; ) {
113 uint64_t dblock;
114 unsigned int extlen;
115 int new = 1;
116
117 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
118 if (error)
119 goto out_trans;
120
121 lblock += extlen;
122 x += extlen;
123 }
124
125 gfs2_assert_warn(sdp, al->al_alloced);
126
127 out_trans:
128 gfs2_trans_end(sdp);
129
130 out_ipres:
131 gfs2_inplace_release(ip);
132
133 out_gunlock_q:
134 gfs2_quota_unlock(ip);
135
136 out:
137 gfs2_alloc_put(ip);
138
139 return error;
140}
141
142static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
143 unsigned long address, int *type)
144{
145 struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host);
146 struct gfs2_holder i_gh;
147 struct page *result = NULL;
148 unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
149 int alloc_required;
150 int error;
151
152 atomic_inc(&ip->i_sbd->sd_ops_vm);
153
154 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
155 if (error)
156 return NULL;
157
158 if (gfs2_is_jdata(ip))
159 goto out;
160
161 set_bit(GIF_PAGED, &ip->i_flags);
162 set_bit(GIF_SW_PAGED, &ip->i_flags);
163
164 error = gfs2_write_alloc_required(ip,
165 (uint64_t)index << PAGE_CACHE_SHIFT,
166 PAGE_CACHE_SIZE, &alloc_required);
167 if (error)
168 goto out;
169
170 result = filemap_nopage(area, address, type);
171 if (!result || result == NOPAGE_OOM)
172 goto out;
173
174 if (alloc_required) {
175 error = alloc_page_backing(ip, result);
176 if (error) {
177 page_cache_release(result);
178 result = NULL;
179 goto out;
180 }
181 set_page_dirty(result);
182 }
183
184 pfault_be_greedy(ip);
185
186 out:
187 gfs2_glock_dq_uninit(&i_gh);
188
189 return result;
190}
191
192struct vm_operations_struct gfs2_vm_ops_private = {
193 .nopage = gfs2_private_nopage,
194};
195
196struct vm_operations_struct gfs2_vm_ops_sharewrite = {
197 .nopage = gfs2_sharewrite_nopage,
198};
199
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
new file mode 100644
index 000000000000..54e3a8769cbb
--- /dev/null
+++ b/fs/gfs2/ops_vm.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_VM_DOT_H__
11#define __OPS_VM_DOT_H__
12
13extern struct vm_operations_struct gfs2_vm_ops_private;
14extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
15
16#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c
new file mode 100644
index 000000000000..05453c5a06f0
--- /dev/null
+++ b/fs/gfs2/page.c
@@ -0,0 +1,273 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/mm.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "bmap.h"
21#include "inode.h"
22#include "page.h"
23#include "trans.h"
24
25/**
26 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
27 * @gl: the glock
28 *
29 */
30
31void gfs2_pte_inval(struct gfs2_glock *gl)
32{
33 struct gfs2_inode *ip;
34 struct inode *inode;
35
36 ip = get_gl2ip(gl);
37 if (!ip || !S_ISREG(ip->i_di.di_mode))
38 return;
39
40 if (!test_bit(GIF_PAGED, &ip->i_flags))
41 return;
42
43 inode = gfs2_ip2v_lookup(ip);
44 if (inode) {
45 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
46 iput(inode);
47
48 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
49 set_bit(GLF_DIRTY, &gl->gl_flags);
50 }
51
52 clear_bit(GIF_SW_PAGED, &ip->i_flags);
53}
54
55/**
56 * gfs2_page_inval - Invalidate all pages associated with a glock
57 * @gl: the glock
58 *
59 */
60
61void gfs2_page_inval(struct gfs2_glock *gl)
62{
63 struct gfs2_inode *ip;
64 struct inode *inode;
65
66 ip = get_gl2ip(gl);
67 if (!ip || !S_ISREG(ip->i_di.di_mode))
68 return;
69
70 inode = gfs2_ip2v_lookup(ip);
71 if (inode) {
72 struct address_space *mapping = inode->i_mapping;
73
74 truncate_inode_pages(mapping, 0);
75 gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages);
76
77 iput(inode);
78 }
79
80 clear_bit(GIF_PAGED, &ip->i_flags);
81}
82
83/**
84 * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock
85 * @gl: the glock
86 * @flags: DIO_START | DIO_WAIT
87 *
88 * Syncs data (not metadata) for a regular file.
89 * No-op for all other types.
90 */
91
92void gfs2_page_sync(struct gfs2_glock *gl, int flags)
93{
94 struct gfs2_inode *ip;
95 struct inode *inode;
96
97 ip = get_gl2ip(gl);
98 if (!ip || !S_ISREG(ip->i_di.di_mode))
99 return;
100
101 inode = gfs2_ip2v_lookup(ip);
102 if (inode) {
103 struct address_space *mapping = inode->i_mapping;
104 int error = 0;
105
106 if (flags & DIO_START)
107 filemap_fdatawrite(mapping);
108 if (!error && (flags & DIO_WAIT))
109 error = filemap_fdatawait(mapping);
110
111 /* Put back any errors cleared by filemap_fdatawait()
112 so they can be caught by someone who can pass them
113 up to user space. */
114
115 if (error == -ENOSPC)
116 set_bit(AS_ENOSPC, &mapping->flags);
117 else if (error)
118 set_bit(AS_EIO, &mapping->flags);
119
120 iput(inode);
121 }
122}
123
124/**
125 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
126 * @ip: the inode
127 * @dibh: the dinode buffer
128 * @block: the block number that was allocated
129 * @private: any locked page held by the caller process
130 *
131 * Returns: errno
132 */
133
134int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
135 uint64_t block, void *private)
136{
137 struct gfs2_sbd *sdp = ip->i_sbd;
138 struct inode *inode = ip->i_vnode;
139 struct page *page = (struct page *)private;
140 struct buffer_head *bh;
141 int release = 0;
142
143 if (!page || page->index) {
144 page = grab_cache_page(inode->i_mapping, 0);
145 if (!page)
146 return -ENOMEM;
147 release = 1;
148 }
149
150 if (!PageUptodate(page)) {
151 void *kaddr = kmap(page);
152
153 memcpy(kaddr,
154 dibh->b_data + sizeof(struct gfs2_dinode),
155 ip->i_di.di_size);
156 memset(kaddr + ip->i_di.di_size,
157 0,
158 PAGE_CACHE_SIZE - ip->i_di.di_size);
159 kunmap(page);
160
161 SetPageUptodate(page);
162 }
163
164 if (!page_has_buffers(page))
165 create_empty_buffers(page, 1 << inode->i_blkbits,
166 (1 << BH_Uptodate));
167
168 bh = page_buffers(page);
169
170 if (!buffer_mapped(bh))
171 map_bh(bh, inode->i_sb, block);
172
173 set_buffer_uptodate(bh);
174 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
175 gfs2_trans_add_databuf(sdp, bh);
176 mark_buffer_dirty(bh);
177
178 if (release) {
179 unlock_page(page);
180 page_cache_release(page);
181 }
182
183 return 0;
184}
185
186/**
187 * gfs2_truncator_page - truncate a partial data block in the page cache
188 * @ip: the inode
189 * @size: the size the file should be
190 *
191 * Returns: errno
192 */
193
194int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size)
195{
196 struct gfs2_sbd *sdp = ip->i_sbd;
197 struct inode *inode = ip->i_vnode;
198 struct page *page;
199 struct buffer_head *bh;
200 void *kaddr;
201 uint64_t lbn, dbn;
202 unsigned long index;
203 unsigned int offset;
204 unsigned int bufnum;
205 int new = 0;
206 int error;
207
208 lbn = size >> inode->i_blkbits;
209 error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
210 if (error || !dbn)
211 return error;
212
213 index = size >> PAGE_CACHE_SHIFT;
214 offset = size & (PAGE_CACHE_SIZE - 1);
215 bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits));
216
217 page = read_cache_page(inode->i_mapping, index,
218 (filler_t *)inode->i_mapping->a_ops->readpage,
219 NULL);
220 if (IS_ERR(page))
221 return PTR_ERR(page);
222
223 lock_page(page);
224
225 if (!PageUptodate(page) || PageError(page)) {
226 error = -EIO;
227 goto out;
228 }
229
230 kaddr = kmap(page);
231 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
232 kunmap(page);
233
234 if (!page_has_buffers(page))
235 create_empty_buffers(page, 1 << inode->i_blkbits,
236 (1 << BH_Uptodate));
237
238 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
239 /* Do nothing */;
240
241 if (!buffer_mapped(bh))
242 map_bh(bh, inode->i_sb, dbn);
243
244 set_buffer_uptodate(bh);
245 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
246 gfs2_trans_add_databuf(sdp, bh);
247 mark_buffer_dirty(bh);
248
249 out:
250 unlock_page(page);
251 page_cache_release(page);
252
253 return error;
254}
255
256void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page,
257 unsigned int from, unsigned int to)
258{
259 struct buffer_head *head = page_buffers(page);
260 unsigned int bsize = head->b_size;
261 struct buffer_head *bh;
262 unsigned int start, end;
263
264 for (bh = head, start = 0;
265 bh != head || !start;
266 bh = bh->b_this_page, start = end) {
267 end = start + bsize;
268 if (end <= from || start >= to)
269 continue;
270 gfs2_trans_add_databuf(sdp, bh);
271 }
272}
273
diff --git a/fs/gfs2/page.h b/fs/gfs2/page.h
new file mode 100644
index 000000000000..7ad8c99ee0ef
--- /dev/null
+++ b/fs/gfs2/page.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __PAGE_DOT_H__
11#define __PAGE_DOT_H__
12
13void gfs2_pte_inval(struct gfs2_glock *gl);
14void gfs2_page_inval(struct gfs2_glock *gl);
15void gfs2_page_sync(struct gfs2_glock *gl, int flags);
16
17int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
18 uint64_t block, void *private);
19int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size);
20void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page,
21 unsigned int from, unsigned int to);
22
23#endif /* __PAGE_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
new file mode 100644
index 000000000000..a0320f22b57b
--- /dev/null
+++ b/fs/gfs2/quota.c
@@ -0,0 +1,1238 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controllable.
18 * Since quota tags are part of transactions, there is no need for a quota check
19 * program to be run on node crashes or anything like that.
20 *
21 * There are a couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite nodes with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent.
34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read.
37 */
38
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/spinlock.h>
42#include <linux/completion.h>
43#include <linux/buffer_head.h>
44#include <linux/tty.h>
45#include <linux/sort.h>
46#include <asm/semaphore.h>
47
48#include "gfs2.h"
49#include "bmap.h"
50#include "glock.h"
51#include "glops.h"
52#include "jdata.h"
53#include "log.h"
54#include "meta_io.h"
55#include "quota.h"
56#include "rgrp.h"
57#include "super.h"
58#include "trans.h"
59
60#define QUOTA_USER 1
61#define QUOTA_GROUP 0
62
63static uint64_t qd2offset(struct gfs2_quota_data *qd)
64{
65 uint64_t offset;
66
67 offset = 2 * (uint64_t)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
68 offset *= sizeof(struct gfs2_quota);
69
70 return offset;
71}
72
73static int qd_alloc(struct gfs2_sbd *sdp, int user, uint32_t id,
74 struct gfs2_quota_data **qdp)
75{
76 struct gfs2_quota_data *qd;
77 int error;
78
79 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
80 if (!qd)
81 return -ENOMEM;
82
83 qd->qd_count = 1;
84 qd->qd_id = id;
85 if (user)
86 set_bit(QDF_USER, &qd->qd_flags);
87 qd->qd_slot = -1;
88
89 error = gfs2_glock_get(sdp, 2 * (uint64_t)id + !user,
90 &gfs2_quota_glops, CREATE, &qd->qd_gl);
91 if (error)
92 goto fail;
93
94 error = gfs2_lvb_hold(qd->qd_gl);
95 gfs2_glock_put(qd->qd_gl);
96 if (error)
97 goto fail;
98
99 *qdp = qd;
100
101 return 0;
102
103 fail:
104 kfree(qd);
105 return error;
106}
107
108static int qd_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
109 struct gfs2_quota_data **qdp)
110{
111 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
112 int error, found;
113
114 *qdp = NULL;
115
116 for (;;) {
117 found = 0;
118 spin_lock(&sdp->sd_quota_spin);
119 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
120 if (qd->qd_id == id &&
121 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
122 qd->qd_count++;
123 found = 1;
124 break;
125 }
126 }
127
128 if (!found)
129 qd = NULL;
130
131 if (!qd && new_qd) {
132 qd = new_qd;
133 list_add(&qd->qd_list, &sdp->sd_quota_list);
134 atomic_inc(&sdp->sd_quota_count);
135 new_qd = NULL;
136 }
137
138 spin_unlock(&sdp->sd_quota_spin);
139
140 if (qd || !create) {
141 if (new_qd) {
142 gfs2_lvb_unhold(new_qd->qd_gl);
143 kfree(new_qd);
144 }
145 *qdp = qd;
146 return 0;
147 }
148
149 error = qd_alloc(sdp, user, id, &new_qd);
150 if (error)
151 return error;
152 }
153}
154
155static void qd_hold(struct gfs2_quota_data *qd)
156{
157 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
158
159 spin_lock(&sdp->sd_quota_spin);
160 gfs2_assert(sdp, qd->qd_count);
161 qd->qd_count++;
162 spin_unlock(&sdp->sd_quota_spin);
163}
164
165static void qd_put(struct gfs2_quota_data *qd)
166{
167 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
168 spin_lock(&sdp->sd_quota_spin);
169 gfs2_assert(sdp, qd->qd_count);
170 if (!--qd->qd_count)
171 qd->qd_last_touched = jiffies;
172 spin_unlock(&sdp->sd_quota_spin);
173}
174
175static int slot_get(struct gfs2_quota_data *qd)
176{
177 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
178 unsigned int c, o = 0, b;
179 unsigned char byte = 0;
180
181 spin_lock(&sdp->sd_quota_spin);
182
183 if (qd->qd_slot_count++) {
184 spin_unlock(&sdp->sd_quota_spin);
185 return 0;
186 }
187
188 for (c = 0; c < sdp->sd_quota_chunks; c++)
189 for (o = 0; o < PAGE_SIZE; o++) {
190 byte = sdp->sd_quota_bitmap[c][o];
191 if (byte != 0xFF)
192 goto found;
193 }
194
195 goto fail;
196
197 found:
198 for (b = 0; b < 8; b++)
199 if (!(byte & (1 << b)))
200 break;
201 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
202
203 if (qd->qd_slot >= sdp->sd_quota_slots)
204 goto fail;
205
206 sdp->sd_quota_bitmap[c][o] |= 1 << b;
207
208 spin_unlock(&sdp->sd_quota_spin);
209
210 return 0;
211
212 fail:
213 qd->qd_slot_count--;
214 spin_unlock(&sdp->sd_quota_spin);
215 return -ENOSPC;
216}
217
218static void slot_hold(struct gfs2_quota_data *qd)
219{
220 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
221
222 spin_lock(&sdp->sd_quota_spin);
223 gfs2_assert(sdp, qd->qd_slot_count);
224 qd->qd_slot_count++;
225 spin_unlock(&sdp->sd_quota_spin);
226}
227
228static void slot_put(struct gfs2_quota_data *qd)
229{
230 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
231
232 spin_lock(&sdp->sd_quota_spin);
233 gfs2_assert(sdp, qd->qd_slot_count);
234 if (!--qd->qd_slot_count) {
235 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
236 qd->qd_slot = -1;
237 }
238 spin_unlock(&sdp->sd_quota_spin);
239}
240
241static int bh_get(struct gfs2_quota_data *qd)
242{
243 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
244 struct gfs2_inode *ip = sdp->sd_qc_inode;
245 unsigned int block, offset;
246 uint64_t dblock;
247 int new = 0;
248 struct buffer_head *bh;
249 int error;
250
251 down(&sdp->sd_quota_mutex);
252
253 if (qd->qd_bh_count++) {
254 up(&sdp->sd_quota_mutex);
255 return 0;
256 }
257
258 block = qd->qd_slot / sdp->sd_qc_per_block;
259 offset = qd->qd_slot % sdp->sd_qc_per_block;;
260
261 error = gfs2_block_map(ip, block, &new, &dblock, NULL);
262 if (error)
263 goto fail;
264 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
265 if (error)
266 goto fail;
267 error = -EIO;
268 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
269 goto fail_brelse;
270
271 qd->qd_bh = bh;
272 qd->qd_bh_qc = (struct gfs2_quota_change *)
273 (bh->b_data + sizeof(struct gfs2_meta_header) +
274 offset * sizeof(struct gfs2_quota_change));
275
276 up(&sdp->sd_quota_mutex);
277
278 return 0;
279
280 fail_brelse:
281 brelse(bh);
282
283 fail:
284 qd->qd_bh_count--;
285 up(&sdp->sd_quota_mutex);
286 return error;
287}
288
289static void bh_put(struct gfs2_quota_data *qd)
290{
291 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
292
293 down(&sdp->sd_quota_mutex);
294 gfs2_assert(sdp, qd->qd_bh_count);
295 if (!--qd->qd_bh_count) {
296 brelse(qd->qd_bh);
297 qd->qd_bh = NULL;
298 qd->qd_bh_qc = NULL;
299 }
300 up(&sdp->sd_quota_mutex);
301}
302
303static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
304{
305 struct gfs2_quota_data *qd = NULL;
306 int error;
307 int found = 0;
308
309 *qdp = NULL;
310
311 if (sdp->sd_vfs->s_flags & MS_RDONLY)
312 return 0;
313
314 spin_lock(&sdp->sd_quota_spin);
315
316 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
317 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
318 !test_bit(QDF_CHANGE, &qd->qd_flags) ||
319 qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
320 continue;
321
322 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
323
324 set_bit(QDF_LOCKED, &qd->qd_flags);
325 gfs2_assert_warn(sdp, qd->qd_count);
326 qd->qd_count++;
327 qd->qd_change_sync = qd->qd_change;
328 gfs2_assert_warn(sdp, qd->qd_slot_count);
329 qd->qd_slot_count++;
330 found = 1;
331
332 break;
333 }
334
335 if (!found)
336 qd = NULL;
337
338 spin_unlock(&sdp->sd_quota_spin);
339
340 if (qd) {
341 gfs2_assert_warn(sdp, qd->qd_change_sync);
342 error = bh_get(qd);
343 if (error) {
344 clear_bit(QDF_LOCKED, &qd->qd_flags);
345 slot_put(qd);
346 qd_put(qd);
347 return error;
348 }
349 }
350
351 *qdp = qd;
352
353 return 0;
354}
355
356static int qd_trylock(struct gfs2_quota_data *qd)
357{
358 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
359
360 if (sdp->sd_vfs->s_flags & MS_RDONLY)
361 return 0;
362
363 spin_lock(&sdp->sd_quota_spin);
364
365 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
366 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
367 spin_unlock(&sdp->sd_quota_spin);
368 return 0;
369 }
370
371 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
372
373 set_bit(QDF_LOCKED, &qd->qd_flags);
374 gfs2_assert_warn(sdp, qd->qd_count);
375 qd->qd_count++;
376 qd->qd_change_sync = qd->qd_change;
377 gfs2_assert_warn(sdp, qd->qd_slot_count);
378 qd->qd_slot_count++;
379
380 spin_unlock(&sdp->sd_quota_spin);
381
382 gfs2_assert_warn(sdp, qd->qd_change_sync);
383 if (bh_get(qd)) {
384 clear_bit(QDF_LOCKED, &qd->qd_flags);
385 slot_put(qd);
386 qd_put(qd);
387 return 0;
388 }
389
390 return 1;
391}
392
393static void qd_unlock(struct gfs2_quota_data *qd)
394{
395 gfs2_assert_warn(qd->qd_gl->gl_sbd, test_bit(QDF_LOCKED, &qd->qd_flags));
396 clear_bit(QDF_LOCKED, &qd->qd_flags);
397 bh_put(qd);
398 slot_put(qd);
399 qd_put(qd);
400}
401
402static int qdsb_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
403 struct gfs2_quota_data **qdp)
404{
405 int error;
406
407 error = qd_get(sdp, user, id, create, qdp);
408 if (error)
409 return error;
410
411 error = slot_get(*qdp);
412 if (error)
413 goto fail;
414
415 error = bh_get(*qdp);
416 if (error)
417 goto fail_slot;
418
419 return 0;
420
421 fail_slot:
422 slot_put(*qdp);
423
424 fail:
425 qd_put(*qdp);
426 return error;
427}
428
/*
 * qdsb_put - release what qdsb_get() acquired
 * @qd: the quota data
 *
 * Drops the buffer, then the slot, then the qd reference.
 */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);	/* buffer */
	slot_put(qd);	/* slot */
	qd_put(qd);	/* the structure itself */
}
435
436int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
437{
438 struct gfs2_sbd *sdp = ip->i_sbd;
439 struct gfs2_alloc *al = &ip->i_alloc;
440 struct gfs2_quota_data **qd = al->al_qd;
441 int error;
442
443 if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
444 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
445 return -EIO;
446
447 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
448 return 0;
449
450 error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
451 if (error)
452 goto out;
453 al->al_qd_num++;
454 qd++;
455
456 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
457 if (error)
458 goto out;
459 al->al_qd_num++;
460 qd++;
461
462 if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
463 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
464 if (error)
465 goto out;
466 al->al_qd_num++;
467 qd++;
468 }
469
470 if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
471 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
472 if (error)
473 goto out;
474 al->al_qd_num++;
475 qd++;
476 }
477
478 out:
479 if (error)
480 gfs2_quota_unhold(ip);
481
482 return error;
483}
484
485void gfs2_quota_unhold(struct gfs2_inode *ip)
486{
487 struct gfs2_sbd *sdp = ip->i_sbd;
488 struct gfs2_alloc *al = &ip->i_alloc;
489 unsigned int x;
490
491 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
492
493 for (x = 0; x < al->al_qd_num; x++) {
494 qdsb_put(al->al_qd[x]);
495 al->al_qd[x] = NULL;
496 }
497 al->al_qd_num = 0;
498}
499
500static int sort_qd(const void *a, const void *b)
501{
502 struct gfs2_quota_data *qd_a = *(struct gfs2_quota_data **)a;
503 struct gfs2_quota_data *qd_b = *(struct gfs2_quota_data **)b;
504 int ret = 0;
505
506 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
507 !test_bit(QDF_USER, &qd_b->qd_flags)) {
508 if (test_bit(QDF_USER, &qd_a->qd_flags))
509 ret = -1;
510 else
511 ret = 1;
512 } else {
513 if (qd_a->qd_id < qd_b->qd_id)
514 ret = -1;
515 else if (qd_a->qd_id > qd_b->qd_id)
516 ret = 1;
517 }
518
519 return ret;
520}
521
522static void do_qc(struct gfs2_quota_data *qd, int64_t change)
523{
524 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
525 struct gfs2_inode *ip = sdp->sd_qc_inode;
526 struct gfs2_quota_change *qc = qd->qd_bh_qc;
527 int64_t x;
528
529 down(&sdp->sd_quota_mutex);
530 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh);
531
532 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
533 qc->qc_change = 0;
534 qc->qc_flags = 0;
535 if (test_bit(QDF_USER, &qd->qd_flags))
536 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
537 qc->qc_id = cpu_to_be32(qd->qd_id);
538 }
539
540 x = qc->qc_change;
541 x = be64_to_cpu(x) + change;
542 qc->qc_change = cpu_to_be64(x);
543
544 spin_lock(&sdp->sd_quota_spin);
545 qd->qd_change = x;
546 spin_unlock(&sdp->sd_quota_spin);
547
548 if (!x) {
549 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
550 clear_bit(QDF_CHANGE, &qd->qd_flags);
551 qc->qc_flags = 0;
552 qc->qc_id = 0;
553 slot_put(qd);
554 qd_put(qd);
555 } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
556 qd_hold(qd);
557 slot_hold(qd);
558 }
559
560 up(&sdp->sd_quota_mutex);
561}
562
563static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
564{
565 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
566 struct gfs2_inode *ip = sdp->sd_quota_inode;
567 unsigned int data_blocks, ind_blocks;
568 struct gfs2_holder *ghs, i_gh;
569 unsigned int qx, x;
570 struct gfs2_quota_data *qd;
571 uint64_t offset;
572 unsigned int nalloc = 0;
573 struct gfs2_alloc *al = NULL;
574 int error;
575
576 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
577 &data_blocks, &ind_blocks);
578
579 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
580 if (!ghs)
581 return -ENOMEM;
582
583 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
584 for (qx = 0; qx < num_qd; qx++) {
585 error = gfs2_glock_nq_init(qda[qx]->qd_gl,
586 LM_ST_EXCLUSIVE,
587 GL_NOCACHE, &ghs[qx]);
588 if (error)
589 goto out;
590 }
591
592 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
593 if (error)
594 goto out;
595
596 for (x = 0; x < num_qd; x++) {
597 int alloc_required;
598
599 offset = qd2offset(qda[x]);
600 error = gfs2_write_alloc_required(ip, offset,
601 sizeof(struct gfs2_quota),
602 &alloc_required);
603 if (error)
604 goto out_gunlock;
605 if (alloc_required)
606 nalloc++;
607 }
608
609 if (nalloc) {
610 al = gfs2_alloc_get(ip);
611
612 al->al_requested = nalloc * (data_blocks + ind_blocks);
613
614 error = gfs2_inplace_reserve(ip);
615 if (error)
616 goto out_alloc;
617
618 error = gfs2_trans_begin(sdp,
619 al->al_rgd->rd_ri.ri_length +
620 num_qd * data_blocks +
621 nalloc * ind_blocks +
622 RES_DINODE + num_qd +
623 RES_STATFS, 0);
624 if (error)
625 goto out_ipres;
626 } else {
627 error = gfs2_trans_begin(sdp,
628 num_qd * data_blocks +
629 RES_DINODE + num_qd, 0);
630 if (error)
631 goto out_gunlock;
632 }
633
634 for (x = 0; x < num_qd; x++) {
635 char buf[sizeof(struct gfs2_quota)];
636 struct gfs2_quota q;
637
638 qd = qda[x];
639 offset = qd2offset(qd);
640
641 /* The quota file may not be a multiple of
642 sizeof(struct gfs2_quota) bytes. */
643 memset(buf, 0, sizeof(struct gfs2_quota));
644
645 error = gfs2_jdata_read_mem(ip, buf, offset,
646 sizeof(struct gfs2_quota));
647 if (error < 0)
648 goto out_end_trans;
649
650 gfs2_quota_in(&q, buf);
651 q.qu_value += qda[x]->qd_change_sync;
652 gfs2_quota_out(&q, buf);
653
654 error = gfs2_jdata_write_mem(ip, buf, offset,
655 sizeof(struct gfs2_quota));
656 if (error < 0)
657 goto out_end_trans;
658 else if (error != sizeof(struct gfs2_quota)) {
659 error = -EIO;
660 goto out_end_trans;
661 }
662
663 do_qc(qd, -qd->qd_change_sync);
664
665 memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
666 qd->qd_qb.qb_magic = GFS2_MAGIC;
667 qd->qd_qb.qb_limit = q.qu_limit;
668 qd->qd_qb.qb_warn = q.qu_warn;
669 qd->qd_qb.qb_value = q.qu_value;
670
671 gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
672 }
673
674 error = 0;
675
676 out_end_trans:
677 gfs2_trans_end(sdp);
678
679 out_ipres:
680 if (nalloc)
681 gfs2_inplace_release(ip);
682
683 out_alloc:
684 if (nalloc)
685 gfs2_alloc_put(ip);
686
687 out_gunlock:
688 gfs2_glock_dq_uninit(&i_gh);
689
690 out:
691 while (qx--)
692 gfs2_glock_dq_uninit(&ghs[qx]);
693 kfree(ghs);
694 gfs2_log_flush_glock(ip->i_gl);
695
696 return error;
697}
698
699static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
700 struct gfs2_holder *q_gh)
701{
702 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
703 struct gfs2_holder i_gh;
704 struct gfs2_quota q;
705 char buf[sizeof(struct gfs2_quota)];
706 int error;
707
708 restart:
709 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
710 if (error)
711 return error;
712
713 gfs2_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);
714
715 if (force_refresh || qd->qd_qb.qb_magic != GFS2_MAGIC) {
716 gfs2_glock_dq_uninit(q_gh);
717 error = gfs2_glock_nq_init(qd->qd_gl,
718 LM_ST_EXCLUSIVE, GL_NOCACHE,
719 q_gh);
720 if (error)
721 return error;
722
723 error = gfs2_glock_nq_init(sdp->sd_quota_inode->i_gl,
724 LM_ST_SHARED, 0,
725 &i_gh);
726 if (error)
727 goto fail;
728
729 memset(buf, 0, sizeof(struct gfs2_quota));
730
731 error = gfs2_jdata_read_mem(sdp->sd_quota_inode, buf,
732 qd2offset(qd),
733 sizeof(struct gfs2_quota));
734 if (error < 0)
735 goto fail_gunlock;
736
737 gfs2_glock_dq_uninit(&i_gh);
738
739 gfs2_quota_in(&q, buf);
740
741 memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
742 qd->qd_qb.qb_magic = GFS2_MAGIC;
743 qd->qd_qb.qb_limit = q.qu_limit;
744 qd->qd_qb.qb_warn = q.qu_warn;
745 qd->qd_qb.qb_value = q.qu_value;
746
747 gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
748
749 if (gfs2_glock_is_blocking(qd->qd_gl)) {
750 gfs2_glock_dq_uninit(q_gh);
751 force_refresh = 0;
752 goto restart;
753 }
754 }
755
756 return 0;
757
758 fail_gunlock:
759 gfs2_glock_dq_uninit(&i_gh);
760
761 fail:
762 gfs2_glock_dq_uninit(q_gh);
763
764 return error;
765}
766
767int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
768{
769 struct gfs2_sbd *sdp = ip->i_sbd;
770 struct gfs2_alloc *al = &ip->i_alloc;
771 unsigned int x;
772 int error = 0;
773
774 gfs2_quota_hold(ip, uid, gid);
775
776 if (capable(CAP_SYS_RESOURCE) ||
777 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
778 return 0;
779
780 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
781 sort_qd, NULL);
782
783 for (x = 0; x < al->al_qd_num; x++) {
784 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
785 if (error)
786 break;
787 }
788
789 if (!error)
790 set_bit(GIF_QD_LOCKED, &ip->i_flags);
791 else {
792 while (x--)
793 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
794 gfs2_quota_unhold(ip);
795 }
796
797 return error;
798}
799
800static int need_sync(struct gfs2_quota_data *qd)
801{
802 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
803 struct gfs2_tune *gt = &sdp->sd_tune;
804 int64_t value;
805 unsigned int num, den;
806 int do_sync = 1;
807
808 if (!qd->qd_qb.qb_limit)
809 return 0;
810
811 spin_lock(&sdp->sd_quota_spin);
812 value = qd->qd_change;
813 spin_unlock(&sdp->sd_quota_spin);
814
815 spin_lock(&gt->gt_spin);
816 num = gt->gt_quota_scale_num;
817 den = gt->gt_quota_scale_den;
818 spin_unlock(&gt->gt_spin);
819
820 if (value < 0)
821 do_sync = 0;
822 else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
823 do_sync = 0;
824 else {
825 value *= gfs2_jindex_size(sdp) * num;
826 do_div(value, den);
827 value += qd->qd_qb.qb_value;
828 if (value < (int64_t)qd->qd_qb.qb_limit)
829 do_sync = 0;
830 }
831
832 return do_sync;
833}
834
835void gfs2_quota_unlock(struct gfs2_inode *ip)
836{
837 struct gfs2_alloc *al = &ip->i_alloc;
838 struct gfs2_quota_data *qda[4];
839 unsigned int count = 0;
840 unsigned int x;
841
842 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
843 goto out;
844
845 for (x = 0; x < al->al_qd_num; x++) {
846 struct gfs2_quota_data *qd;
847 int sync;
848
849 qd = al->al_qd[x];
850 sync = need_sync(qd);
851
852 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
853
854 if (sync && qd_trylock(qd))
855 qda[count++] = qd;
856 }
857
858 if (count) {
859 do_sync(count, qda);
860 for (x = 0; x < count; x++)
861 qd_unlock(qda[x]);
862 }
863
864 out:
865 gfs2_quota_unhold(ip);
866}
867
868#define MAX_LINE 256
869
870static int print_message(struct gfs2_quota_data *qd, char *type)
871{
872 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
873 char *line;
874 int len;
875
876 line = kmalloc(MAX_LINE, GFP_KERNEL);
877 if (!line)
878 return -ENOMEM;
879
880 len = snprintf(line, MAX_LINE-1, "GFS2: fsid=%s: quota %s for %s %u\r\n",
881 sdp->sd_fsname, type,
882 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
883 qd->qd_id);
884 line[MAX_LINE-1] = 0;
885
886 if (current->signal) { /* Is this test still required? */
887 tty_write_message(current->signal->tty, line);
888 }
889
890 kfree(line);
891
892 return 0;
893}
894
895int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
896{
897 struct gfs2_sbd *sdp = ip->i_sbd;
898 struct gfs2_alloc *al = &ip->i_alloc;
899 struct gfs2_quota_data *qd;
900 int64_t value;
901 unsigned int x;
902 int error = 0;
903
904 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
905 return 0;
906
907 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
908 return 0;
909
910 for (x = 0; x < al->al_qd_num; x++) {
911 qd = al->al_qd[x];
912
913 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
914 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
915 continue;
916
917 value = qd->qd_qb.qb_value;
918 spin_lock(&sdp->sd_quota_spin);
919 value += qd->qd_change;
920 spin_unlock(&sdp->sd_quota_spin);
921
922 if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
923 print_message(qd, "exceeded");
924 error = -EDQUOT;
925 break;
926 } else if (qd->qd_qb.qb_warn &&
927 (int64_t)qd->qd_qb.qb_warn < value &&
928 time_after_eq(jiffies, qd->qd_last_warn +
929 gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) {
930 error = print_message(qd, "warning");
931 qd->qd_last_warn = jiffies;
932 }
933 }
934
935 return error;
936}
937
938void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
939 uint32_t uid, uint32_t gid)
940{
941 struct gfs2_alloc *al = &ip->i_alloc;
942 struct gfs2_quota_data *qd;
943 unsigned int x;
944 unsigned int found = 0;
945
946 if (gfs2_assert_warn(ip->i_sbd, change))
947 return;
948 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
949 return;
950
951 for (x = 0; x < al->al_qd_num; x++) {
952 qd = al->al_qd[x];
953
954 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
955 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
956 do_qc(qd, change);
957 found++;
958 }
959 }
960}
961
962int gfs2_quota_sync(struct gfs2_sbd *sdp)
963{
964 struct gfs2_quota_data **qda;
965 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
966 unsigned int num_qd;
967 unsigned int x;
968 int error = 0;
969
970 sdp->sd_quota_sync_gen++;
971
972 qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
973 if (!qda)
974 return -ENOMEM;
975
976 do {
977 num_qd = 0;
978
979 for (;;) {
980 error = qd_fish(sdp, qda + num_qd);
981 if (error || !qda[num_qd])
982 break;
983 if (++num_qd == max_qd)
984 break;
985 }
986
987 if (num_qd) {
988 if (!error)
989 error = do_sync(num_qd, qda);
990 if (!error)
991 for (x = 0; x < num_qd; x++)
992 qda[x]->qd_sync_gen =
993 sdp->sd_quota_sync_gen;
994
995 for (x = 0; x < num_qd; x++)
996 qd_unlock(qda[x]);
997 }
998 } while (!error && num_qd == max_qd);
999
1000 kfree(qda);
1001
1002 return error;
1003}
1004
1005int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id)
1006{
1007 struct gfs2_quota_data *qd;
1008 struct gfs2_holder q_gh;
1009 int error;
1010
1011 error = qd_get(sdp, user, id, CREATE, &qd);
1012 if (error)
1013 return error;
1014
1015 error = do_glock(qd, FORCE, &q_gh);
1016 if (!error)
1017 gfs2_glock_dq_uninit(&q_gh);
1018
1019 qd_put(qd);
1020
1021 return error;
1022}
1023
1024int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
1025 struct gfs2_quota *q)
1026{
1027 struct gfs2_quota_data *qd;
1028 struct gfs2_holder q_gh;
1029 int error;
1030
1031 if (((user) ? (id != current->fsuid) : (!in_group_p(id))) &&
1032 !capable(CAP_SYS_ADMIN))
1033 return -EACCES;
1034
1035 error = qd_get(sdp, user, id, CREATE, &qd);
1036 if (error)
1037 return error;
1038
1039 error = do_glock(qd, NO_FORCE, &q_gh);
1040 if (error)
1041 goto out;
1042
1043 memset(q, 0, sizeof(struct gfs2_quota));
1044 q->qu_limit = qd->qd_qb.qb_limit;
1045 q->qu_warn = qd->qd_qb.qb_warn;
1046 q->qu_value = qd->qd_qb.qb_value;
1047
1048 spin_lock(&sdp->sd_quota_spin);
1049 q->qu_value += qd->qd_change;
1050 spin_unlock(&sdp->sd_quota_spin);
1051
1052 gfs2_glock_dq_uninit(&q_gh);
1053
1054 out:
1055 qd_put(qd);
1056
1057 return error;
1058}
1059
1060int gfs2_quota_init(struct gfs2_sbd *sdp)
1061{
1062 struct gfs2_inode *ip = sdp->sd_qc_inode;
1063 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
1064 unsigned int x, slot = 0;
1065 unsigned int found = 0;
1066 uint64_t dblock;
1067 uint32_t extlen = 0;
1068 int error;
1069
1070 if (!ip->i_di.di_size ||
1071 ip->i_di.di_size > (64 << 20) ||
1072 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
1073 gfs2_consist_inode(ip);
1074 return -EIO;
1075 }
1076 sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
1077 sdp->sd_quota_chunks = DIV_RU(sdp->sd_quota_slots, 8 * PAGE_SIZE);
1078
1079 error = -ENOMEM;
1080
1081 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
1082 sizeof(unsigned char *), GFP_KERNEL);
1083 if (!sdp->sd_quota_bitmap)
1084 return error;
1085
1086 for (x = 0; x < sdp->sd_quota_chunks; x++) {
1087 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
1088 if (!sdp->sd_quota_bitmap[x])
1089 goto fail;
1090 }
1091
1092 for (x = 0; x < blocks; x++) {
1093 struct buffer_head *bh;
1094 unsigned int y;
1095
1096 if (!extlen) {
1097 int new = 0;
1098 error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
1099 if (error)
1100 goto fail;
1101 }
1102 gfs2_meta_ra(ip->i_gl, dblock, extlen);
1103 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
1104 &bh);
1105 if (error)
1106 goto fail;
1107 error = -EIO;
1108 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
1109 brelse(bh);
1110 goto fail;
1111 }
1112
1113 for (y = 0;
1114 y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
1115 y++, slot++) {
1116 struct gfs2_quota_change qc;
1117 struct gfs2_quota_data *qd;
1118
1119 gfs2_quota_change_in(&qc, bh->b_data +
1120 sizeof(struct gfs2_meta_header) +
1121 y * sizeof(struct gfs2_quota_change));
1122 if (!qc.qc_change)
1123 continue;
1124
1125 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
1126 qc.qc_id, &qd);
1127 if (error) {
1128 brelse(bh);
1129 goto fail;
1130 }
1131
1132 set_bit(QDF_CHANGE, &qd->qd_flags);
1133 qd->qd_change = qc.qc_change;
1134 qd->qd_slot = slot;
1135 qd->qd_slot_count = 1;
1136 qd->qd_last_touched = jiffies;
1137
1138 spin_lock(&sdp->sd_quota_spin);
1139 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1140 list_add(&qd->qd_list, &sdp->sd_quota_list);
1141 atomic_inc(&sdp->sd_quota_count);
1142 spin_unlock(&sdp->sd_quota_spin);
1143
1144 found++;
1145 }
1146
1147 brelse(bh);
1148 dblock++;
1149 extlen--;
1150 }
1151
1152 if (found)
1153 fs_info(sdp, "found %u quota changes\n", found);
1154
1155 return 0;
1156
1157 fail:
1158 gfs2_quota_cleanup(sdp);
1159 return error;
1160}
1161
1162void gfs2_quota_scan(struct gfs2_sbd *sdp)
1163{
1164 struct gfs2_quota_data *qd, *safe;
1165 LIST_HEAD(dead);
1166
1167 spin_lock(&sdp->sd_quota_spin);
1168 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1169 if (!qd->qd_count &&
1170 time_after_eq(jiffies, qd->qd_last_touched +
1171 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1172 list_move(&qd->qd_list, &dead);
1173 gfs2_assert_warn(sdp,
1174 atomic_read(&sdp->sd_quota_count) > 0);
1175 atomic_dec(&sdp->sd_quota_count);
1176 }
1177 }
1178 spin_unlock(&sdp->sd_quota_spin);
1179
1180 while (!list_empty(&dead)) {
1181 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1182 list_del(&qd->qd_list);
1183
1184 gfs2_assert_warn(sdp, !qd->qd_change);
1185 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1186 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1187
1188 gfs2_lvb_unhold(qd->qd_gl);
1189 kfree(qd);
1190 }
1191}
1192
1193void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1194{
1195 struct list_head *head = &sdp->sd_quota_list;
1196 struct gfs2_quota_data *qd;
1197 unsigned int x;
1198
1199 spin_lock(&sdp->sd_quota_spin);
1200 while (!list_empty(head)) {
1201 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1202
1203 if (qd->qd_count > 1 ||
1204 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1205 list_move(&qd->qd_list, head);
1206 spin_unlock(&sdp->sd_quota_spin);
1207 schedule();
1208 spin_lock(&sdp->sd_quota_spin);
1209 continue;
1210 }
1211
1212 list_del(&qd->qd_list);
1213 atomic_dec(&sdp->sd_quota_count);
1214 spin_unlock(&sdp->sd_quota_spin);
1215
1216 if (!qd->qd_count) {
1217 gfs2_assert_warn(sdp, !qd->qd_change);
1218 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1219 } else
1220 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1221 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1222
1223 gfs2_lvb_unhold(qd->qd_gl);
1224 kfree(qd);
1225
1226 spin_lock(&sdp->sd_quota_spin);
1227 }
1228 spin_unlock(&sdp->sd_quota_spin);
1229
1230 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1231
1232 if (sdp->sd_quota_bitmap) {
1233 for (x = 0; x < sdp->sd_quota_chunks; x++)
1234 kfree(sdp->sd_quota_bitmap[x]);
1235 kfree(sdp->sd_quota_bitmap);
1236 }
1237}
1238
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
new file mode 100644
index 000000000000..005529f6895d
--- /dev/null
+++ b/fs/gfs2/quota.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __QUOTA_DOT_H__
11#define __QUOTA_DOT_H__
12
13#define NO_QUOTA_CHANGE ((uint32_t)-1)
14
15int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
16void gfs2_quota_unhold(struct gfs2_inode *ip);
17
18int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
19void gfs2_quota_unlock(struct gfs2_inode *ip);
20
21int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
22void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
23 uint32_t uid, uint32_t gid);
24
25int gfs2_quota_sync(struct gfs2_sbd *sdp);
26int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id);
27int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
28 struct gfs2_quota *q);
29
30int gfs2_quota_init(struct gfs2_sbd *sdp);
31void gfs2_quota_scan(struct gfs2_sbd *sdp);
32void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
33
34#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
new file mode 100644
index 000000000000..15cd26fbcff9
--- /dev/null
+++ b/fs/gfs2/recovery.c
@@ -0,0 +1,570 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "glops.h"
21#include "lm.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "recovery.h"
25#include "super.h"
26
27int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
28 struct buffer_head **bh)
29{
30 struct gfs2_glock *gl = jd->jd_inode->i_gl;
31 int new = 0;
32 uint64_t dblock;
33 uint32_t extlen;
34 int error;
35
36 error = gfs2_block_map(jd->jd_inode, blk, &new, &dblock, &extlen);
37 if (error)
38 return error;
39 if (!dblock) {
40 gfs2_consist_inode(jd->jd_inode);
41 return -EIO;
42 }
43
44 gfs2_meta_ra(gl, dblock, extlen);
45 error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
46
47 return error;
48}
49
50int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
51{
52 struct list_head *head = &sdp->sd_revoke_list;
53 struct gfs2_revoke_replay *rr;
54 int found = 0;
55
56 list_for_each_entry(rr, head, rr_list) {
57 if (rr->rr_blkno == blkno) {
58 found = 1;
59 break;
60 }
61 }
62
63 if (found) {
64 rr->rr_where = where;
65 return 0;
66 }
67
68 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
69 if (!rr)
70 return -ENOMEM;
71
72 rr->rr_blkno = blkno;
73 rr->rr_where = where;
74 list_add(&rr->rr_list, head);
75
76 return 1;
77}
78
79int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
80{
81 struct gfs2_revoke_replay *rr;
82 int wrap, a, b, revoke;
83 int found = 0;
84
85 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
86 if (rr->rr_blkno == blkno) {
87 found = 1;
88 break;
89 }
90 }
91
92 if (!found)
93 return 0;
94
95 wrap = (rr->rr_where < sdp->sd_replay_tail);
96 a = (sdp->sd_replay_tail < where);
97 b = (where < rr->rr_where);
98 revoke = (wrap) ? (a || b) : (a && b);
99
100 return revoke;
101}
102
103void gfs2_revoke_clean(struct gfs2_sbd *sdp)
104{
105 struct list_head *head = &sdp->sd_revoke_list;
106 struct gfs2_revoke_replay *rr;
107
108 while (!list_empty(head)) {
109 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
110 list_del(&rr->rr_list);
111 kfree(rr);
112 }
113}
114
115/**
116 * get_log_header - read the log header for a given segment
117 * @jd: the journal
118 * @blk: the block to look at
119 * @lh: the log header to return
120 *
121 * Read the log header for a given segement in a given journal. Do a few
122 * sanity checks on it.
123 *
124 * Returns: 0 on success,
125 * 1 if the header was invalid or incomplete,
126 * errno on error
127 */
128
129static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
130 struct gfs2_log_header *head)
131{
132 struct buffer_head *bh;
133 struct gfs2_log_header lh;
134 uint32_t hash;
135 int error;
136
137 error = gfs2_replay_read_block(jd, blk, &bh);
138 if (error)
139 return error;
140
141 memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
142 lh.lh_hash = 0;
143 hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
144 gfs2_log_header_in(&lh, bh->b_data);
145
146 brelse(bh);
147
148 if (lh.lh_header.mh_magic != GFS2_MAGIC ||
149 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
150 lh.lh_blkno != blk ||
151 lh.lh_hash != hash)
152 return 1;
153
154 *head = lh;
155
156 return 0;
157}
158
159/**
160 * find_good_lh - find a good log header
161 * @jd: the journal
162 * @blk: the segment to start searching from
163 * @lh: the log header to fill in
164 * @forward: if true search forward in the log, else search backward
165 *
166 * Call get_log_header() to get a log header for a segment, but if the
167 * segment is bad, either scan forward or backward until we find a good one.
168 *
169 * Returns: errno
170 */
171
172static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
173 struct gfs2_log_header *head)
174{
175 unsigned int orig_blk = *blk;
176 int error;
177
178 for (;;) {
179 error = get_log_header(jd, *blk, head);
180 if (error <= 0)
181 return error;
182
183 if (++*blk == jd->jd_blocks)
184 *blk = 0;
185
186 if (*blk == orig_blk) {
187 gfs2_consist_inode(jd->jd_inode);
188 return -EIO;
189 }
190 }
191}
192
193/**
194 * jhead_scan - make sure we've found the head of the log
195 * @jd: the journal
196 * @head: this is filled in with the log descriptor of the head
197 *
198 * At this point, seg and lh should be either the head of the log or just
199 * before. Scan forward until we find the head.
200 *
201 * Returns: errno
202 */
203
204static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
205{
206 unsigned int blk = head->lh_blkno;
207 struct gfs2_log_header lh;
208 int error;
209
210 for (;;) {
211 if (++blk == jd->jd_blocks)
212 blk = 0;
213
214 error = get_log_header(jd, blk, &lh);
215 if (error < 0)
216 return error;
217 if (error == 1)
218 continue;
219
220 if (lh.lh_sequence == head->lh_sequence) {
221 gfs2_consist_inode(jd->jd_inode);
222 return -EIO;
223 }
224 if (lh.lh_sequence < head->lh_sequence)
225 break;
226
227 *head = lh;
228 }
229
230 return 0;
231}
232
233/**
234 * gfs2_find_jhead - find the head of a log
235 * @jd: the journal
236 * @head: the log descriptor for the head of the log is returned here
237 *
238 * Do a binary search of a journal and find the valid log entry with the
239 * highest sequence number. (i.e. the log head)
240 *
241 * Returns: errno
242 */
243
244int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
245{
246 struct gfs2_log_header lh_1, lh_m;
247 uint32_t blk_1, blk_2, blk_m;
248 int error;
249
250 blk_1 = 0;
251 blk_2 = jd->jd_blocks - 1;
252
253 for (;;) {
254 blk_m = (blk_1 + blk_2) / 2;
255
256 error = find_good_lh(jd, &blk_1, &lh_1);
257 if (error)
258 return error;
259
260 error = find_good_lh(jd, &blk_m, &lh_m);
261 if (error)
262 return error;
263
264 if (blk_1 == blk_m || blk_m == blk_2)
265 break;
266
267 if (lh_1.lh_sequence <= lh_m.lh_sequence)
268 blk_1 = blk_m;
269 else
270 blk_2 = blk_m;
271 }
272
273 error = jhead_scan(jd, &lh_1);
274 if (error)
275 return error;
276
277 *head = lh_1;
278
279 return error;
280}
281
282/**
283 * foreach_descriptor - go through the active part of the log
284 * @jd: the journal
285 * @start: the first log header in the active region
286 * @end: the last log header (don't process the contents of this entry))
287 *
288 * Call a given function once for every log descriptor in the active
289 * portion of the log.
290 *
291 * Returns: errno
292 */
293
294static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
295 unsigned int end, int pass)
296{
297 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
298 struct buffer_head *bh;
299 struct gfs2_log_descriptor *ld;
300 int error = 0;
301 u32 length;
302 __be64 *ptr;
303 unsigned int offset = sizeof(struct gfs2_log_descriptor);
304 offset += (sizeof(__be64)-1);
305 offset &= ~(sizeof(__be64)-1);
306
307 while (start != end) {
308 error = gfs2_replay_read_block(jd, start, &bh);
309 if (error)
310 return error;
311 if (gfs2_meta_check(sdp, bh)) {
312 brelse(bh);
313 return -EIO;
314 }
315 ld = (struct gfs2_log_descriptor *)bh->b_data;
316 length = be32_to_cpu(ld->ld_length);
317
318 if (be16_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
319 struct gfs2_log_header lh;
320 error = get_log_header(jd, start, &lh);
321 if (!error) {
322 gfs2_replay_incr_blk(sdp, &start);
323 continue;
324 }
325 if (error == 1) {
326 gfs2_consist_inode(jd->jd_inode);
327 error = -EIO;
328 }
329 brelse(bh);
330 return error;
331 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
332 brelse(bh);
333 return -EIO;
334 }
335 ptr = (__be64 *)(bh->b_data + offset);
336 error = lops_scan_elements(jd, start, ld, ptr, pass);
337 if (error) {
338 brelse(bh);
339 return error;
340 }
341
342 while (length--)
343 gfs2_replay_incr_blk(sdp, &start);
344
345 brelse(bh);
346 }
347
348 return 0;
349}
350
351/**
352 * clean_journal - mark a dirty journal as being clean
353 * @sdp: the filesystem
354 * @jd: the journal
355 * @gl: the journal's glock
356 * @head: the head journal to start from
357 *
358 * Returns: errno
359 */
360
361static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
362{
363 struct gfs2_inode *ip = jd->jd_inode;
364 struct gfs2_sbd *sdp = ip->i_sbd;
365 unsigned int lblock;
366 int new = 0;
367 uint64_t dblock;
368 struct gfs2_log_header *lh;
369 uint32_t hash;
370 struct buffer_head *bh;
371 int error;
372
373 lblock = head->lh_blkno;
374 gfs2_replay_incr_blk(sdp, &lblock);
375 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
376 if (error)
377 return error;
378 if (!dblock) {
379 gfs2_consist_inode(ip);
380 return -EIO;
381 }
382
383 bh = sb_getblk(sdp->sd_vfs, dblock);
384 lock_buffer(bh);
385 memset(bh->b_data, 0, bh->b_size);
386 set_buffer_uptodate(bh);
387 clear_buffer_dirty(bh);
388 unlock_buffer(bh);
389
390 lh = (struct gfs2_log_header *)bh->b_data;
391 memset(lh, 0, sizeof(struct gfs2_log_header));
392 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
393 lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
394 lh->lh_header.mh_format = cpu_to_be16(GFS2_FORMAT_LH);
395 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
396 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
397 lh->lh_blkno = cpu_to_be32(lblock);
398 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
399 lh->lh_hash = cpu_to_be32(hash);
400
401 set_buffer_dirty(bh);
402 if (sync_dirty_buffer(bh))
403 gfs2_io_error_bh(sdp, bh);
404 brelse(bh);
405
406 return error;
407}
408
409/**
410 * gfs2_recover_journal - recovery a given journal
411 * @jd: the struct gfs2_jdesc describing the journal
412 * @wait: Don't return until the journal is clean (or an error is encountered)
413 *
414 * Acquire the journal's lock, check to see if the journal is clean, and
415 * do recovery if necessary.
416 *
417 * Returns: errno
418 */
419
420int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
421{
422 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
423 struct gfs2_log_header head;
424 struct gfs2_holder j_gh, ji_gh, t_gh;
425 unsigned long t;
426 int ro = 0;
427 unsigned int pass;
428 int error;
429
430 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid);
431
432 /* Aquire the journal lock so we can do recovery */
433
434 error = gfs2_glock_nq_num(sdp,
435 jd->jd_jid, &gfs2_journal_glops,
436 LM_ST_EXCLUSIVE,
437 LM_FLAG_NOEXP |
438 ((wait) ? 0 : LM_FLAG_TRY) |
439 GL_NOCACHE, &j_gh);
440 switch (error) {
441 case 0:
442 break;
443
444 case GLR_TRYFAILED:
445 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
446 error = 0;
447
448 default:
449 goto fail;
450 };
451
452 error = gfs2_glock_nq_init(jd->jd_inode->i_gl, LM_ST_SHARED,
453 LM_FLAG_NOEXP, &ji_gh);
454 if (error)
455 goto fail_gunlock_j;
456
457 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
458
459 error = gfs2_jdesc_check(jd);
460 if (error)
461 goto fail_gunlock_ji;
462
463 error = gfs2_find_jhead(jd, &head);
464 if (error)
465 goto fail_gunlock_ji;
466
467 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
468 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
469 jd->jd_jid);
470
471 t = jiffies;
472
473 /* Acquire a shared hold on the transaction lock */
474
475 error = gfs2_glock_nq_init(sdp->sd_trans_gl,
476 LM_ST_SHARED,
477 LM_FLAG_NOEXP |
478 LM_FLAG_PRIORITY |
479 GL_NEVER_RECURSE |
480 GL_NOCANCEL |
481 GL_NOCACHE,
482 &t_gh);
483 if (error)
484 goto fail_gunlock_ji;
485
486 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
487 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
488 ro = 1;
489 } else {
490 if (sdp->sd_vfs->s_flags & MS_RDONLY)
491 ro = 1;
492 }
493
494 if (ro) {
495 fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
496 jd->jd_jid);
497 error = -EROFS;
498 goto fail_gunlock_tr;
499 }
500
501 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
502
503 for (pass = 0; pass < 2; pass++) {
504 lops_before_scan(jd, &head, pass);
505 error = foreach_descriptor(jd, head.lh_tail,
506 head.lh_blkno, pass);
507 lops_after_scan(jd, error, pass);
508 if (error)
509 goto fail_gunlock_tr;
510 }
511
512 error = clean_journal(jd, &head);
513 if (error)
514 goto fail_gunlock_tr;
515
516 gfs2_glock_dq_uninit(&t_gh);
517
518 t = DIV_RU(jiffies - t, HZ);
519
520 fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
521 jd->jd_jid, t);
522 }
523
524 gfs2_glock_dq_uninit(&ji_gh);
525
526 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
527
528 gfs2_glock_dq_uninit(&j_gh);
529
530 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
531
532 return 0;
533
534 fail_gunlock_tr:
535 gfs2_glock_dq_uninit(&t_gh);
536
537 fail_gunlock_ji:
538 gfs2_glock_dq_uninit(&ji_gh);
539
540 fail_gunlock_j:
541 gfs2_glock_dq_uninit(&j_gh);
542
543 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
544
545 fail:
546 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
547
548 return error;
549}
550
551/**
552 * gfs2_check_journals - Recover any dirty journals
553 * @sdp: the filesystem
554 *
555 */
556
557void gfs2_check_journals(struct gfs2_sbd *sdp)
558{
559 struct gfs2_jdesc *jd;
560
561 for (;;) {
562 jd = gfs2_jdesc_find_dirty(sdp);
563 if (!jd)
564 break;
565
566 if (jd != sdp->sd_jdesc)
567 gfs2_recover_journal(jd, NO_WAIT);
568 }
569}
570
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
new file mode 100644
index 000000000000..50d7eb57881c
--- /dev/null
+++ b/fs/gfs2/recovery.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RECOVERY_DOT_H__
11#define __RECOVERY_DOT_H__
12
13static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
14{
15 if (++*blk == sdp->sd_jdesc->jd_blocks)
16 *blk = 0;
17}
18
19int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
20 struct buffer_head **bh);
21
22int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
23int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
24void gfs2_revoke_clean(struct gfs2_sbd *sdp);
25
26int gfs2_find_jhead(struct gfs2_jdesc *jd,
27 struct gfs2_log_header *head);
28int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, int wait);
29void gfs2_check_journals(struct gfs2_sbd *sdp);
30
31#endif /* __RECOVERY_DOT_H__ */
32
diff --git a/fs/gfs2/resize.c b/fs/gfs2/resize.c
new file mode 100644
index 000000000000..67d26b99a73c
--- /dev/null
+++ b/fs/gfs2/resize.c
@@ -0,0 +1,291 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "dir.h"
20#include "glock.h"
21#include "inode.h"
22#include "jdata.h"
23#include "meta_io.h"
24#include "quota.h"
25#include "resize.h"
26#include "rgrp.h"
27#include "super.h"
28#include "trans.h"
29
30/* A single transaction needs to add the structs to rindex and make the
31 statfs change. */
32
33int gfs2_resize_add_rgrps(struct gfs2_sbd *sdp, char __user *buf,
34 unsigned int size)
35{
36 unsigned int num = size / sizeof(struct gfs2_rindex);
37 struct gfs2_inode *ip = sdp->sd_rindex;
38 struct gfs2_alloc *al = NULL;
39 struct gfs2_holder i_gh;
40 unsigned int data_blocks, ind_blocks;
41 int alloc_required;
42 unsigned int x;
43 int error;
44
45 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
46
47 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
48 LM_FLAG_PRIORITY | GL_SYNC, &i_gh);
49 if (error)
50 return error;
51
52 if (!gfs2_is_jdata(ip)) {
53 gfs2_consist_inode(ip);
54 error = -EIO;
55 goto out;
56 }
57
58 error = gfs2_write_alloc_required(ip, ip->i_di.di_size, size,
59 &alloc_required);
60 if (error)
61 goto out;
62
63 if (alloc_required) {
64 al = gfs2_alloc_get(ip);
65
66 al->al_requested = data_blocks + ind_blocks;
67
68 error = gfs2_inplace_reserve(ip);
69 if (error)
70 goto out_alloc;
71
72 error = gfs2_trans_begin(sdp,
73 al->al_rgd->rd_ri.ri_length +
74 data_blocks + ind_blocks +
75 RES_DINODE + RES_STATFS, 0);
76 if (error)
77 goto out_relse;
78 } else {
79 error = gfs2_trans_begin(sdp, data_blocks +
80 RES_DINODE + RES_STATFS, 0);
81 if (error)
82 goto out;
83 }
84
85 for (x = 0; x < num; x++) {
86 struct gfs2_rindex ri;
87 char ri_buf[sizeof(struct gfs2_rindex)];
88
89 error = copy_from_user(&ri, buf, sizeof(struct gfs2_rindex));
90 if (error) {
91 error = -EFAULT;
92 goto out_trans;
93 }
94 gfs2_rindex_out(&ri, ri_buf);
95
96 error = gfs2_jdata_write_mem(ip, ri_buf, ip->i_di.di_size,
97 sizeof(struct gfs2_rindex));
98 if (error < 0)
99 goto out_trans;
100 gfs2_assert_withdraw(sdp, error == sizeof(struct gfs2_rindex));
101 error = 0;
102
103 gfs2_statfs_change(sdp, ri.ri_data, ri.ri_data, 0);
104
105 buf += sizeof(struct gfs2_rindex);
106 }
107
108 out_trans:
109 gfs2_trans_end(sdp);
110
111 out_relse:
112 if (alloc_required)
113 gfs2_inplace_release(ip);
114
115 out_alloc:
116 if (alloc_required)
117 gfs2_alloc_put(ip);
118
119 out:
120 ip->i_gl->gl_vn++;
121 gfs2_glock_dq_uninit(&i_gh);
122
123 return error;
124}
125
126static void drop_dentries(struct gfs2_inode *ip)
127{
128 struct inode *inode;
129 struct dentry *d;
130
131 inode = gfs2_ip2v_lookup(ip);
132 if (!inode)
133 return;
134
135 restart:
136 spin_lock(&dcache_lock);
137 list_for_each_entry(d, &inode->i_dentry, d_alias) {
138 if (d_unhashed(d))
139 continue;
140 dget_locked(d);
141 __d_drop(d);
142 spin_unlock(&dcache_lock);
143 dput(d);
144 goto restart;
145 }
146 spin_unlock(&dcache_lock);
147
148 iput(inode);
149}
150
151/* This is called by an ioctl to rename an ordinary file that's represented
152 in the vfs to a hidden system file that isn't represented in the vfs. It's
153 used to add journals, along with the associated system files, to a fs. */
154
155int gfs2_rename2system(struct gfs2_inode *ip,
156 struct gfs2_inode *old_dip, char *old_name,
157 struct gfs2_inode *new_dip, char *new_name)
158{
159 struct gfs2_sbd *sdp = ip->i_sbd;
160 struct gfs2_holder ghs[3];
161 struct qstr old_qstr, new_qstr;
162 struct gfs2_inum inum;
163 int alloc_required;
164 struct buffer_head *dibh;
165 int error;
166
167 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, ghs);
168 gfs2_holder_init(old_dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
169 gfs2_holder_init(new_dip->i_gl, LM_ST_EXCLUSIVE, GL_SYNC, ghs + 2);
170
171 error = gfs2_glock_nq_m(3, ghs);
172 if (error)
173 goto out;
174
175 error = -EMLINK;
176 if (ip->i_di.di_nlink != 1)
177 goto out_gunlock;
178 error = -EINVAL;
179 if (!S_ISREG(ip->i_di.di_mode))
180 goto out_gunlock;
181
182 old_qstr.name = old_name;
183 old_qstr.len = strlen(old_name);
184 error = gfs2_dir_search(old_dip, &old_qstr, &inum, NULL);
185 switch (error) {
186 case 0:
187 break;
188 default:
189 goto out_gunlock;
190 }
191
192 error = -EINVAL;
193 if (!gfs2_inum_equal(&inum, &ip->i_num))
194 goto out_gunlock;
195
196 new_qstr.name = new_name;
197 new_qstr.len = strlen(new_name);
198 error = gfs2_dir_search(new_dip, &new_qstr, NULL, NULL);
199 switch (error) {
200 case -ENOENT:
201 break;
202 case 0:
203 error = -EEXIST;
204 default:
205 goto out_gunlock;
206 }
207
208 gfs2_alloc_get(ip);
209
210 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
211 if (error)
212 goto out_alloc;
213
214 error = gfs2_diradd_alloc_required(new_dip, &new_qstr, &alloc_required);
215 if (error)
216 goto out_unhold;
217
218 if (alloc_required) {
219 struct gfs2_alloc *al = gfs2_alloc_get(new_dip);
220
221 al->al_requested = sdp->sd_max_dirres;
222
223 error = gfs2_inplace_reserve(new_dip);
224 if (error)
225 goto out_alloc2;
226
227 error = gfs2_trans_begin(sdp,
228 sdp->sd_max_dirres +
229 al->al_rgd->rd_ri.ri_length +
230 3 * RES_DINODE + RES_LEAF +
231 RES_STATFS + RES_QUOTA, 0);
232 if (error)
233 goto out_ipreserv;
234 } else {
235 error = gfs2_trans_begin(sdp,
236 3 * RES_DINODE + 2 * RES_LEAF +
237 RES_QUOTA, 0);
238 if (error)
239 goto out_unhold;
240 }
241
242 error = gfs2_dir_del(old_dip, &old_qstr);
243 if (error)
244 goto out_trans;
245
246 error = gfs2_dir_add(new_dip, &new_qstr, &ip->i_num,
247 IF2DT(ip->i_di.di_mode));
248 if (error)
249 goto out_trans;
250
251 gfs2_quota_change(ip, -ip->i_di.di_blocks, ip->i_di.di_uid,
252 ip->i_di.di_gid);
253
254 error = gfs2_meta_inode_buffer(ip, &dibh);
255 if (error)
256 goto out_trans;
257 ip->i_di.di_flags |= GFS2_DIF_SYSTEM;
258 gfs2_trans_add_bh(ip->i_gl, dibh);
259 gfs2_dinode_out(&ip->i_di, dibh->b_data);
260 brelse(dibh);
261
262 drop_dentries(ip);
263
264 out_trans:
265 gfs2_trans_end(sdp);
266
267 out_ipreserv:
268 if (alloc_required)
269 gfs2_inplace_release(new_dip);
270
271 out_alloc2:
272 if (alloc_required)
273 gfs2_alloc_put(new_dip);
274
275 out_unhold:
276 gfs2_quota_unhold(ip);
277
278 out_alloc:
279 gfs2_alloc_put(ip);
280
281 out_gunlock:
282 gfs2_glock_dq_m(3, ghs);
283
284 out:
285 gfs2_holder_uninit(ghs);
286 gfs2_holder_uninit(ghs + 1);
287 gfs2_holder_uninit(ghs + 2);
288
289 return error;
290}
291
diff --git a/fs/gfs2/resize.h b/fs/gfs2/resize.h
new file mode 100644
index 000000000000..bb97f0501598
--- /dev/null
+++ b/fs/gfs2/resize.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RESIZE_DOT_H__
11#define __RESIZE_DOT_H__
12
13int gfs2_resize_add_rgrps(struct gfs2_sbd *sdp, char __user *buf,
14 unsigned int size);
15int gfs2_rename2system(struct gfs2_inode *ip,
16 struct gfs2_inode *old_dip, char *old_name,
17 struct gfs2_inode *new_dip, char *new_name);
18
19#endif /* __RESIZE_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
new file mode 100644
index 000000000000..8df6fba20fac
--- /dev/null
+++ b/fs/gfs2/rgrp.c
@@ -0,0 +1,1361 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bits.h"
19#include "glock.h"
20#include "glops.h"
21#include "jdata.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "quota.h"
25#include "rgrp.h"
26#include "super.h"
27#include "trans.h"
28
29/**
30 * gfs2_rgrp_verify - Verify that a resource group is consistent
31 * @sdp: the filesystem
32 * @rgd: the rgrp
33 *
34 */
35
36void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
37{
38 struct gfs2_sbd *sdp = rgd->rd_sbd;
39 struct gfs2_bitmap *bi = NULL;
40 uint32_t length = rgd->rd_ri.ri_length;
41 uint32_t count[4], tmp;
42 int buf, x;
43
44 memset(count, 0, 4 * sizeof(uint32_t));
45
46 /* Count # blocks in each of 4 possible allocation states */
47 for (buf = 0; buf < length; buf++) {
48 bi = rgd->rd_bits + buf;
49 for (x = 0; x < 4; x++)
50 count[x] += gfs2_bitcount(rgd,
51 bi->bi_bh->b_data +
52 bi->bi_offset,
53 bi->bi_len, x);
54 }
55
56 if (count[0] != rgd->rd_rg.rg_free) {
57 if (gfs2_consist_rgrpd(rgd))
58 fs_err(sdp, "free data mismatch: %u != %u\n",
59 count[0], rgd->rd_rg.rg_free);
60 return;
61 }
62
63 tmp = rgd->rd_ri.ri_data -
64 rgd->rd_rg.rg_free -
65 rgd->rd_rg.rg_dinodes;
66 if (count[1] != tmp) {
67 if (gfs2_consist_rgrpd(rgd))
68 fs_err(sdp, "used data mismatch: %u != %u\n",
69 count[1], tmp);
70 return;
71 }
72
73 if (count[2]) {
74 if (gfs2_consist_rgrpd(rgd))
75 fs_err(sdp, "free metadata mismatch: %u != 0\n",
76 count[2]);
77 return;
78 }
79
80 if (count[3] != rgd->rd_rg.rg_dinodes) {
81 if (gfs2_consist_rgrpd(rgd))
82 fs_err(sdp, "used metadata mismatch: %u != %u\n",
83 count[3], rgd->rd_rg.rg_dinodes);
84 return;
85 }
86}
87
88static inline int rgrp_contains_block(struct gfs2_rindex *ri, uint64_t block)
89{
90 uint64_t first = ri->ri_data0;
91 uint64_t last = first + ri->ri_data;
92 return !!(first <= block && block < last);
93}
94
95/**
96 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
97 * @sdp: The GFS2 superblock
98 * @n: The data block number
99 *
100 * Returns: The resource group, or NULL if not found
101 */
102
103struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
104{
105 struct gfs2_rgrpd *rgd;
106
107 spin_lock(&sdp->sd_rindex_spin);
108
109 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
110 if (rgrp_contains_block(&rgd->rd_ri, blk)) {
111 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
112 spin_unlock(&sdp->sd_rindex_spin);
113 return rgd;
114 }
115 }
116
117 spin_unlock(&sdp->sd_rindex_spin);
118
119 return NULL;
120}
121
122/**
123 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
124 * @sdp: The GFS2 superblock
125 *
126 * Returns: The first rgrp in the filesystem
127 */
128
129struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
130{
131 gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
132 return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
133}
134
135/**
136 * gfs2_rgrpd_get_next - get the next RG
137 * @rgd: A RG
138 *
139 * Returns: The next rgrp
140 */
141
142struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
143{
144 if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
145 return NULL;
146 return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
147}
148
149static void clear_rgrpdi(struct gfs2_sbd *sdp)
150{
151 struct list_head *head;
152 struct gfs2_rgrpd *rgd;
153 struct gfs2_glock *gl;
154
155 spin_lock(&sdp->sd_rindex_spin);
156 sdp->sd_rindex_forward = NULL;
157 head = &sdp->sd_rindex_recent_list;
158 while (!list_empty(head)) {
159 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
160 list_del(&rgd->rd_recent);
161 }
162 spin_unlock(&sdp->sd_rindex_spin);
163
164 head = &sdp->sd_rindex_list;
165 while (!list_empty(head)) {
166 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
167 gl = rgd->rd_gl;
168
169 list_del(&rgd->rd_list);
170 list_del(&rgd->rd_list_mru);
171
172 if (gl) {
173 set_gl2rgd(gl, NULL);
174 gfs2_glock_put(gl);
175 }
176
177 kfree(rgd->rd_bits);
178 kfree(rgd);
179 }
180}
181
182void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
183{
184 down(&sdp->sd_rindex_mutex);
185 clear_rgrpdi(sdp);
186 up(&sdp->sd_rindex_mutex);
187}
188
189/**
190 * gfs2_compute_bitstructs - Compute the bitmap sizes
191 * @rgd: The resource group descriptor
192 *
193 * Calculates bitmap descriptors, one for each block that contains bitmap data
194 *
195 * Returns: errno
196 */
197
198static int compute_bitstructs(struct gfs2_rgrpd *rgd)
199{
200 struct gfs2_sbd *sdp = rgd->rd_sbd;
201 struct gfs2_bitmap *bi;
202 uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
203 uint32_t bytes_left, bytes;
204 int x;
205
206 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_KERNEL);
207 if (!rgd->rd_bits)
208 return -ENOMEM;
209
210 bytes_left = rgd->rd_ri.ri_bitbytes;
211
212 for (x = 0; x < length; x++) {
213 bi = rgd->rd_bits + x;
214
215 /* small rgrp; bitmap stored completely in header block */
216 if (length == 1) {
217 bytes = bytes_left;
218 bi->bi_offset = sizeof(struct gfs2_rgrp);
219 bi->bi_start = 0;
220 bi->bi_len = bytes;
221 /* header block */
222 } else if (x == 0) {
223 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
224 bi->bi_offset = sizeof(struct gfs2_rgrp);
225 bi->bi_start = 0;
226 bi->bi_len = bytes;
227 /* last block */
228 } else if (x + 1 == length) {
229 bytes = bytes_left;
230 bi->bi_offset = sizeof(struct gfs2_meta_header);
231 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
232 bi->bi_len = bytes;
233 /* other blocks */
234 } else {
235 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
236 bi->bi_offset = sizeof(struct gfs2_meta_header);
237 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
238 bi->bi_len = bytes;
239 }
240
241 bytes_left -= bytes;
242 }
243
244 if (bytes_left) {
245 gfs2_consist_rgrpd(rgd);
246 return -EIO;
247 }
248 bi = rgd->rd_bits + (length - 1);
249 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
250 if (gfs2_consist_rgrpd(rgd)) {
251 gfs2_rindex_print(&rgd->rd_ri);
252 fs_err(sdp, "start=%u len=%u offset=%u\n",
253 bi->bi_start, bi->bi_len, bi->bi_offset);
254 }
255 return -EIO;
256 }
257
258 return 0;
259}
260
261/**
262 * gfs2_ri_update - Pull in a new resource index from the disk
263 * @gl: The glock covering the rindex inode
264 *
265 * Returns: 0 on successful update, error code otherwise
266 */
267
268static int gfs2_ri_update(struct gfs2_inode *ip)
269{
270 struct gfs2_sbd *sdp = ip->i_sbd;
271 struct gfs2_rgrpd *rgd;
272 char buf[sizeof(struct gfs2_rindex)];
273 uint64_t junk = ip->i_di.di_size;
274 int error;
275
276 if (do_div(junk, sizeof(struct gfs2_rindex))) {
277 gfs2_consist_inode(ip);
278 return -EIO;
279 }
280
281 clear_rgrpdi(sdp);
282
283 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
284 error = gfs2_jdata_read_mem(ip, buf,
285 sdp->sd_rgrps *
286 sizeof(struct gfs2_rindex),
287 sizeof(struct gfs2_rindex));
288 if (!error)
289 break;
290 if (error != sizeof(struct gfs2_rindex)) {
291 if (error > 0)
292 error = -EIO;
293 goto fail;
294 }
295
296 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_KERNEL);
297 error = -ENOMEM;
298 if (!rgd)
299 goto fail;
300
301 init_MUTEX(&rgd->rd_mutex);
302 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
303 rgd->rd_sbd = sdp;
304
305 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
306 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
307
308 gfs2_rindex_in(&rgd->rd_ri, buf);
309
310 error = compute_bitstructs(rgd);
311 if (error)
312 goto fail;
313
314 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
315 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
316 if (error)
317 goto fail;
318
319 set_gl2rgd(rgd->rd_gl, rgd);
320 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
321 }
322
323 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
324
325 return 0;
326
327 fail:
328 clear_rgrpdi(sdp);
329
330 return error;
331}
332
333/**
334 * gfs2_rindex_hold - Grab a lock on the rindex
335 * @sdp: The GFS2 superblock
336 * @ri_gh: the glock holder
337 *
338 * We grab a lock on the rindex inode to make sure that it doesn't
339 * change whilst we are performing an operation. We keep this lock
340 * for quite long periods of time compared to other locks. This
341 * doesn't matter, since it is shared and it is very, very rarely
342 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
343 *
344 * This makes sure that we're using the latest copy of the resource index
345 * special file, which might have been updated if someone expanded the
346 * filesystem (via gfs2_grow utility), which adds new resource groups.
347 *
348 * Returns: 0 on success, error code otherwise
349 */
350
351int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
352{
353 struct gfs2_inode *ip = sdp->sd_rindex;
354 struct gfs2_glock *gl = ip->i_gl;
355 int error;
356
357 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
358 if (error)
359 return error;
360
361 /* Read new copy from disk if we don't have the latest */
362 if (sdp->sd_rindex_vn != gl->gl_vn) {
363 down(&sdp->sd_rindex_mutex);
364 if (sdp->sd_rindex_vn != gl->gl_vn) {
365 error = gfs2_ri_update(ip);
366 if (error)
367 gfs2_glock_dq_uninit(ri_gh);
368 }
369 up(&sdp->sd_rindex_mutex);
370 }
371
372 return error;
373}
374
375/**
376 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
377 * @rgd: the struct gfs2_rgrpd describing the RG to read in
378 *
379 * Read in all of a Resource Group's header and bitmap blocks.
380 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
381 *
382 * Returns: errno
383 */
384
385int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
386{
387 struct gfs2_sbd *sdp = rgd->rd_sbd;
388 struct gfs2_glock *gl = rgd->rd_gl;
389 unsigned int length = rgd->rd_ri.ri_length;
390 struct gfs2_bitmap *bi;
391 unsigned int x, y;
392 int error;
393
394 down(&rgd->rd_mutex);
395
396 spin_lock(&sdp->sd_rindex_spin);
397 if (rgd->rd_bh_count) {
398 rgd->rd_bh_count++;
399 spin_unlock(&sdp->sd_rindex_spin);
400 up(&rgd->rd_mutex);
401 return 0;
402 }
403 spin_unlock(&sdp->sd_rindex_spin);
404
405 for (x = 0; x < length; x++) {
406 bi = rgd->rd_bits + x;
407 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, DIO_START,
408 &bi->bi_bh);
409 if (error)
410 goto fail;
411 }
412
413 for (y = length; y--;) {
414 bi = rgd->rd_bits + y;
415 error = gfs2_meta_reread(sdp, bi->bi_bh, DIO_WAIT);
416 if (error)
417 goto fail;
418 if (gfs2_metatype_check(sdp, bi->bi_bh,
419 (y) ? GFS2_METATYPE_RB :
420 GFS2_METATYPE_RG)) {
421 error = -EIO;
422 goto fail;
423 }
424 }
425
426 if (rgd->rd_rg_vn != gl->gl_vn) {
427 gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
428 rgd->rd_rg_vn = gl->gl_vn;
429 }
430
431 spin_lock(&sdp->sd_rindex_spin);
432 rgd->rd_free_clone = rgd->rd_rg.rg_free;
433 rgd->rd_bh_count++;
434 spin_unlock(&sdp->sd_rindex_spin);
435
436 up(&rgd->rd_mutex);
437
438 return 0;
439
440 fail:
441 while (x--) {
442 bi = rgd->rd_bits + x;
443 brelse(bi->bi_bh);
444 bi->bi_bh = NULL;
445 gfs2_assert_warn(sdp, !bi->bi_clone);
446 }
447 up(&rgd->rd_mutex);
448
449 return error;
450}
451
452void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
453{
454 struct gfs2_sbd *sdp = rgd->rd_sbd;
455
456 spin_lock(&sdp->sd_rindex_spin);
457 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
458 rgd->rd_bh_count++;
459 spin_unlock(&sdp->sd_rindex_spin);
460}
461
462/**
463 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
464 * @rgd: the struct gfs2_rgrpd describing the RG to read in
465 *
466 */
467
468void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
469{
470 struct gfs2_sbd *sdp = rgd->rd_sbd;
471 int x, length = rgd->rd_ri.ri_length;
472
473 spin_lock(&sdp->sd_rindex_spin);
474 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
475 if (--rgd->rd_bh_count) {
476 spin_unlock(&sdp->sd_rindex_spin);
477 return;
478 }
479
480 for (x = 0; x < length; x++) {
481 struct gfs2_bitmap *bi = rgd->rd_bits + x;
482 kfree(bi->bi_clone);
483 bi->bi_clone = NULL;
484 brelse(bi->bi_bh);
485 bi->bi_bh = NULL;
486 }
487
488 spin_unlock(&sdp->sd_rindex_spin);
489}
490
491void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
492{
493 struct gfs2_sbd *sdp = rgd->rd_sbd;
494 unsigned int length = rgd->rd_ri.ri_length;
495 unsigned int x;
496
497 for (x = 0; x < length; x++) {
498 struct gfs2_bitmap *bi = rgd->rd_bits + x;
499 if (!bi->bi_clone)
500 continue;
501 memcpy(bi->bi_clone + bi->bi_offset,
502 bi->bi_bh->b_data + bi->bi_offset,
503 bi->bi_len);
504 }
505
506 spin_lock(&sdp->sd_rindex_spin);
507 rgd->rd_free_clone = rgd->rd_rg.rg_free;
508 spin_unlock(&sdp->sd_rindex_spin);
509}
510
511/**
512 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
513 * @ip: the incore GFS2 inode structure
514 *
515 * Returns: the struct gfs2_alloc
516 */
517
518struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
519{
520 struct gfs2_alloc *al = &ip->i_alloc;
521
522 /* FIXME: Should assert that the correct locks are held here... */
523 memset(al, 0, sizeof(*al));
524 return al;
525}
526
/**
 * gfs2_alloc_put - throw away the struct gfs2_alloc for an inode
 * @ip: the inode
 *
 * Currently a no-op.
 */

void gfs2_alloc_put(struct gfs2_inode *ip)
{
}
537
538/**
539 * try_rgrp_fit - See if a given reservation will fit in a given RG
540 * @rgd: the RG data
541 * @al: the struct gfs2_alloc structure describing the reservation
542 *
543 * If there's room for the requested blocks to be allocated from the RG:
544 * Sets the $al_reserved_data field in @al.
545 * Sets the $al_reserved_meta field in @al.
546 * Sets the $al_rgd field in @al.
547 *
548 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
549 */
550
551static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
552{
553 struct gfs2_sbd *sdp = rgd->rd_sbd;
554 int ret = 0;
555
556 spin_lock(&sdp->sd_rindex_spin);
557 if (rgd->rd_free_clone >= al->al_requested) {
558 al->al_rgd = rgd;
559 ret = 1;
560 }
561 spin_unlock(&sdp->sd_rindex_spin);
562
563 return ret;
564}
565
566/**
567 * recent_rgrp_first - get first RG from "recent" list
568 * @sdp: The GFS2 superblock
569 * @rglast: address of the rgrp used last
570 *
571 * Returns: The first rgrp in the recent list
572 */
573
574static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
575 uint64_t rglast)
576{
577 struct gfs2_rgrpd *rgd = NULL;
578
579 spin_lock(&sdp->sd_rindex_spin);
580
581 if (list_empty(&sdp->sd_rindex_recent_list))
582 goto out;
583
584 if (!rglast)
585 goto first;
586
587 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
588 if (rgd->rd_ri.ri_addr == rglast)
589 goto out;
590 }
591
592 first:
593 rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
594 rd_recent);
595
596 out:
597 spin_unlock(&sdp->sd_rindex_spin);
598
599 return rgd;
600}
601
602/**
603 * recent_rgrp_next - get next RG from "recent" list
604 * @cur_rgd: current rgrp
605 * @remove:
606 *
607 * Returns: The next rgrp in the recent list
608 */
609
610static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
611 int remove)
612{
613 struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
614 struct list_head *head;
615 struct gfs2_rgrpd *rgd;
616
617 spin_lock(&sdp->sd_rindex_spin);
618
619 head = &sdp->sd_rindex_recent_list;
620
621 list_for_each_entry(rgd, head, rd_recent) {
622 if (rgd == cur_rgd) {
623 if (cur_rgd->rd_recent.next != head)
624 rgd = list_entry(cur_rgd->rd_recent.next,
625 struct gfs2_rgrpd, rd_recent);
626 else
627 rgd = NULL;
628
629 if (remove)
630 list_del(&cur_rgd->rd_recent);
631
632 goto out;
633 }
634 }
635
636 rgd = NULL;
637 if (!list_empty(head))
638 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
639
640 out:
641 spin_unlock(&sdp->sd_rindex_spin);
642
643 return rgd;
644}
645
646/**
647 * recent_rgrp_add - add an RG to tail of "recent" list
648 * @new_rgd: The rgrp to add
649 *
650 */
651
652static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
653{
654 struct gfs2_sbd *sdp = new_rgd->rd_sbd;
655 struct gfs2_rgrpd *rgd;
656 unsigned int count = 0;
657 unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
658
659 spin_lock(&sdp->sd_rindex_spin);
660
661 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
662 if (rgd == new_rgd)
663 goto out;
664
665 if (++count >= max)
666 goto out;
667 }
668 list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
669
670 out:
671 spin_unlock(&sdp->sd_rindex_spin);
672}
673
674/**
675 * forward_rgrp_get - get an rgrp to try next from full list
676 * @sdp: The GFS2 superblock
677 *
678 * Returns: The rgrp to try next
679 */
680
681static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
682{
683 struct gfs2_rgrpd *rgd;
684 unsigned int journals = gfs2_jindex_size(sdp);
685 unsigned int rg = 0, x;
686
687 spin_lock(&sdp->sd_rindex_spin);
688
689 rgd = sdp->sd_rindex_forward;
690 if (!rgd) {
691 if (sdp->sd_rgrps >= journals)
692 rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
693
694 for (x = 0, rgd = gfs2_rgrpd_get_first(sdp);
695 x < rg;
696 x++, rgd = gfs2_rgrpd_get_next(rgd))
697 /* Do Nothing */;
698
699 sdp->sd_rindex_forward = rgd;
700 }
701
702 spin_unlock(&sdp->sd_rindex_spin);
703
704 return rgd;
705}
706
707/**
708 * forward_rgrp_set - set the forward rgrp pointer
709 * @sdp: the filesystem
710 * @rgd: The new forward rgrp
711 *
712 */
713
714static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
715{
716 spin_lock(&sdp->sd_rindex_spin);
717 sdp->sd_rindex_forward = rgd;
718 spin_unlock(&sdp->sd_rindex_spin);
719}
720
721/**
722 * get_local_rgrp - Choose and lock a rgrp for allocation
723 * @ip: the inode to reserve space for
724 * @rgp: the chosen and locked rgrp
725 *
726 * Try to acquire rgrp in way which avoids contending with others.
727 *
728 * Returns: errno
729 */
730
731static int get_local_rgrp(struct gfs2_inode *ip)
732{
733 struct gfs2_sbd *sdp = ip->i_sbd;
734 struct gfs2_rgrpd *rgd, *begin = NULL;
735 struct gfs2_alloc *al = &ip->i_alloc;
736 int flags = LM_FLAG_TRY;
737 int skipped = 0;
738 int loops = 0;
739 int error;
740
741 /* Try recently successful rgrps */
742
743 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
744
745 while (rgd) {
746 error = gfs2_glock_nq_init(rgd->rd_gl,
747 LM_ST_EXCLUSIVE, LM_FLAG_TRY,
748 &al->al_rgd_gh);
749 switch (error) {
750 case 0:
751 if (try_rgrp_fit(rgd, al))
752 goto out;
753 gfs2_glock_dq_uninit(&al->al_rgd_gh);
754 rgd = recent_rgrp_next(rgd, 1);
755 break;
756
757 case GLR_TRYFAILED:
758 rgd = recent_rgrp_next(rgd, 0);
759 break;
760
761 default:
762 return error;
763 }
764 }
765
766 /* Go through full list of rgrps */
767
768 begin = rgd = forward_rgrp_get(sdp);
769
770 for (;;) {
771 error = gfs2_glock_nq_init(rgd->rd_gl,
772 LM_ST_EXCLUSIVE, flags,
773 &al->al_rgd_gh);
774 switch (error) {
775 case 0:
776 if (try_rgrp_fit(rgd, al))
777 goto out;
778 gfs2_glock_dq_uninit(&al->al_rgd_gh);
779 break;
780
781 case GLR_TRYFAILED:
782 skipped++;
783 break;
784
785 default:
786 return error;
787 }
788
789 rgd = gfs2_rgrpd_get_next(rgd);
790 if (!rgd)
791 rgd = gfs2_rgrpd_get_first(sdp);
792
793 if (rgd == begin) {
794 if (++loops >= 2 || !skipped)
795 return -ENOSPC;
796 flags = 0;
797 }
798 }
799
800 out:
801 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
802
803 if (begin) {
804 recent_rgrp_add(rgd);
805 rgd = gfs2_rgrpd_get_next(rgd);
806 if (!rgd)
807 rgd = gfs2_rgrpd_get_first(sdp);
808 forward_rgrp_set(sdp, rgd);
809 }
810
811 return 0;
812}
813
814/**
815 * gfs2_inplace_reserve_i - Reserve space in the filesystem
816 * @ip: the inode to reserve space for
817 *
818 * Returns: errno
819 */
820
821int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
822{
823 struct gfs2_sbd *sdp = ip->i_sbd;
824 struct gfs2_alloc *al = &ip->i_alloc;
825 int error;
826
827 if (gfs2_assert_warn(sdp, al->al_requested))
828 return -EINVAL;
829
830 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
831 if (error)
832 return error;
833
834 error = get_local_rgrp(ip);
835 if (error) {
836 gfs2_glock_dq_uninit(&al->al_ri_gh);
837 return error;
838 }
839
840 al->al_file = file;
841 al->al_line = line;
842
843 return 0;
844}
845
846/**
847 * gfs2_inplace_release - release an inplace reservation
848 * @ip: the inode the reservation was taken out on
849 *
850 * Release a reservation made by gfs2_inplace_reserve().
851 */
852
853void gfs2_inplace_release(struct gfs2_inode *ip)
854{
855 struct gfs2_sbd *sdp = ip->i_sbd;
856 struct gfs2_alloc *al = &ip->i_alloc;
857
858 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
859 fs_warn(sdp, "al_alloced = %u, al_requested = %u "
860 "al_file = %s, al_line = %u\n",
861 al->al_alloced, al->al_requested, al->al_file,
862 al->al_line);
863
864 al->al_rgd = NULL;
865 gfs2_glock_dq_uninit(&al->al_rgd_gh);
866 gfs2_glock_dq_uninit(&al->al_ri_gh);
867}
868
869/**
870 * gfs2_get_block_type - Check a block in a RG is of given type
871 * @rgd: the resource group holding the block
872 * @block: the block number
873 *
874 * Returns: The block type (GFS2_BLKST_*)
875 */
876
877unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block)
878{
879 struct gfs2_bitmap *bi = NULL;
880 uint32_t length, rgrp_block, buf_block;
881 unsigned int buf;
882 unsigned char type;
883
884 length = rgd->rd_ri.ri_length;
885 rgrp_block = block - rgd->rd_ri.ri_data0;
886
887 for (buf = 0; buf < length; buf++) {
888 bi = rgd->rd_bits + buf;
889 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
890 break;
891 }
892
893 gfs2_assert(rgd->rd_sbd, buf < length);
894 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
895
896 type = gfs2_testbit(rgd,
897 bi->bi_bh->b_data + bi->bi_offset,
898 bi->bi_len, buf_block);
899
900 return type;
901}
902
903/**
904 * rgblk_search - find a block in @old_state, change allocation
905 * state to @new_state
906 * @rgd: the resource group descriptor
907 * @goal: the goal block within the RG (start here to search for avail block)
908 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
909 * @new_state: GFS2_BLKST_XXX the after-allocation block state
910 *
911 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
912 * Add the found bitmap buffer to the transaction.
913 * Set the found bits to @new_state to change block's allocation state.
914 *
915 * This function never fails, because we wouldn't call it unless we
916 * know (from reservation results, etc.) that a block is available.
917 *
918 * Scope of @goal and returned block is just within rgrp, not the whole
919 * filesystem.
920 *
921 * Returns: the block number allocated
922 */
923
924static uint32_t rgblk_search(struct gfs2_rgrpd *rgd, uint32_t goal,
925 unsigned char old_state, unsigned char new_state)
926{
927 struct gfs2_bitmap *bi = NULL;
928 uint32_t length = rgd->rd_ri.ri_length;
929 uint32_t blk = 0;
930 unsigned int buf, x;
931
932 /* Find bitmap block that contains bits for goal block */
933 for (buf = 0; buf < length; buf++) {
934 bi = rgd->rd_bits + buf;
935 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
936 break;
937 }
938
939 gfs2_assert(rgd->rd_sbd, buf < length);
940
941 /* Convert scope of "goal" from rgrp-wide to within found bit block */
942 goal -= bi->bi_start * GFS2_NBBY;
943
944 /* Search (up to entire) bitmap in this rgrp for allocatable block.
945 "x <= length", instead of "x < length", because we typically start
946 the search in the middle of a bit block, but if we can't find an
947 allocatable block anywhere else, we want to be able wrap around and
948 search in the first part of our first-searched bit block. */
949 for (x = 0; x <= length; x++) {
950 if (bi->bi_clone)
951 blk = gfs2_bitfit(rgd,
952 bi->bi_clone + bi->bi_offset,
953 bi->bi_len, goal, old_state);
954 else
955 blk = gfs2_bitfit(rgd,
956 bi->bi_bh->b_data + bi->bi_offset,
957 bi->bi_len, goal, old_state);
958 if (blk != BFITNOENT)
959 break;
960
961 /* Try next bitmap block (wrap back to rgrp header if at end) */
962 buf = (buf + 1) % length;
963 bi = rgd->rd_bits + buf;
964 goal = 0;
965 }
966
967 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
968 blk = 0;
969
970 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh);
971 gfs2_setbit(rgd,
972 bi->bi_bh->b_data + bi->bi_offset,
973 bi->bi_len, blk, new_state);
974 if (bi->bi_clone)
975 gfs2_setbit(rgd,
976 bi->bi_clone + bi->bi_offset,
977 bi->bi_len, blk, new_state);
978
979 return bi->bi_start * GFS2_NBBY + blk;
980}
981
982/**
983 * rgblk_free - Change alloc state of given block(s)
984 * @sdp: the filesystem
985 * @bstart: the start of a run of blocks to free
986 * @blen: the length of the block run (all must lie within ONE RG!)
987 * @new_state: GFS2_BLKST_XXX the after-allocation block state
988 *
989 * Returns: Resource group containing the block(s)
990 */
991
992static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, uint64_t bstart,
993 uint32_t blen, unsigned char new_state)
994{
995 struct gfs2_rgrpd *rgd;
996 struct gfs2_bitmap *bi = NULL;
997 uint32_t length, rgrp_blk, buf_blk;
998 unsigned int buf;
999
1000 rgd = gfs2_blk2rgrpd(sdp, bstart);
1001 if (!rgd) {
1002 if (gfs2_consist(sdp))
1003 fs_err(sdp, "block = %llu\n", bstart);
1004 return NULL;
1005 }
1006
1007 length = rgd->rd_ri.ri_length;
1008
1009 rgrp_blk = bstart - rgd->rd_ri.ri_data0;
1010
1011 while (blen--) {
1012 for (buf = 0; buf < length; buf++) {
1013 bi = rgd->rd_bits + buf;
1014 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1015 break;
1016 }
1017
1018 gfs2_assert(rgd->rd_sbd, buf < length);
1019
1020 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1021 rgrp_blk++;
1022
1023 if (!bi->bi_clone) {
1024 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1025 GFP_KERNEL | __GFP_NOFAIL);
1026 memcpy(bi->bi_clone + bi->bi_offset,
1027 bi->bi_bh->b_data + bi->bi_offset,
1028 bi->bi_len);
1029 }
1030 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh);
1031 gfs2_setbit(rgd,
1032 bi->bi_bh->b_data + bi->bi_offset,
1033 bi->bi_len, buf_blk, new_state);
1034 }
1035
1036 return rgd;
1037}
1038
1039/**
1040 * gfs2_alloc_data - Allocate a data block
1041 * @ip: the inode to allocate the data block for
1042 *
1043 * Returns: the allocated block
1044 */
1045
1046uint64_t gfs2_alloc_data(struct gfs2_inode *ip)
1047{
1048 struct gfs2_sbd *sdp = ip->i_sbd;
1049 struct gfs2_alloc *al = &ip->i_alloc;
1050 struct gfs2_rgrpd *rgd = al->al_rgd;
1051 uint32_t goal, blk;
1052 uint64_t block;
1053
1054 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
1055 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
1056 else
1057 goal = rgd->rd_last_alloc_data;
1058
1059 blk = rgblk_search(rgd, goal,
1060 GFS2_BLKST_FREE, GFS2_BLKST_USED);
1061 rgd->rd_last_alloc_data = blk;
1062
1063 block = rgd->rd_ri.ri_data0 + blk;
1064 ip->i_di.di_goal_data = block;
1065
1066 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1067 rgd->rd_rg.rg_free--;
1068
1069 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
1070 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1071
1072 al->al_alloced++;
1073
1074 gfs2_statfs_change(sdp, 0, -1, 0);
1075 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1076
1077 spin_lock(&sdp->sd_rindex_spin);
1078 rgd->rd_free_clone--;
1079 spin_unlock(&sdp->sd_rindex_spin);
1080
1081 return block;
1082}
1083
1084/**
1085 * gfs2_alloc_meta - Allocate a metadata block
1086 * @ip: the inode to allocate the metadata block for
1087 *
1088 * Returns: the allocated block
1089 */
1090
1091uint64_t gfs2_alloc_meta(struct gfs2_inode *ip)
1092{
1093 struct gfs2_sbd *sdp = ip->i_sbd;
1094 struct gfs2_alloc *al = &ip->i_alloc;
1095 struct gfs2_rgrpd *rgd = al->al_rgd;
1096 uint32_t goal, blk;
1097 uint64_t block;
1098
1099 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
1100 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
1101 else
1102 goal = rgd->rd_last_alloc_meta;
1103
1104 blk = rgblk_search(rgd, goal,
1105 GFS2_BLKST_FREE, GFS2_BLKST_USED);
1106 rgd->rd_last_alloc_meta = blk;
1107
1108 block = rgd->rd_ri.ri_data0 + blk;
1109 ip->i_di.di_goal_meta = block;
1110
1111 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1112 rgd->rd_rg.rg_free--;
1113
1114 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
1115 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1116
1117 al->al_alloced++;
1118
1119 gfs2_statfs_change(sdp, 0, -1, 0);
1120 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1121 gfs2_trans_add_unrevoke(sdp, block);
1122
1123 spin_lock(&sdp->sd_rindex_spin);
1124 rgd->rd_free_clone--;
1125 spin_unlock(&sdp->sd_rindex_spin);
1126
1127 return block;
1128}
1129
1130/**
1131 * gfs2_alloc_di - Allocate a dinode
1132 * @dip: the directory that the inode is going in
1133 *
1134 * Returns: the block allocated
1135 */
1136
1137uint64_t gfs2_alloc_di(struct gfs2_inode *dip)
1138{
1139 struct gfs2_sbd *sdp = dip->i_sbd;
1140 struct gfs2_alloc *al = &dip->i_alloc;
1141 struct gfs2_rgrpd *rgd = al->al_rgd;
1142 uint32_t blk;
1143 uint64_t block;
1144
1145 blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
1146 GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
1147
1148 rgd->rd_last_alloc_meta = blk;
1149
1150 block = rgd->rd_ri.ri_data0 + blk;
1151
1152 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1153 rgd->rd_rg.rg_free--;
1154 rgd->rd_rg.rg_dinodes++;
1155
1156 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
1157 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1158
1159 al->al_alloced++;
1160
1161 gfs2_statfs_change(sdp, 0, -1, +1);
1162 gfs2_trans_add_unrevoke(sdp, block);
1163
1164 spin_lock(&sdp->sd_rindex_spin);
1165 rgd->rd_free_clone--;
1166 spin_unlock(&sdp->sd_rindex_spin);
1167
1168 return block;
1169}
1170
1171/**
1172 * gfs2_free_data - free a contiguous run of data block(s)
1173 * @ip: the inode these blocks are being freed from
1174 * @bstart: first block of a run of contiguous blocks
1175 * @blen: the length of the block run
1176 *
1177 */
1178
1179void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1180{
1181 struct gfs2_sbd *sdp = ip->i_sbd;
1182 struct gfs2_rgrpd *rgd;
1183
1184 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1185 if (!rgd)
1186 return;
1187
1188 rgd->rd_rg.rg_free += blen;
1189
1190 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
1191 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1192
1193 gfs2_trans_add_rg(rgd);
1194
1195 gfs2_statfs_change(sdp, 0, +blen, 0);
1196 gfs2_quota_change(ip, -(int64_t)blen,
1197 ip->i_di.di_uid, ip->i_di.di_gid);
1198}
1199
1200/**
1201 * gfs2_free_meta - free a contiguous run of data block(s)
1202 * @ip: the inode these blocks are being freed from
1203 * @bstart: first block of a run of contiguous blocks
1204 * @blen: the length of the block run
1205 *
1206 */
1207
1208void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1209{
1210 struct gfs2_sbd *sdp = ip->i_sbd;
1211 struct gfs2_rgrpd *rgd;
1212
1213 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1214 if (!rgd)
1215 return;
1216
1217 rgd->rd_rg.rg_free += blen;
1218
1219 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
1220 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1221
1222 gfs2_trans_add_rg(rgd);
1223
1224 gfs2_statfs_change(sdp, 0, +blen, 0);
1225 gfs2_quota_change(ip, -(int64_t)blen,
1226 ip->i_di.di_uid, ip->i_di.di_gid);
1227 gfs2_meta_wipe(ip, bstart, blen);
1228}
1229
1230void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno)
1231{
1232 struct gfs2_sbd *sdp = rgd->rd_sbd;
1233 struct gfs2_rgrpd *tmp_rgd;
1234
1235 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1236 if (!tmp_rgd)
1237 return;
1238 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1239
1240 if (!rgd->rd_rg.rg_dinodes)
1241 gfs2_consist_rgrpd(rgd);
1242 rgd->rd_rg.rg_dinodes--;
1243 rgd->rd_rg.rg_free++;
1244
1245 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
1246 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1247
1248 gfs2_statfs_change(sdp, 0, +1, -1);
1249 gfs2_trans_add_rg(rgd);
1250}
1251
1252/**
1253 * gfs2_free_uninit_di - free a dinode block
1254 * @rgd: the resource group that contains the dinode
1255 * @ip: the inode
1256 *
1257 */
1258
1259void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1260{
1261 gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
1262 gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
1263 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
1264}
1265
1266/**
1267 * gfs2_rlist_add - add a RG to a list of RGs
1268 * @sdp: the filesystem
1269 * @rlist: the list of resource groups
1270 * @block: the block
1271 *
1272 * Figure out what RG a block belongs to and add that RG to the list
1273 *
1274 * FIXME: Don't use NOFAIL
1275 *
1276 */
1277
1278void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1279 uint64_t block)
1280{
1281 struct gfs2_rgrpd *rgd;
1282 struct gfs2_rgrpd **tmp;
1283 unsigned int new_space;
1284 unsigned int x;
1285
1286 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1287 return;
1288
1289 rgd = gfs2_blk2rgrpd(sdp, block);
1290 if (!rgd) {
1291 if (gfs2_consist(sdp))
1292 fs_err(sdp, "block = %llu\n", block);
1293 return;
1294 }
1295
1296 for (x = 0; x < rlist->rl_rgrps; x++)
1297 if (rlist->rl_rgd[x] == rgd)
1298 return;
1299
1300 if (rlist->rl_rgrps == rlist->rl_space) {
1301 new_space = rlist->rl_space + 10;
1302
1303 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1304 GFP_KERNEL | __GFP_NOFAIL);
1305
1306 if (rlist->rl_rgd) {
1307 memcpy(tmp, rlist->rl_rgd,
1308 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1309 kfree(rlist->rl_rgd);
1310 }
1311
1312 rlist->rl_space = new_space;
1313 rlist->rl_rgd = tmp;
1314 }
1315
1316 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1317}
1318
1319/**
1320 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1321 * and initialize an array of glock holders for them
1322 * @rlist: the list of resource groups
1323 * @state: the lock state to acquire the RG lock in
1324 * @flags: the modifier flags for the holder structures
1325 *
1326 * FIXME: Don't use NOFAIL
1327 *
1328 */
1329
1330void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
1331 int flags)
1332{
1333 unsigned int x;
1334
1335 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1336 GFP_KERNEL | __GFP_NOFAIL);
1337 for (x = 0; x < rlist->rl_rgrps; x++)
1338 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1339 state, flags,
1340 &rlist->rl_ghs[x]);
1341}
1342
1343/**
1344 * gfs2_rlist_free - free a resource group list
1345 * @list: the list of resource groups
1346 *
1347 */
1348
1349void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1350{
1351 unsigned int x;
1352
1353 kfree(rlist->rl_rgd);
1354
1355 if (rlist->rl_ghs) {
1356 for (x = 0; x < rlist->rl_rgrps; x++)
1357 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1358 kfree(rlist->rl_ghs);
1359 }
1360}
1361
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
new file mode 100644
index 000000000000..4c44a191b1c1
--- /dev/null
+++ b/fs/gfs2/rgrp.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RGRP_DOT_H__
11#define __RGRP_DOT_H__
12
13void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
14
15struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
16struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
17struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
18
19void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
20int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
21
22int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
23void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
24void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
25
26void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
27
28struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
29void gfs2_alloc_put(struct gfs2_inode *ip);
30
31int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
32 char *file, unsigned int line);
33#define gfs2_inplace_reserve(ip) \
34gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
35
36void gfs2_inplace_release(struct gfs2_inode *ip);
37
38unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block);
39
40uint64_t gfs2_alloc_data(struct gfs2_inode *ip);
41uint64_t gfs2_alloc_meta(struct gfs2_inode *ip);
42uint64_t gfs2_alloc_di(struct gfs2_inode *ip);
43
44void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
45void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
46void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno);
47void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
48
/*
 * A growable set of resource groups, built with gfs2_rlist_add(), given
 * glock holders by gfs2_rlist_alloc() and torn down by gfs2_rlist_free().
 */
struct gfs2_rgrp_list {
	unsigned int rl_rgrps;		/* entries in use in rl_rgd */
	unsigned int rl_space;		/* entries allocated in rl_rgd */
	struct gfs2_rgrpd **rl_rgd;	/* array of resource group pointers */
	struct gfs2_holder *rl_ghs;	/* one holder per entry, set by
					   gfs2_rlist_alloc() */
};
55
56void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
57 uint64_t block);
58void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
59 int flags);
60void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
61
62#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
new file mode 100644
index 000000000000..d37f203e133b
--- /dev/null
+++ b/fs/gfs2/super.c
@@ -0,0 +1,944 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "dir.h"
20#include "format.h"
21#include "glock.h"
22#include "glops.h"
23#include "inode.h"
24#include "log.h"
25#include "meta_io.h"
26#include "quota.h"
27#include "recovery.h"
28#include "rgrp.h"
29#include "super.h"
30#include "trans.h"
31#include "unlinked.h"
32
33/**
34 * gfs2_tune_init - Fill a gfs2_tune structure with default values
35 * @gt: tune
36 *
37 */
38
39void gfs2_tune_init(struct gfs2_tune *gt)
40{
41 spin_lock_init(&gt->gt_spin);
42
43 gt->gt_ilimit = 100;
44 gt->gt_ilimit_tries = 3;
45 gt->gt_ilimit_min = 1;
46 gt->gt_demote_secs = 300;
47 gt->gt_incore_log_blocks = 1024;
48 gt->gt_log_flush_secs = 60;
49 gt->gt_jindex_refresh_secs = 60;
50 gt->gt_scand_secs = 15;
51 gt->gt_recoverd_secs = 60;
52 gt->gt_logd_secs = 1;
53 gt->gt_quotad_secs = 5;
54 gt->gt_inoded_secs = 15;
55 gt->gt_quota_simul_sync = 64;
56 gt->gt_quota_warn_period = 10;
57 gt->gt_quota_scale_num = 1;
58 gt->gt_quota_scale_den = 1;
59 gt->gt_quota_cache_secs = 300;
60 gt->gt_quota_quantum = 60;
61 gt->gt_atime_quantum = 3600;
62 gt->gt_new_files_jdata = 0;
63 gt->gt_new_files_directio = 0;
64 gt->gt_max_atomic_write = 4 << 20;
65 gt->gt_max_readahead = 1 << 18;
66 gt->gt_lockdump_size = 131072;
67 gt->gt_stall_secs = 600;
68 gt->gt_complain_secs = 10;
69 gt->gt_reclaim_limit = 5000;
70 gt->gt_entries_per_readdir = 32;
71 gt->gt_prefetch_secs = 10;
72 gt->gt_greedy_default = HZ / 10;
73 gt->gt_greedy_quantum = HZ / 40;
74 gt->gt_greedy_max = HZ / 4;
75 gt->gt_statfs_quantum = 30;
76 gt->gt_statfs_slow = 0;
77}
78
79/**
80 * gfs2_check_sb - Check superblock
81 * @sdp: the filesystem
82 * @sb: The superblock
83 * @silent: Don't print a message if the check fails
84 *
85 * Checks the version code of the FS is one that we understand how to
86 * read and that the sizes of the various on-disk structures have not
87 * changed.
88 */
89
90int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
91{
92 unsigned int x;
93
94 if (sb->sb_header.mh_magic != GFS2_MAGIC ||
95 sb->sb_header.mh_type != GFS2_METATYPE_SB) {
96 if (!silent)
97 printk("GFS2: not a GFS2 filesystem\n");
98 return -EINVAL;
99 }
100
101 /* If format numbers match exactly, we're done. */
102
103 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
104 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
105 return 0;
106
107 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
108 for (x = 0; gfs2_old_fs_formats[x]; x++)
109 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
110 break;
111
112 if (!gfs2_old_fs_formats[x]) {
113 printk("GFS2: code version (%u, %u) is incompatible "
114 "with ondisk format (%u, %u)\n",
115 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
116 sb->sb_fs_format, sb->sb_multihost_format);
117 printk("GFS2: I don't know how to upgrade this FS\n");
118 return -EINVAL;
119 }
120 }
121
122 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
123 for (x = 0; gfs2_old_multihost_formats[x]; x++)
124 if (gfs2_old_multihost_formats[x] == sb->sb_multihost_format)
125 break;
126
127 if (!gfs2_old_multihost_formats[x]) {
128 printk("GFS2: code version (%u, %u) is incompatible "
129 "with ondisk format (%u, %u)\n",
130 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
131 sb->sb_fs_format, sb->sb_multihost_format);
132 printk("GFS2: I don't know how to upgrade this FS\n");
133 return -EINVAL;
134 }
135 }
136
137 if (!sdp->sd_args.ar_upgrade) {
138 printk("GFS2: code version (%u, %u) is incompatible "
139 "with ondisk format (%u, %u)\n",
140 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
141 sb->sb_fs_format, sb->sb_multihost_format);
142 printk("GFS2: Use the \"upgrade\" mount option to upgrade "
143 "the FS\n");
144 printk("GFS2: See the manual for more details\n");
145 return -EINVAL;
146 }
147
148 return 0;
149}
150
/**
 * gfs2_read_sb - Read super block
 * @sdp: The GFS2 superblock
 * @gl: the glock for the superblock (assumed to be held)
 * @silent: Don't print message if mount fails
 *
 * Reads the on-disk superblock into sdp->sd_sb, validates it with
 * gfs2_check_sb() and then derives the block-size dependent constants
 * kept in @sdp.
 *
 * Returns: 0 on success, or the error from gfs2_meta_read() or
 *          gfs2_check_sb()
 */

int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
{
	struct buffer_head *bh;
	uint32_t hash_blocks, ind_blocks, leaf_blocks;
	uint32_t tmp_blocks;
	unsigned int x;
	int error;

	/* NOTE(review): sd_fsb2bb_shift is read here before it is assigned
	   further down - presumably the caller has set a provisional value;
	   confirm against the mount path */
	error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift,
			       DIO_FORCE | DIO_START | DIO_WAIT, &bh);
	if (error) {
		if (!silent)
			fs_err(sdp, "can't read superblock\n");
		return error;
	}

	/* Copy the superblock into sdp->sd_sb, then drop the buffer */
	gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size);
	gfs2_sb_in(&sdp->sd_sb, bh->b_data);
	brelse(bh);

	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
	if (error)
		return error;

	/* Constants that follow directly from the block size */
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
	/* 64-bit pointers that fit after a dinode header ... */
	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_dinode)) / sizeof(uint64_t);
	/* ... and after a plain metadata header */
	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_meta_header)) / sizeof(uint64_t);
	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
	sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			       sizeof(struct gfs2_unlinked_tag);
	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			       sizeof(struct gfs2_quota_change);

	/* Compute maximum reservation required to add an entry to a directory */

	/* Blocks needed for a hash table of 2^GFS2_DIR_MAX_DEPTH pointers */
	hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH),
			     sdp->sd_jbsize);

	/* Add one layer of indirect blocks at a time until the remaining
	   pointers fit into the dinode itself */
	ind_blocks = 0;
	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
		tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs);
		ind_blocks += tmp_blocks;
	}

	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;

	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;

	/* sd_heightsize[x]: entry 0 is the space left in the dinode block,
	   entry 1 is sd_diptrs full blocks, and each further entry is the
	   previous one multiplied by sd_inptrs */
	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_dinode);
	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		uint64_t space, d;
		uint32_t m;

		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		/* do_div() divides d in place and returns the remainder */
		m = do_div(d, sdp->sd_inptrs);

		/* Dividing back did not recover the previous entry, so the
		   multiplication wrapped: stop here */
		if (d != sdp->sd_heightsize[x - 1] || m)
			break;
		sdp->sd_heightsize[x] = space;
	}
	sdp->sd_max_height = x;
	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);

	/* Same table again, but with sd_jbsize bytes per block at height 1 */
	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
				 sizeof(struct gfs2_dinode);
	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		uint64_t space, d;
		uint32_t m;

		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		/* Same wrap-around check as above */
		if (d != sdp->sd_jheightsize[x - 1] || m)
			break;
		sdp->sd_jheightsize[x] = space;
	}
	sdp->sd_max_jheight = x;
	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);

	return 0;
}
254
/**
 * gfs2_do_upgrade - placeholder for an on-disk format upgrade
 * @sdp: the filesystem (unused)
 * @sb_gl: the superblock glock (unused)
 *
 * Currently performs no work.
 *
 * Returns: 0 always
 */

int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *sb_gl)
{
	return 0;
}
259
/**
 * gfs2_jindex_hold - Grab a lock on the jindex
 * @sdp: The GFS2 superblock
 * @ji_gh: the holder for the jindex glock
 *
 * This is very similar to the gfs2_rindex_hold() function, except that
 * in general we hold the jindex lock for longer periods of time and
 * we grab it far less frequently (in general) than the rgrp lock.
 *
 * Journals are discovered by looking up "journal<N>" in the jindex
 * directory for N = sd_journals, sd_journals + 1, ... until a name is not
 * found.  On success (return value 0) the jindex glock is left held
 * through @ji_gh; on every error path it has been released or was never
 * acquired.
 *
 * Returns: errno
 */

int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
{
	struct gfs2_inode *dip = sdp->sd_jindex;
	struct qstr name;
	char buf[20];
	struct gfs2_jdesc *jd;
	int error;

	name.name = buf;

	/* Serializes all updates of the in-core journal index */
	down(&sdp->sd_jindex_mutex);

	for (;;) {
		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
					   GL_LOCAL_EXCL, ji_gh);
		if (error)
			break;

		/* Name of the first journal we do not know about yet */
		name.len = sprintf(buf, "journal%u", sdp->sd_journals);

		error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
		if (error == -ENOENT) {
			/* No further journals: done, and the glock stays
			   held for the caller */
			error = 0;
			break;
		}

		/* Either a new journal exists or the search failed; the
		   glock is dropped in both cases */
		gfs2_glock_dq_uninit(ji_gh);

		if (error)
			break;

		error = -ENOMEM;
		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
		if (!jd)
			break;

		error = gfs2_lookupi(dip, &name, 1, &jd->jd_inode);
		if (error) {
			kfree(jd);
			break;
		}

		/* Publish the new descriptor; its id is the old journal
		   count */
		spin_lock(&sdp->sd_jindex_spin);
		jd->jd_jid = sdp->sd_journals++;
		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
		spin_unlock(&sdp->sd_jindex_spin);
	}

	up(&sdp->sd_jindex_mutex);

	return error;
}
324
325/**
326 * gfs2_jindex_free - Clear all the journal index information
327 * @sdp: The GFS2 superblock
328 *
329 */
330
331void gfs2_jindex_free(struct gfs2_sbd *sdp)
332{
333 struct list_head list;
334 struct gfs2_jdesc *jd;
335
336 spin_lock(&sdp->sd_jindex_spin);
337 list_add(&list, &sdp->sd_jindex_list);
338 list_del_init(&sdp->sd_jindex_list);
339 sdp->sd_journals = 0;
340 spin_unlock(&sdp->sd_jindex_spin);
341
342 while (!list_empty(&list)) {
343 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
344 list_del(&jd->jd_list);
345 gfs2_inode_put(jd->jd_inode);
346 kfree(jd);
347 }
348}
349
350static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
351{
352 struct gfs2_jdesc *jd;
353 int found = 0;
354
355 list_for_each_entry(jd, head, jd_list) {
356 if (jd->jd_jid == jid) {
357 found = 1;
358 break;
359 }
360 }
361
362 if (!found)
363 jd = NULL;
364
365 return jd;
366}
367
368struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
369{
370 struct gfs2_jdesc *jd;
371
372 spin_lock(&sdp->sd_jindex_spin);
373 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
374 spin_unlock(&sdp->sd_jindex_spin);
375
376 return jd;
377}
378
379void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
380{
381 struct gfs2_jdesc *jd;
382
383 spin_lock(&sdp->sd_jindex_spin);
384 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
385 if (jd)
386 jd->jd_dirty = 1;
387 spin_unlock(&sdp->sd_jindex_spin);
388}
389
390struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
391{
392 struct gfs2_jdesc *jd;
393 int found = 0;
394
395 spin_lock(&sdp->sd_jindex_spin);
396
397 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
398 if (jd->jd_dirty) {
399 jd->jd_dirty = 0;
400 found = 1;
401 break;
402 }
403 }
404 spin_unlock(&sdp->sd_jindex_spin);
405
406 if (!found)
407 jd = NULL;
408
409 return jd;
410}
411
412int gfs2_jdesc_check(struct gfs2_jdesc *jd)
413{
414 struct gfs2_inode *ip = jd->jd_inode;
415 struct gfs2_sbd *sdp = ip->i_sbd;
416 int ar;
417 int error;
418
419 if (ip->i_di.di_size < (8 << 20) ||
420 ip->i_di.di_size > (1 << 30) ||
421 (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
422 gfs2_consist_inode(ip);
423 return -EIO;
424 }
425 jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
426
427 error = gfs2_write_alloc_required(ip,
428 0, ip->i_di.di_size,
429 &ar);
430 if (!error && ar) {
431 gfs2_consist_inode(ip);
432 error = -EIO;
433 }
434
435 return error;
436}
437
438int gfs2_lookup_master_dir(struct gfs2_sbd *sdp)
439{
440 struct gfs2_glock *gl;
441 int error;
442
443 error = gfs2_glock_get(sdp,
444 sdp->sd_sb.sb_master_dir.no_addr,
445 &gfs2_inode_glops, CREATE, &gl);
446 if (!error) {
447 error = gfs2_inode_get(gl, &sdp->sd_sb.sb_master_dir, CREATE,
448 &sdp->sd_master_dir);
449 gfs2_glock_put(gl);
450 }
451
452 return error;
453}
454
/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Under the transaction glock: invalidates what is cached for this node's
 * journal, locates the journal head, requires it to carry the
 * GFS2_LOG_HEAD_UNMOUNT flag, initializes the log pointers from it and
 * brings up the unlinked and quota subsystems before setting
 * SDF_JOURNAL_LIVE.
 *
 * Returns: errno
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
	struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl;
	struct gfs2_holder t_gh;
	struct gfs2_log_header head;
	int error;

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
				   GL_LOCAL_EXCL | GL_NEVER_RECURSE, &t_gh);
	if (error)
		return error;

	/* Drop anything cached for the journal before reading its head */
	gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode);
	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);

	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
	if (error)
		goto fail;

	/* A head without the unmount flag is treated as an inconsistency */
	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		gfs2_consist(sdp);
		error = -EIO;
		goto fail;
	}

	/*  Initialize some head of the log stuff  */
	sdp->sd_log_sequence = head.lh_sequence + 1;
	gfs2_log_pointers_init(sdp, head.lh_blkno);

	error = gfs2_unlinked_init(sdp);
	if (error)
		goto fail;
	error = gfs2_quota_init(sdp);
	if (error)
		goto fail_unlinked;

	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	gfs2_glock_dq_uninit(&t_gh);

	return 0;

 fail_unlinked:
	/* Undo gfs2_unlinked_init() */
	gfs2_unlinked_cleanup(sdp);

 fail:
	/* On failure the transaction glock is released with GL_NOCACHE */
	t_gh.gh_flags |= GL_NOCACHE;
	gfs2_glock_dq_uninit(&t_gh);

	return error;
}
513
/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Syncs unlinked, quota and statfs state, then shuts the log down under
 * the transaction glock.  If that glock cannot be acquired the function
 * gives up, unless SDF_SHUTDOWN is set, in which case the shutdown
 * proceeds without the glock and the lock error is still returned at the
 * end.
 *
 * Returns: errno
 */

int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
	struct gfs2_holder t_gh;
	int error;

	gfs2_unlinked_dealloc(sdp);
	gfs2_quota_sync(sdp);
	gfs2_statfs_sync(sdp);

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
				GL_LOCAL_EXCL | GL_NEVER_RECURSE | GL_NOCACHE,
				&t_gh);
	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return error;

	gfs2_meta_syncfs(sdp);
	gfs2_log_shutdown(sdp);

	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	/* NOTE(review): relies on gfs2_glock_nq_init() leaving gh_gl NULL
	   when it fails - confirm in glock.c */
	if (t_gh.gh_gl)
		gfs2_glock_dq_uninit(&t_gh);

	gfs2_unlinked_cleanup(sdp);
	gfs2_quota_cleanup(sdp);

	return error;
}
549
550int gfs2_statfs_init(struct gfs2_sbd *sdp)
551{
552 struct gfs2_inode *m_ip = sdp->sd_statfs_inode;
553 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
554 struct gfs2_inode *l_ip = sdp->sd_sc_inode;
555 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
556 struct buffer_head *m_bh, *l_bh;
557 struct gfs2_holder gh;
558 int error;
559
560 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
561 &gh);
562 if (error)
563 return error;
564
565 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
566 if (error)
567 goto out;
568
569 if (sdp->sd_args.ar_spectator) {
570 spin_lock(&sdp->sd_statfs_spin);
571 gfs2_statfs_change_in(m_sc, m_bh->b_data +
572 sizeof(struct gfs2_dinode));
573 spin_unlock(&sdp->sd_statfs_spin);
574 } else {
575 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
576 if (error)
577 goto out_m_bh;
578
579 spin_lock(&sdp->sd_statfs_spin);
580 gfs2_statfs_change_in(m_sc, m_bh->b_data +
581 sizeof(struct gfs2_dinode));
582 gfs2_statfs_change_in(l_sc, l_bh->b_data +
583 sizeof(struct gfs2_dinode));
584 spin_unlock(&sdp->sd_statfs_spin);
585
586 brelse(l_bh);
587 }
588
589 out_m_bh:
590 brelse(m_bh);
591
592 out:
593 gfs2_glock_dq_uninit(&gh);
594
595 return 0;
596}
597
598void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free,
599 int64_t dinodes)
600{
601 struct gfs2_inode *l_ip = sdp->sd_sc_inode;
602 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
603 struct buffer_head *l_bh;
604 int error;
605
606 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
607 if (error)
608 return;
609
610 down(&sdp->sd_statfs_mutex);
611 gfs2_trans_add_bh(l_ip->i_gl, l_bh);
612 up(&sdp->sd_statfs_mutex);
613
614 spin_lock(&sdp->sd_statfs_spin);
615 l_sc->sc_total += total;
616 l_sc->sc_free += free;
617 l_sc->sc_dinodes += dinodes;
618 gfs2_statfs_change_out(l_sc, l_bh->b_data +
619 sizeof(struct gfs2_dinode));
620 spin_unlock(&sdp->sd_statfs_spin);
621
622 brelse(l_bh);
623}
624
/**
 * gfs2_statfs_sync - fold the local statfs changes into the master record
 * @sdp: the filesystem
 *
 * Under an exclusive glock on the master statfs inode, re-reads the master
 * record.  If the local record holds any non-zero delta, a transaction is
 * started in which the deltas are added to the master record, the local
 * record is zeroed both in core and in its buffer, and the master record
 * is written back to its buffer.
 *
 * Returns: errno
 */

int gfs2_statfs_sync(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *m_ip = sdp->sd_statfs_inode;
	struct gfs2_inode *l_ip = sdp->sd_sc_inode;
	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
	struct gfs2_holder gh;
	struct buffer_head *m_bh, *l_bh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out;

	/* Refresh the in-core master record; stop here (error is still 0)
	   if there is nothing local to fold in */
	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
		spin_unlock(&sdp->sd_statfs_spin);
		goto out_bh;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
	if (error)
		goto out_bh;

	/* Two dinode blocks are modified: the local and the master one */
	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out_bh2;

	down(&sdp->sd_statfs_mutex);
	gfs2_trans_add_bh(l_ip->i_gl, l_bh);
	up(&sdp->sd_statfs_mutex);

	/* Move the local deltas into the master record and zero the local
	   record, in core and in its buffer */
	spin_lock(&sdp->sd_statfs_spin);
	m_sc->sc_total += l_sc->sc_total;
	m_sc->sc_free += l_sc->sc_free;
	m_sc->sc_dinodes += l_sc->sc_dinodes;
	memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
	memset(l_bh->b_data + sizeof(struct gfs2_dinode),
	       0, sizeof(struct gfs2_statfs_change));
	spin_unlock(&sdp->sd_statfs_spin);

	gfs2_trans_add_bh(m_ip->i_gl, m_bh);
	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));

	gfs2_trans_end(sdp);

 out_bh2:
	brelse(l_bh);

 out_bh:
	brelse(m_bh);

 out:
	gfs2_glock_dq_uninit(&gh);

	return error;
}
690
691/**
692 * gfs2_statfs_i - Do a statfs
693 * @sdp: the filesystem
694 * @sg: the sg structure
695 *
696 * Returns: errno
697 */
698
699int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
700{
701 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
702 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
703
704 spin_lock(&sdp->sd_statfs_spin);
705
706 *sc = *m_sc;
707 sc->sc_total += l_sc->sc_total;
708 sc->sc_free += l_sc->sc_free;
709 sc->sc_dinodes += l_sc->sc_dinodes;
710
711 spin_unlock(&sdp->sd_statfs_spin);
712
713 if (sc->sc_free < 0)
714 sc->sc_free = 0;
715 if (sc->sc_free > sc->sc_total)
716 sc->sc_free = sc->sc_total;
717 if (sc->sc_dinodes < 0)
718 sc->sc_dinodes = 0;
719
720 return 0;
721}
722
723/**
724 * statfs_fill - fill in the sg for a given RG
725 * @rgd: the RG
726 * @sc: the sc structure
727 *
728 * Returns: 0 on success, -ESTALE if the LVB is invalid
729 */
730
731static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
732 struct gfs2_statfs_change *sc)
733{
734 gfs2_rgrp_verify(rgd);
735 sc->sc_total += rgd->rd_ri.ri_data;
736 sc->sc_free += rgd->rd_rg.rg_free;
737 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
738 return 0;
739}
740
741/**
742 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
743 * @sdp: the filesystem
744 * @sc: the sc info that will be returned
745 *
746 * Any error (other than a signal) will cause this routine to fall back
747 * to the synchronous version.
748 *
749 * FIXME: This really shouldn't busy wait like this.
750 *
751 * Returns: errno
752 */
753
754int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
755{
756 struct gfs2_holder ri_gh;
757 struct gfs2_rgrpd *rgd_next;
758 struct gfs2_holder *gha, *gh;
759 unsigned int slots = 64;
760 unsigned int x;
761 int done;
762 int error = 0, err;
763
764 memset(sc, 0, sizeof(struct gfs2_statfs_change));
765 gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
766 if (!gha)
767 return -ENOMEM;
768
769 error = gfs2_rindex_hold(sdp, &ri_gh);
770 if (error)
771 goto out;
772
773 rgd_next = gfs2_rgrpd_get_first(sdp);
774
775 for (;;) {
776 done = 1;
777
778 for (x = 0; x < slots; x++) {
779 gh = gha + x;
780
781 if (gh->gh_gl && gfs2_glock_poll(gh)) {
782 err = gfs2_glock_wait(gh);
783 if (err) {
784 gfs2_holder_uninit(gh);
785 error = err;
786 } else {
787 if (!error)
788 error = statfs_slow_fill(get_gl2rgd(gh->gh_gl), sc);
789 gfs2_glock_dq_uninit(gh);
790 }
791 }
792
793 if (gh->gh_gl)
794 done = 0;
795 else if (rgd_next && !error) {
796 error = gfs2_glock_nq_init(rgd_next->rd_gl,
797 LM_ST_SHARED,
798 GL_ASYNC,
799 gh);
800 rgd_next = gfs2_rgrpd_get_next(rgd_next);
801 done = 0;
802 }
803
804 if (signal_pending(current))
805 error = -ERESTARTSYS;
806 }
807
808 if (done)
809 break;
810
811 yield();
812 }
813
814 gfs2_glock_dq_uninit(&ri_gh);
815
816 out:
817 kfree(gha);
818
819 return error;
820}
821
822struct lfcc {
823 struct list_head list;
824 struct gfs2_holder gh;
825};
826
827/**
828 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
829 * journals are clean
830 * @sdp: the file system
831 * @state: the state to put the transaction lock into
832 * @t_gh: the hold on the transaction lock
833 *
834 * Returns: errno
835 */
836
837int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh)
838{
839 struct gfs2_holder ji_gh;
840 struct gfs2_jdesc *jd;
841 struct lfcc *lfcc;
842 LIST_HEAD(list);
843 struct gfs2_log_header lh;
844 int error;
845
846 error = gfs2_jindex_hold(sdp, &ji_gh);
847 if (error)
848 return error;
849
850 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
851 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
852 if (!lfcc) {
853 error = -ENOMEM;
854 goto out;
855 }
856 error = gfs2_glock_nq_init(jd->jd_inode->i_gl, LM_ST_SHARED, 0,
857 &lfcc->gh);
858 if (error) {
859 kfree(lfcc);
860 goto out;
861 }
862 list_add(&lfcc->list, &list);
863 }
864
865 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
866 LM_FLAG_PRIORITY | GL_NEVER_RECURSE | GL_NOCACHE,
867 t_gh);
868
869 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
870 error = gfs2_jdesc_check(jd);
871 if (error)
872 break;
873 error = gfs2_find_jhead(jd, &lh);
874 if (error)
875 break;
876 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
877 error = -EBUSY;
878 break;
879 }
880 }
881
882 if (error)
883 gfs2_glock_dq_uninit(t_gh);
884
885 out:
886 while (!list_empty(&list)) {
887 lfcc = list_entry(list.next, struct lfcc, list);
888 list_del(&lfcc->list);
889 gfs2_glock_dq_uninit(&lfcc->gh);
890 kfree(lfcc);
891 }
892 gfs2_glock_dq_uninit(&ji_gh);
893
894 return error;
895}
896
897/**
898 * gfs2_freeze_fs - freezes the file system
899 * @sdp: the file system
900 *
901 * This function flushes data and meta data for all machines by
902 * aquiring the transaction log exclusively. All journals are
903 * ensured to be in a clean state as well.
904 *
905 * Returns: errno
906 */
907
908int gfs2_freeze_fs(struct gfs2_sbd *sdp)
909{
910 int error = 0;
911
912 down(&sdp->sd_freeze_lock);
913
914 if (!sdp->sd_freeze_count++) {
915 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
916 if (error)
917 sdp->sd_freeze_count--;
918 }
919
920 up(&sdp->sd_freeze_lock);
921
922 return error;
923}
924
925/**
926 * gfs2_unfreeze_fs - unfreezes the file system
927 * @sdp: the file system
928 *
929 * This function allows the file system to proceed by unlocking
930 * the exclusively held transaction lock. Other GFS2 nodes are
931 * now free to acquire the lock shared and go on with their lives.
932 *
933 */
934
935void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
936{
937 down(&sdp->sd_freeze_lock);
938
939 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
940 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
941
942 up(&sdp->sd_freeze_lock);
943}
944
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
new file mode 100644
index 000000000000..cc1a3df1949a
--- /dev/null
+++ b/fs/gfs2/super.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SUPER_DOT_H__
11#define __SUPER_DOT_H__
12
13void gfs2_tune_init(struct gfs2_tune *gt);
14
15int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
16int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
17int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *gl_sb);
18
19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
20{
21 unsigned int x;
22 spin_lock(&sdp->sd_jindex_spin);
23 x = sdp->sd_journals;
24 spin_unlock(&sdp->sd_jindex_spin);
25 return x;
26}
27
28int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
29void gfs2_jindex_free(struct gfs2_sbd *sdp);
30
31struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
32void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
33struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
34int gfs2_jdesc_check(struct gfs2_jdesc *jd);
35
36int gfs2_lookup_master_dir(struct gfs2_sbd *sdp);
37int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
38 struct gfs2_inode **ipp);
39
40int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
41int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
42
43int gfs2_statfs_init(struct gfs2_sbd *sdp);
44void gfs2_statfs_change(struct gfs2_sbd *sdp,
45 int64_t total, int64_t free, int64_t dinodes);
46int gfs2_statfs_sync(struct gfs2_sbd *sdp);
47int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
48int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
49
50int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh);
51int gfs2_freeze_fs(struct gfs2_sbd *sdp);
52void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
53
54#endif /* __SUPER_DOT_H__ */
55
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
new file mode 100644
index 000000000000..75e9a3231b8f
--- /dev/null
+++ b/fs/gfs2/sys.c
@@ -0,0 +1,640 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/kobject.h>
17#include <asm/semaphore.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "lm.h"
22#include "sys.h"
23#include "super.h"
24#include "glock.h"
25#include "quota.h"
26
27char *gfs2_sys_margs;
28spinlock_t gfs2_sys_margs_lock;
29
30static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
31{
32 return sprintf(buf, "%s\n", sdp->sd_vfs->s_id);
33}
34
35static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
36{
37 return sprintf(buf, "%s\n", sdp->sd_fsname);
38}
39
40static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
41{
42 unsigned int count;
43
44 down(&sdp->sd_freeze_lock);
45 count = sdp->sd_freeze_count;
46 up(&sdp->sd_freeze_lock);
47
48 return sprintf(buf, "%u\n", count);
49}
50
51static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
52{
53 ssize_t ret = len;
54 int error = 0;
55 int n = simple_strtol(buf, NULL, 0);
56
57 if (!capable(CAP_SYS_ADMIN))
58 return -EACCES;
59
60 switch (n) {
61 case 0:
62 gfs2_unfreeze_fs(sdp);
63 break;
64 case 1:
65 error = gfs2_freeze_fs(sdp);
66 break;
67 default:
68 ret = -EINVAL;
69 }
70
71 if (error)
72 fs_warn(sdp, "freeze %d error %d", n, error);
73
74 return ret;
75}
76
77static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
78{
79 unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
80 return sprintf(buf, "%u\n", b);
81}
82
83static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
84{
85 if (!capable(CAP_SYS_ADMIN))
86 return -EACCES;
87
88 if (simple_strtol(buf, NULL, 0) != 1)
89 return -EINVAL;
90
91 gfs2_lm_withdraw(sdp,
92 "GFS2: fsid=%s: withdrawing from cluster at user's request\n",
93 sdp->sd_fsname);
94 return len;
95}
96
97static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
98 size_t len)
99{
100 if (!capable(CAP_SYS_ADMIN))
101 return -EACCES;
102
103 if (simple_strtol(buf, NULL, 0) != 1)
104 return -EINVAL;
105
106 gfs2_statfs_sync(sdp);
107 return len;
108}
109
110static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
111{
112 if (!capable(CAP_SYS_ADMIN))
113 return -EACCES;
114
115 if (simple_strtol(buf, NULL, 0) != 1)
116 return -EINVAL;
117
118 gfs2_gl_hash_clear(sdp, NO_WAIT);
119 return len;
120}
121
122static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
123 size_t len)
124{
125 if (!capable(CAP_SYS_ADMIN))
126 return -EACCES;
127
128 if (simple_strtol(buf, NULL, 0) != 1)
129 return -EINVAL;
130
131 gfs2_quota_sync(sdp);
132 return len;
133}
134
135static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
136 size_t len)
137{
138 uint32_t id;
139
140 if (!capable(CAP_SYS_ADMIN))
141 return -EACCES;
142
143 id = simple_strtoul(buf, NULL, 0);
144
145 gfs2_quota_refresh(sdp, 1, id);
146 return len;
147}
148
149static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
150 size_t len)
151{
152 uint32_t id;
153
154 if (!capable(CAP_SYS_ADMIN))
155 return -EACCES;
156
157 id = simple_strtoul(buf, NULL, 0);
158
159 gfs2_quota_refresh(sdp, 0, id);
160 return len;
161}
162
163struct gfs2_attr {
164 struct attribute attr;
165 ssize_t (*show)(struct gfs2_sbd *, char *);
166 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
167};
168
169#define GFS2_ATTR(name, mode, show, store) \
170static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
171
172GFS2_ATTR(id, 0444, id_show, NULL);
173GFS2_ATTR(fsname, 0444, fsname_show, NULL);
174GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
175GFS2_ATTR(shrink, 0200, NULL, shrink_store);
176GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
177GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
178GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
179GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
180GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
181
182static struct attribute *gfs2_attrs[] = {
183 &gfs2_attr_id.attr,
184 &gfs2_attr_fsname.attr,
185 &gfs2_attr_freeze.attr,
186 &gfs2_attr_shrink.attr,
187 &gfs2_attr_withdraw.attr,
188 &gfs2_attr_statfs_sync.attr,
189 &gfs2_attr_quota_sync.attr,
190 &gfs2_attr_quota_refresh_user.attr,
191 &gfs2_attr_quota_refresh_group.attr,
192 NULL,
193};
194
195static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
196 char *buf)
197{
198 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
199 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
200 return a->show ? a->show(sdp, buf) : 0;
201}
202
203static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
204 const char *buf, size_t len)
205{
206 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
207 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
208 return a->store ? a->store(sdp, buf, len) : len;
209}
210
211static struct sysfs_ops gfs2_attr_ops = {
212 .show = gfs2_attr_show,
213 .store = gfs2_attr_store,
214};
215
216static struct kobj_type gfs2_ktype = {
217 .default_attrs = gfs2_attrs,
218 .sysfs_ops = &gfs2_attr_ops,
219};
220
221static struct kset gfs2_kset = {
222 .subsys = &fs_subsys,
223 .kobj = {.name = "gfs2",},
224 .ktype = &gfs2_ktype,
225};
226
227/*
228 * display struct lm_lockstruct fields
229 */
230
231struct lockstruct_attr {
232 struct attribute attr;
233 ssize_t (*show)(struct gfs2_sbd *, char *);
234};
235
236#define LOCKSTRUCT_ATTR(name, fmt) \
237static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
238{ \
239 return sprintf(buf, fmt, sdp->sd_lockstruct.ls_##name); \
240} \
241static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
242
243LOCKSTRUCT_ATTR(jid, "%u\n");
244LOCKSTRUCT_ATTR(first, "%u\n");
245LOCKSTRUCT_ATTR(lvb_size, "%u\n");
246LOCKSTRUCT_ATTR(flags, "%d\n");
247
248static struct attribute *lockstruct_attrs[] = {
249 &lockstruct_attr_jid.attr,
250 &lockstruct_attr_first.attr,
251 &lockstruct_attr_lvb_size.attr,
252 &lockstruct_attr_flags.attr,
253 NULL
254};
255
256/*
257 * display struct gfs2_args fields
258 */
259
260struct args_attr {
261 struct attribute attr;
262 ssize_t (*show)(struct gfs2_sbd *, char *);
263};
264
265#define ARGS_ATTR(name, fmt) \
266static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
267{ \
268 return sprintf(buf, fmt, sdp->sd_args.ar_##name); \
269} \
270static struct args_attr args_attr_##name = __ATTR_RO(name)
271
272ARGS_ATTR(lockproto, "%s\n");
273ARGS_ATTR(locktable, "%s\n");
274ARGS_ATTR(hostdata, "%s\n");
275ARGS_ATTR(spectator, "%d\n");
276ARGS_ATTR(ignore_local_fs, "%d\n");
277ARGS_ATTR(localcaching, "%d\n");
278ARGS_ATTR(localflocks, "%d\n");
279ARGS_ATTR(debug, "%d\n");
280ARGS_ATTR(upgrade, "%d\n");
281ARGS_ATTR(num_glockd, "%u\n");
282ARGS_ATTR(posix_acl, "%d\n");
283ARGS_ATTR(quota, "%u\n");
284ARGS_ATTR(suiddir, "%d\n");
285ARGS_ATTR(data, "%d\n");
286
287/* one oddball doesn't fit the macro mold */
288static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
289{
290 return sprintf(buf, "%d\n", !!test_bit(SDF_NOATIME, &sdp->sd_flags));
291}
292static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
293
294static struct attribute *args_attrs[] = {
295 &args_attr_lockproto.attr,
296 &args_attr_locktable.attr,
297 &args_attr_hostdata.attr,
298 &args_attr_spectator.attr,
299 &args_attr_ignore_local_fs.attr,
300 &args_attr_localcaching.attr,
301 &args_attr_localflocks.attr,
302 &args_attr_debug.attr,
303 &args_attr_upgrade.attr,
304 &args_attr_num_glockd.attr,
305 &args_attr_posix_acl.attr,
306 &args_attr_quota.attr,
307 &args_attr_suiddir.attr,
308 &args_attr_data.attr,
309 &args_attr_noatime.attr,
310 NULL
311};
312
313/*
314 * display counters from superblock
315 */
316
317struct counters_attr {
318 struct attribute attr;
319 ssize_t (*show)(struct gfs2_sbd *, char *);
320};
321
322#define COUNTERS_ATTR_GENERAL(name, fmt, val) \
323static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
324{ \
325 return sprintf(buf, fmt, val); \
326} \
327static struct counters_attr counters_attr_##name = __ATTR_RO(name)
328
329#define COUNTERS_ATTR_SIMPLE(name, fmt) \
330 COUNTERS_ATTR_GENERAL(name, fmt, sdp->sd_##name)
331
332#define COUNTERS_ATTR_ATOMIC(name, fmt) \
333 COUNTERS_ATTR_GENERAL(name, fmt, (unsigned int)atomic_read(&sdp->sd_##name))
334
335COUNTERS_ATTR_ATOMIC(glock_count, "%u\n");
336COUNTERS_ATTR_ATOMIC(glock_held_count, "%u\n");
337COUNTERS_ATTR_ATOMIC(inode_count, "%u\n");
338COUNTERS_ATTR_ATOMIC(bufdata_count, "%u\n");
339COUNTERS_ATTR_ATOMIC(unlinked_count, "%u\n");
340COUNTERS_ATTR_ATOMIC(quota_count, "%u\n");
341COUNTERS_ATTR_SIMPLE(log_num_gl, "%u\n");
342COUNTERS_ATTR_SIMPLE(log_num_buf, "%u\n");
343COUNTERS_ATTR_SIMPLE(log_num_revoke, "%u\n");
344COUNTERS_ATTR_SIMPLE(log_num_rg, "%u\n");
345COUNTERS_ATTR_SIMPLE(log_num_databuf, "%u\n");
346COUNTERS_ATTR_SIMPLE(log_blks_free, "%u\n");
347COUNTERS_ATTR_GENERAL(jd_blocks, "%u\n", sdp->sd_jdesc->jd_blocks);
348COUNTERS_ATTR_ATOMIC(reclaim_count, "%u\n");
349COUNTERS_ATTR_SIMPLE(log_wraps, "%llu\n");
350COUNTERS_ATTR_ATOMIC(fh2dentry_misses, "%u\n");
351COUNTERS_ATTR_ATOMIC(reclaimed, "%u\n");
352COUNTERS_ATTR_ATOMIC(log_flush_incore, "%u\n");
353COUNTERS_ATTR_ATOMIC(log_flush_ondisk, "%u\n");
354COUNTERS_ATTR_ATOMIC(glock_nq_calls, "%u\n");
355COUNTERS_ATTR_ATOMIC(glock_dq_calls, "%u\n");
356COUNTERS_ATTR_ATOMIC(glock_prefetch_calls, "%u\n");
357COUNTERS_ATTR_ATOMIC(lm_lock_calls, "%u\n");
358COUNTERS_ATTR_ATOMIC(lm_unlock_calls, "%u\n");
359COUNTERS_ATTR_ATOMIC(lm_callbacks, "%u\n");
360COUNTERS_ATTR_ATOMIC(ops_address, "%u\n");
361COUNTERS_ATTR_ATOMIC(ops_dentry, "%u\n");
362COUNTERS_ATTR_ATOMIC(ops_export, "%u\n");
363COUNTERS_ATTR_ATOMIC(ops_file, "%u\n");
364COUNTERS_ATTR_ATOMIC(ops_inode, "%u\n");
365COUNTERS_ATTR_ATOMIC(ops_super, "%u\n");
366COUNTERS_ATTR_ATOMIC(ops_vm, "%u\n");
367
368static struct attribute *counters_attrs[] = {
369 &counters_attr_glock_count.attr,
370 &counters_attr_glock_held_count.attr,
371 &counters_attr_inode_count.attr,
372 &counters_attr_bufdata_count.attr,
373 &counters_attr_unlinked_count.attr,
374 &counters_attr_quota_count.attr,
375 &counters_attr_log_num_gl.attr,
376 &counters_attr_log_num_buf.attr,
377 &counters_attr_log_num_revoke.attr,
378 &counters_attr_log_num_rg.attr,
379 &counters_attr_log_num_databuf.attr,
380 &counters_attr_log_blks_free.attr,
381 &counters_attr_jd_blocks.attr,
382 &counters_attr_reclaim_count.attr,
383 &counters_attr_log_wraps.attr,
384 &counters_attr_fh2dentry_misses.attr,
385 &counters_attr_reclaimed.attr,
386 &counters_attr_log_flush_incore.attr,
387 &counters_attr_log_flush_ondisk.attr,
388 &counters_attr_glock_nq_calls.attr,
389 &counters_attr_glock_dq_calls.attr,
390 &counters_attr_glock_prefetch_calls.attr,
391 &counters_attr_lm_lock_calls.attr,
392 &counters_attr_lm_unlock_calls.attr,
393 &counters_attr_lm_callbacks.attr,
394 &counters_attr_ops_address.attr,
395 &counters_attr_ops_dentry.attr,
396 &counters_attr_ops_export.attr,
397 &counters_attr_ops_file.attr,
398 &counters_attr_ops_inode.attr,
399 &counters_attr_ops_super.attr,
400 &counters_attr_ops_vm.attr,
401 NULL
402};
403
404/*
405 * get and set struct gfs2_tune fields
406 */
407
408static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
409{
410 return sprintf(buf, "%u %u\n", sdp->sd_tune.gt_quota_scale_num,
411 sdp->sd_tune.gt_quota_scale_den);
412}
413
414static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
415 size_t len)
416{
417 struct gfs2_tune *gt = &sdp->sd_tune;
418 unsigned int x, y;
419
420 if (!capable(CAP_SYS_ADMIN))
421 return -EACCES;
422
423 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
424 return -EINVAL;
425
426 spin_lock(&gt->gt_spin);
427 gt->gt_quota_scale_num = x;
428 gt->gt_quota_scale_den = y;
429 spin_unlock(&gt->gt_spin);
430 return len;
431}
432
433static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
434 int check_zero, const char *buf, size_t len)
435{
436 struct gfs2_tune *gt = &sdp->sd_tune;
437 unsigned int x;
438
439 if (!capable(CAP_SYS_ADMIN))
440 return -EACCES;
441
442 x = simple_strtoul(buf, NULL, 0);
443
444 if (check_zero && !x)
445 return -EINVAL;
446
447 spin_lock(&gt->gt_spin);
448 *field = x;
449 spin_unlock(&gt->gt_spin);
450 return len;
451}
452
453struct tune_attr {
454 struct attribute attr;
455 ssize_t (*show)(struct gfs2_sbd *, char *);
456 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
457};
458
459#define TUNE_ATTR_3(name, show, store) \
460static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
461
462#define TUNE_ATTR_2(name, store) \
463static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
464{ \
465 return sprintf(buf, "%u\n", sdp->sd_tune.gt_##name); \
466} \
467TUNE_ATTR_3(name, name##_show, store)
468
469#define TUNE_ATTR(name, check_zero) \
470static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
471{ \
472 return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
473} \
474TUNE_ATTR_2(name, name##_store)
475
476#define TUNE_ATTR_DAEMON(name, process) \
477static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
478{ \
479 ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
480 wake_up_process(sdp->sd_##process); \
481 return r; \
482} \
483TUNE_ATTR_2(name, name##_store)
484
485TUNE_ATTR(ilimit, 0);
486TUNE_ATTR(ilimit_tries, 0);
487TUNE_ATTR(ilimit_min, 0);
488TUNE_ATTR(demote_secs, 0);
489TUNE_ATTR(incore_log_blocks, 0);
490TUNE_ATTR(log_flush_secs, 0);
491TUNE_ATTR(jindex_refresh_secs, 0);
492TUNE_ATTR(quota_warn_period, 0);
493TUNE_ATTR(quota_quantum, 0);
494TUNE_ATTR(atime_quantum, 0);
495TUNE_ATTR(max_readahead, 0);
496TUNE_ATTR(complain_secs, 0);
497TUNE_ATTR(reclaim_limit, 0);
498TUNE_ATTR(prefetch_secs, 0);
499TUNE_ATTR(statfs_slow, 0);
500TUNE_ATTR(new_files_jdata, 0);
501TUNE_ATTR(new_files_directio, 0);
502TUNE_ATTR(quota_simul_sync, 1);
503TUNE_ATTR(quota_cache_secs, 1);
504TUNE_ATTR(max_atomic_write, 1);
505TUNE_ATTR(stall_secs, 1);
506TUNE_ATTR(entries_per_readdir, 1);
507TUNE_ATTR(greedy_default, 1);
508TUNE_ATTR(greedy_quantum, 1);
509TUNE_ATTR(greedy_max, 1);
510TUNE_ATTR(statfs_quantum, 1);
511TUNE_ATTR_DAEMON(scand_secs, scand_process);
512TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
513TUNE_ATTR_DAEMON(logd_secs, logd_process);
514TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
515TUNE_ATTR_DAEMON(inoded_secs, inoded_process);
516TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
517
518static struct attribute *tune_attrs[] = {
519 &tune_attr_ilimit.attr,
520 &tune_attr_ilimit_tries.attr,
521 &tune_attr_ilimit_min.attr,
522 &tune_attr_demote_secs.attr,
523 &tune_attr_incore_log_blocks.attr,
524 &tune_attr_log_flush_secs.attr,
525 &tune_attr_jindex_refresh_secs.attr,
526 &tune_attr_quota_warn_period.attr,
527 &tune_attr_quota_quantum.attr,
528 &tune_attr_atime_quantum.attr,
529 &tune_attr_max_readahead.attr,
530 &tune_attr_complain_secs.attr,
531 &tune_attr_reclaim_limit.attr,
532 &tune_attr_prefetch_secs.attr,
533 &tune_attr_statfs_slow.attr,
534 &tune_attr_quota_simul_sync.attr,
535 &tune_attr_quota_cache_secs.attr,
536 &tune_attr_max_atomic_write.attr,
537 &tune_attr_stall_secs.attr,
538 &tune_attr_entries_per_readdir.attr,
539 &tune_attr_greedy_default.attr,
540 &tune_attr_greedy_quantum.attr,
541 &tune_attr_greedy_max.attr,
542 &tune_attr_statfs_quantum.attr,
543 &tune_attr_scand_secs.attr,
544 &tune_attr_recoverd_secs.attr,
545 &tune_attr_logd_secs.attr,
546 &tune_attr_quotad_secs.attr,
547 &tune_attr_inoded_secs.attr,
548 &tune_attr_quota_scale.attr,
549 &tune_attr_new_files_jdata.attr,
550 &tune_attr_new_files_directio.attr,
551 NULL
552};
553
554static struct attribute_group lockstruct_group = {
555 .name = "lockstruct",
556 .attrs = lockstruct_attrs
557};
558
559static struct attribute_group counters_group = {
560 .name = "counters",
561 .attrs = counters_attrs
562};
563
564static struct attribute_group args_group = {
565 .name = "args",
566 .attrs = args_attrs
567};
568
569static struct attribute_group tune_group = {
570 .name = "tune",
571 .attrs = tune_attrs
572};
573
574int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
575{
576 int error;
577
578 sdp->sd_kobj.kset = &gfs2_kset;
579 sdp->sd_kobj.ktype = &gfs2_ktype;
580
581 error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
582 if (error)
583 goto fail;
584
585 error = kobject_register(&sdp->sd_kobj);
586 if (error)
587 goto fail;
588
589 error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
590 if (error)
591 goto fail_reg;
592
593 error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
594 if (error)
595 goto fail_lockstruct;
596
597 error = sysfs_create_group(&sdp->sd_kobj, &args_group);
598 if (error)
599 goto fail_counters;
600
601 error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
602 if (error)
603 goto fail_args;
604
605 return 0;
606
607 fail_args:
608 sysfs_remove_group(&sdp->sd_kobj, &args_group);
609 fail_counters:
610 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
611 fail_lockstruct:
612 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
613 fail_reg:
614 kobject_unregister(&sdp->sd_kobj);
615 fail:
616 return error;
617}
618
619void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
620{
621 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
622 sysfs_remove_group(&sdp->sd_kobj, &args_group);
623 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
624 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
625 kobject_unregister(&sdp->sd_kobj);
626}
627
628int gfs2_sys_init(void)
629{
630 gfs2_sys_margs = NULL;
631 spin_lock_init(&gfs2_sys_margs_lock);
632 return kset_register(&gfs2_kset);
633}
634
635void gfs2_sys_uninit(void)
636{
637 kfree(gfs2_sys_margs);
638 kset_unregister(&gfs2_kset);
639}
640
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
new file mode 100644
index 000000000000..62c8ed89ab9c
--- /dev/null
+++ b/fs/gfs2/sys.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SYS_DOT_H__
11#define __SYS_DOT_H__
12
13/* Allow args to be passed to GFS2 when using an initial ram disk */
14extern char *gfs2_sys_margs;
15extern spinlock_t gfs2_sys_margs_lock;
16
17int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
18void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
19
20int gfs2_sys_init(void);
21void gfs2_sys_uninit(void);
22
23#endif /* __SYS_DOT_H__ */
24
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
new file mode 100644
index 000000000000..afa5408c0008
--- /dev/null
+++ b/fs/gfs2/trans.c
@@ -0,0 +1,214 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "glock.h"
19#include "log.h"
20#include "lops.h"
21#include "meta_io.h"
22#include "trans.h"
23
24int gfs2_trans_begin_i(struct gfs2_sbd *sdp, unsigned int blocks,
25 unsigned int revokes, char *file, unsigned int line)
26{
27 struct gfs2_trans *tr;
28 int error;
29
30 if (gfs2_assert_warn(sdp, !get_transaction) ||
31 gfs2_assert_warn(sdp, blocks || revokes)) {
32 fs_warn(sdp, "(%s, %u)\n", file, line);
33 return -EINVAL;
34 }
35
36 tr = kzalloc(sizeof(struct gfs2_trans), GFP_KERNEL);
37 if (!tr)
38 return -ENOMEM;
39
40 tr->tr_file = file;
41 tr->tr_line = line;
42 tr->tr_blocks = blocks;
43 tr->tr_revokes = revokes;
44 tr->tr_reserved = 1;
45 if (blocks)
46 tr->tr_reserved += 1 + blocks;
47 if (revokes)
48 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
49 sizeof(uint64_t));
50 INIT_LIST_HEAD(&tr->tr_list_buf);
51
52 error = -ENOMEM;
53 tr->tr_t_gh = gfs2_holder_get(sdp->sd_trans_gl, LM_ST_SHARED,
54 GL_NEVER_RECURSE, GFP_KERNEL);
55 if (!tr->tr_t_gh)
56 goto fail;
57
58 error = gfs2_glock_nq(tr->tr_t_gh);
59 if (error)
60 goto fail_holder_put;
61
62 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
63 tr->tr_t_gh->gh_flags |= GL_NOCACHE;
64 error = -EROFS;
65 goto fail_gunlock;
66 }
67
68 error = gfs2_log_reserve(sdp, tr->tr_reserved);
69 if (error)
70 goto fail_gunlock;
71
72 set_transaction(tr);
73
74 return 0;
75
76 fail_gunlock:
77 gfs2_glock_dq(tr->tr_t_gh);
78
79 fail_holder_put:
80 gfs2_holder_put(tr->tr_t_gh);
81
82 fail:
83 kfree(tr);
84
85 return error;
86}
87
88void gfs2_trans_end(struct gfs2_sbd *sdp)
89{
90 struct gfs2_trans *tr;
91 struct gfs2_holder *t_gh;
92
93 tr = get_transaction;
94 set_transaction(NULL);
95
96 if (gfs2_assert_warn(sdp, tr))
97 return;
98
99 t_gh = tr->tr_t_gh;
100 tr->tr_t_gh = NULL;
101
102 if (!tr->tr_touched) {
103 gfs2_log_release(sdp, tr->tr_reserved);
104 kfree(tr);
105
106 gfs2_glock_dq(t_gh);
107 gfs2_holder_put(t_gh);
108
109 return;
110 }
111
112 if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks))
113 fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u "
114 "tr_file = %s, tr_line = %u\n",
115 tr->tr_num_buf, tr->tr_blocks,
116 tr->tr_file, tr->tr_line);
117 if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes))
118 fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u "
119 "tr_file = %s, tr_line = %u\n",
120 tr->tr_num_revoke, tr->tr_revokes,
121 tr->tr_file, tr->tr_line);
122
123 gfs2_log_commit(sdp, tr);
124
125 gfs2_glock_dq(t_gh);
126 gfs2_holder_put(t_gh);
127
128 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
129 gfs2_log_flush(sdp);
130}
131
132void gfs2_trans_add_gl(struct gfs2_glock *gl)
133{
134 lops_add(gl->gl_sbd, &gl->gl_le);
135}
136
137/**
138 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
139 * @gl: the glock the buffer belongs to
140 * @bh: The buffer to add
141 *
142 */
143
144void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh)
145{
146 struct gfs2_sbd *sdp = gl->gl_sbd;
147 struct gfs2_bufdata *bd;
148
149 bd = get_v2bd(bh);
150 if (bd)
151 gfs2_assert(sdp, bd->bd_gl == gl);
152 else {
153 gfs2_meta_attach_bufdata(gl, bh);
154 bd = get_v2bd(bh);
155 }
156
157 lops_add(sdp, &bd->bd_le);
158}
159
160void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
161{
162 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
163 GFP_KERNEL | __GFP_NOFAIL);
164 lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
165 rv->rv_blkno = blkno;
166 lops_add(sdp, &rv->rv_le);
167}
168
169void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno)
170{
171 struct gfs2_revoke *rv;
172 int found = 0;
173
174 gfs2_log_lock(sdp);
175
176 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
177 if (rv->rv_blkno == blkno) {
178 list_del(&rv->rv_le.le_list);
179 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
180 sdp->sd_log_num_revoke--;
181 found = 1;
182 break;
183 }
184 }
185
186 gfs2_log_unlock(sdp);
187
188 if (found) {
189 kfree(rv);
190 get_transaction->tr_num_revoke_rm++;
191 }
192}
193
194void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
195{
196 lops_add(rgd->rd_sbd, &rgd->rd_le);
197}
198
199void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh)
200{
201 struct gfs2_databuf *db;
202
203 db = get_v2db(bh);
204 if (!db) {
205 db = kmalloc(sizeof(struct gfs2_databuf),
206 GFP_KERNEL | __GFP_NOFAIL);
207 lops_init_le(&db->db_le, &gfs2_databuf_lops);
208 get_bh(bh);
209 db->db_bh = bh;
210 set_v2db(bh, db);
211 lops_add(sdp, &db->db_le);
212 }
213}
214
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
new file mode 100644
index 000000000000..ac615e9e8521
--- /dev/null
+++ b/fs/gfs2/trans.h
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __TRANS_DOT_H__
11#define __TRANS_DOT_H__
12
13#define RES_DINODE 1
14#define RES_INDIRECT 1
15#define RES_JDATA 1
16#define RES_DATA 1
17#define RES_LEAF 1
18#define RES_RG_BIT 2
19#define RES_EATTR 1
20#define RES_UNLINKED 1
21#define RES_STATFS 1
22#define RES_QUOTA 2
23
24#define gfs2_trans_begin(sdp, blocks, revokes) \
25gfs2_trans_begin_i((sdp), (blocks), (revokes), __FILE__, __LINE__)
26
27int gfs2_trans_begin_i(struct gfs2_sbd *sdp,
28 unsigned int blocks, unsigned int revokes,
29 char *file, unsigned int line);
30
31void gfs2_trans_end(struct gfs2_sbd *sdp);
32
33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh);
39
40#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/unlinked.c b/fs/gfs2/unlinked.c
new file mode 100644
index 000000000000..4a993af58c1a
--- /dev/null
+++ b/fs/gfs2/unlinked.c
@@ -0,0 +1,453 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "bmap.h"
20#include "inode.h"
21#include "meta_io.h"
22#include "trans.h"
23#include "unlinked.h"
24
25static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot,
26 struct gfs2_unlinked_tag *ut)
27{
28 struct gfs2_inode *ip = sdp->sd_ut_inode;
29 unsigned int block, offset;
30 uint64_t dblock;
31 int new = 0;
32 struct buffer_head *bh;
33 int error;
34
35 block = slot / sdp->sd_ut_per_block;
36 offset = slot % sdp->sd_ut_per_block;
37
38 error = gfs2_block_map(ip, block, &new, &dblock, NULL);
39 if (error)
40 return error;
41 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
42 if (error)
43 return error;
44 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
45 error = -EIO;
46 goto out;
47 }
48
49 down(&sdp->sd_unlinked_mutex);
50 gfs2_trans_add_bh(ip->i_gl, bh);
51 gfs2_unlinked_tag_out(ut, bh->b_data +
52 sizeof(struct gfs2_meta_header) +
53 offset * sizeof(struct gfs2_unlinked_tag));
54 up(&sdp->sd_unlinked_mutex);
55
56 out:
57 brelse(bh);
58
59 return error;
60}
61
62static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
63{
64 spin_lock(&sdp->sd_unlinked_spin);
65 list_add(&ul->ul_list, &sdp->sd_unlinked_list);
66 gfs2_assert(sdp, ul->ul_count);
67 ul->ul_count++;
68 atomic_inc(&sdp->sd_unlinked_count);
69 spin_unlock(&sdp->sd_unlinked_spin);
70}
71
72static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
73{
74 spin_lock(&sdp->sd_unlinked_spin);
75 list_del_init(&ul->ul_list);
76 gfs2_assert(sdp, ul->ul_count > 1);
77 ul->ul_count--;
78 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0);
79 atomic_dec(&sdp->sd_unlinked_count);
80 spin_unlock(&sdp->sd_unlinked_spin);
81}
82
83static struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp)
84{
85 struct list_head *head;
86 struct gfs2_unlinked *ul;
87 int found = 0;
88
89 if (sdp->sd_vfs->s_flags & MS_RDONLY)
90 return NULL;
91
92 spin_lock(&sdp->sd_unlinked_spin);
93
94 head = &sdp->sd_unlinked_list;
95
96 list_for_each_entry(ul, head, ul_list) {
97 if (test_bit(ULF_LOCKED, &ul->ul_flags))
98 continue;
99
100 list_move_tail(&ul->ul_list, head);
101 ul->ul_count++;
102 set_bit(ULF_LOCKED, &ul->ul_flags);
103 found = 1;
104
105 break;
106 }
107
108 if (!found)
109 ul = NULL;
110
111 spin_unlock(&sdp->sd_unlinked_spin);
112
113 return ul;
114}
115
116/**
117 * enforce_limit - limit the number of inodes waiting to be deallocated
118 * @sdp: the filesystem
119 *
120 * Returns: errno
121 */
122
123static void enforce_limit(struct gfs2_sbd *sdp)
124{
125 unsigned int tries = 0, min = 0;
126 int error;
127
128 if (atomic_read(&sdp->sd_unlinked_count) >=
129 gfs2_tune_get(sdp, gt_ilimit)) {
130 tries = gfs2_tune_get(sdp, gt_ilimit_tries);
131 min = gfs2_tune_get(sdp, gt_ilimit_min);
132 }
133
134 while (tries--) {
135 struct gfs2_unlinked *ul = ul_fish(sdp);
136 if (!ul)
137 break;
138 error = gfs2_inode_dealloc(sdp, ul);
139 gfs2_unlinked_put(sdp, ul);
140
141 if (!error) {
142 if (!--min)
143 break;
144 } else if (error != 1)
145 break;
146 }
147}
148
149static struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp)
150{
151 struct gfs2_unlinked *ul;
152
153 ul = kzalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL);
154 if (ul) {
155 INIT_LIST_HEAD(&ul->ul_list);
156 ul->ul_count = 1;
157 set_bit(ULF_LOCKED, &ul->ul_flags);
158 }
159
160 return ul;
161}
162
163int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul)
164{
165 unsigned int c, o = 0, b;
166 unsigned char byte = 0;
167
168 enforce_limit(sdp);
169
170 *ul = ul_alloc(sdp);
171 if (!*ul)
172 return -ENOMEM;
173
174 spin_lock(&sdp->sd_unlinked_spin);
175
176 for (c = 0; c < sdp->sd_unlinked_chunks; c++)
177 for (o = 0; o < PAGE_SIZE; o++) {
178 byte = sdp->sd_unlinked_bitmap[c][o];
179 if (byte != 0xFF)
180 goto found;
181 }
182
183 goto fail;
184
185 found:
186 for (b = 0; b < 8; b++)
187 if (!(byte & (1 << b)))
188 break;
189 (*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
190
191 if ((*ul)->ul_slot >= sdp->sd_unlinked_slots)
192 goto fail;
193
194 sdp->sd_unlinked_bitmap[c][o] |= 1 << b;
195
196 spin_unlock(&sdp->sd_unlinked_spin);
197
198 return 0;
199
200 fail:
201 spin_unlock(&sdp->sd_unlinked_spin);
202 kfree(*ul);
203 return -ENOSPC;
204}
205
206void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
207{
208 gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags));
209
210 spin_lock(&sdp->sd_unlinked_spin);
211 gfs2_assert(sdp, ul->ul_count);
212 ul->ul_count--;
213 if (!ul->ul_count) {
214 gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0);
215 spin_unlock(&sdp->sd_unlinked_spin);
216 kfree(ul);
217 } else
218 spin_unlock(&sdp->sd_unlinked_spin);
219}
220
221int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
222{
223 int error;
224
225 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
226 gfs2_assert_warn(sdp, list_empty(&ul->ul_list));
227
228 error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
229 if (!error)
230 ul_hash(sdp, ul);
231
232 return error;
233}
234
235int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
236{
237 int error;
238
239 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
240 gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
241
242 error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
243
244 return error;
245}
246
247int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
248{
249 struct gfs2_unlinked_tag ut;
250 int error;
251
252 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
253 gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
254
255 memset(&ut, 0, sizeof(struct gfs2_unlinked_tag));
256
257 error = munge_ondisk(sdp, ul->ul_slot, &ut);
258 if (error)
259 return error;
260
261 ul_unhash(sdp, ul);
262
263 return 0;
264}
265
266/**
267 * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated
268 * @sdp: the filesystem
269 *
270 * Returns: errno
271 */
272
273int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp)
274{
275 unsigned int hits, strikes;
276 int error;
277
278 for (;;) {
279 hits = 0;
280 strikes = 0;
281
282 for (;;) {
283 struct gfs2_unlinked *ul = ul_fish(sdp);
284 if (!ul)
285 return 0;
286 error = gfs2_inode_dealloc(sdp, ul);
287 gfs2_unlinked_put(sdp, ul);
288
289 if (!error) {
290 hits++;
291 if (strikes)
292 strikes--;
293 } else if (error == 1) {
294 strikes++;
295 if (strikes >=
296 atomic_read(&sdp->sd_unlinked_count)) {
297 error = 0;
298 break;
299 }
300 } else
301 return error;
302 }
303
304 if (!hits || kthread_should_stop())
305 break;
306
307 cond_resched();
308 }
309
310 return 0;
311}
312
313int gfs2_unlinked_init(struct gfs2_sbd *sdp)
314{
315 struct gfs2_inode *ip = sdp->sd_ut_inode;
316 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
317 unsigned int x, slot = 0;
318 unsigned int found = 0;
319 uint64_t dblock;
320 uint32_t extlen = 0;
321 int error;
322
323 if (!ip->i_di.di_size ||
324 ip->i_di.di_size > (64 << 20) ||
325 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
326 gfs2_consist_inode(ip);
327 return -EIO;
328 }
329 sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block;
330 sdp->sd_unlinked_chunks = DIV_RU(sdp->sd_unlinked_slots, 8 * PAGE_SIZE);
331
332 error = -ENOMEM;
333
334 sdp->sd_unlinked_bitmap = kcalloc(sdp->sd_unlinked_chunks,
335 sizeof(unsigned char *),
336 GFP_KERNEL);
337 if (!sdp->sd_unlinked_bitmap)
338 return error;
339
340 for (x = 0; x < sdp->sd_unlinked_chunks; x++) {
341 sdp->sd_unlinked_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
342 if (!sdp->sd_unlinked_bitmap[x])
343 goto fail;
344 }
345
346 for (x = 0; x < blocks; x++) {
347 struct buffer_head *bh;
348 unsigned int y;
349
350 if (!extlen) {
351 int new = 0;
352 error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
353 if (error)
354 goto fail;
355 }
356 gfs2_meta_ra(ip->i_gl, dblock, extlen);
357 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
358 &bh);
359 if (error)
360 goto fail;
361 error = -EIO;
362 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
363 brelse(bh);
364 goto fail;
365 }
366
367 for (y = 0;
368 y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots;
369 y++, slot++) {
370 struct gfs2_unlinked_tag ut;
371 struct gfs2_unlinked *ul;
372
373 gfs2_unlinked_tag_in(&ut, bh->b_data +
374 sizeof(struct gfs2_meta_header) +
375 y * sizeof(struct gfs2_unlinked_tag));
376 if (!ut.ut_inum.no_addr)
377 continue;
378
379 error = -ENOMEM;
380 ul = ul_alloc(sdp);
381 if (!ul) {
382 brelse(bh);
383 goto fail;
384 }
385 ul->ul_ut = ut;
386 ul->ul_slot = slot;
387
388 spin_lock(&sdp->sd_unlinked_spin);
389 gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1);
390 spin_unlock(&sdp->sd_unlinked_spin);
391 ul_hash(sdp, ul);
392
393 gfs2_unlinked_put(sdp, ul);
394 found++;
395 }
396
397 brelse(bh);
398 dblock++;
399 extlen--;
400 }
401
402 if (found)
403 fs_info(sdp, "found %u unlinked inodes\n", found);
404
405 return 0;
406
407 fail:
408 gfs2_unlinked_cleanup(sdp);
409 return error;
410}
411
412/**
413 * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures
414 * @sdp: the filesystem
415 *
416 */
417
418void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp)
419{
420 struct list_head *head = &sdp->sd_unlinked_list;
421 struct gfs2_unlinked *ul;
422 unsigned int x;
423
424 spin_lock(&sdp->sd_unlinked_spin);
425 while (!list_empty(head)) {
426 ul = list_entry(head->next, struct gfs2_unlinked, ul_list);
427
428 if (ul->ul_count > 1) {
429 list_move_tail(&ul->ul_list, head);
430 spin_unlock(&sdp->sd_unlinked_spin);
431 schedule();
432 spin_lock(&sdp->sd_unlinked_spin);
433 continue;
434 }
435
436 list_del_init(&ul->ul_list);
437 atomic_dec(&sdp->sd_unlinked_count);
438
439 gfs2_assert_warn(sdp, ul->ul_count == 1);
440 gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags));
441 kfree(ul);
442 }
443 spin_unlock(&sdp->sd_unlinked_spin);
444
445 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count));
446
447 if (sdp->sd_unlinked_bitmap) {
448 for (x = 0; x < sdp->sd_unlinked_chunks; x++)
449 kfree(sdp->sd_unlinked_bitmap[x]);
450 kfree(sdp->sd_unlinked_bitmap);
451 }
452}
453
diff --git a/fs/gfs2/unlinked.h b/fs/gfs2/unlinked.h
new file mode 100644
index 000000000000..51e77f88d74f
--- /dev/null
+++ b/fs/gfs2/unlinked.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UNLINKED_DOT_H__
11#define __UNLINKED_DOT_H__
12
13int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul);
14void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
15
16int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
17int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
18int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
19
20int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp);
21
22int gfs2_unlinked_init(struct gfs2_sbd *sdp);
23void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp);
24
25#endif /* __UNLINKED_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
new file mode 100644
index 000000000000..74e2c62f2370
--- /dev/null
+++ b/fs/gfs2/util.c
@@ -0,0 +1,273 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <asm/semaphore.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "glock.h"
21#include "lm.h"
22
23kmem_cache_t *gfs2_glock_cachep __read_mostly;
24kmem_cache_t *gfs2_inode_cachep __read_mostly;
25kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
26
27uint32_t gfs2_disk_hash(const char *data, int len)
28{
29 return crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF;
30}
31
32void gfs2_assert_i(struct gfs2_sbd *sdp)
33{
34 printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
35 sdp->sd_fsname);
36}
37
38/**
39 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
40 * Returns: -1 if this call withdrew the machine,
41 * -2 if it was already withdrawn
42 */
43
44int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
45 const char *function, char *file, unsigned int line)
46{
47 int me;
48 me = gfs2_lm_withdraw(sdp,
49 "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
50 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
51 sdp->sd_fsname, assertion,
52 sdp->sd_fsname, function, file, line);
53 return (me) ? -1 : -2;
54}
55
56/**
57 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
58 * Returns: -1 if we printed something
59 * -2 if we didn't
60 */
61
62int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
63 const char *function, char *file, unsigned int line)
64{
65 if (time_before(jiffies,
66 sdp->sd_last_warning +
67 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
68 return -2;
69
70 printk(KERN_WARNING
71 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
72 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
73 sdp->sd_fsname, assertion,
74 sdp->sd_fsname, function, file, line);
75
76 if (sdp->sd_args.ar_debug)
77 BUG();
78
79 sdp->sd_last_warning = jiffies;
80
81 return -1;
82}
83
84/**
85 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
86 * Returns: -1 if this call withdrew the machine,
87 * 0 if it was already withdrawn
88 */
89
90int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
91 char *file, unsigned int line)
92{
93 int rv;
94 rv = gfs2_lm_withdraw(sdp,
95 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
96 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
97 sdp->sd_fsname,
98 sdp->sd_fsname, function, file, line);
99 return rv;
100}
101
102/**
103 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
104 * Returns: -1 if this call withdrew the machine,
105 * 0 if it was already withdrawn
106 */
107
108int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
109 const char *function, char *file, unsigned int line)
110{
111 struct gfs2_sbd *sdp = ip->i_sbd;
112 int rv;
113 rv = gfs2_lm_withdraw(sdp,
114 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
115 "GFS2: fsid=%s: inode = %llu %llu\n"
116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
117 sdp->sd_fsname,
118 sdp->sd_fsname, ip->i_num.no_formal_ino, ip->i_num.no_addr,
119 sdp->sd_fsname, function, file, line);
120 return rv;
121}
122
123/**
124 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
125 * Returns: -1 if this call withdrew the machine,
126 * 0 if it was already withdrawn
127 */
128
129int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
130 const char *function, char *file, unsigned int line)
131{
132 struct gfs2_sbd *sdp = rgd->rd_sbd;
133 int rv;
134 rv = gfs2_lm_withdraw(sdp,
135 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
136 "GFS2: fsid=%s: RG = %llu\n"
137 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
138 sdp->sd_fsname,
139 sdp->sd_fsname, rgd->rd_ri.ri_addr,
140 sdp->sd_fsname, function, file, line);
141 return rv;
142}
143
144/**
145 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
146 * Returns: -1 if this call withdrew the machine,
147 * -2 if it was already withdrawn
148 */
149
150int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
151 const char *type, const char *function, char *file,
152 unsigned int line)
153{
154 int me;
155 me = gfs2_lm_withdraw(sdp,
156 "GFS2: fsid=%s: fatal: invalid metadata block\n"
157 "GFS2: fsid=%s: bh = %llu (%s)\n"
158 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
159 sdp->sd_fsname,
160 sdp->sd_fsname, (uint64_t)bh->b_blocknr, type,
161 sdp->sd_fsname, function, file, line);
162 return (me) ? -1 : -2;
163}
164
165/**
166 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
167 * Returns: -1 if this call withdrew the machine,
168 * -2 if it was already withdrawn
169 */
170
171int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
172 uint16_t type, uint16_t t, const char *function,
173 char *file, unsigned int line)
174{
175 int me;
176 me = gfs2_lm_withdraw(sdp,
177 "GFS2: fsid=%s: fatal: invalid metadata block\n"
178 "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
179 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
180 sdp->sd_fsname,
181 sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, t,
182 sdp->sd_fsname, function, file, line);
183 return (me) ? -1 : -2;
184}
185
186/**
187 * gfs2_io_error_i - Flag an I/O error and withdraw
188 * Returns: -1 if this call withdrew the machine,
189 * 0 if it was already withdrawn
190 */
191
192int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
193 unsigned int line)
194{
195 int rv;
196 rv = gfs2_lm_withdraw(sdp,
197 "GFS2: fsid=%s: fatal: I/O error\n"
198 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
199 sdp->sd_fsname,
200 sdp->sd_fsname, function, file, line);
201 return rv;
202}
203
204/**
205 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
206 * Returns: -1 if this call withdrew the machine,
207 * 0 if it was already withdrawn
208 */
209
210int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
211 const char *function, char *file, unsigned int line)
212{
213 int rv;
214 rv = gfs2_lm_withdraw(sdp,
215 "GFS2: fsid=%s: fatal: I/O error\n"
216 "GFS2: fsid=%s: block = %llu\n"
217 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
218 sdp->sd_fsname,
219 sdp->sd_fsname, (uint64_t)bh->b_blocknr,
220 sdp->sd_fsname, function, file, line);
221 return rv;
222}
223
224/**
225 * gfs2_add_bh_to_ub - copy a buffer up to user space
226 * @ub: the structure representing where to copy
227 * @bh: the buffer
228 *
229 * Returns: errno
230 */
231
232int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh)
233{
234 uint64_t blkno = bh->b_blocknr;
235
236 if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size)
237 return -ENOMEM;
238
239 if (copy_to_user(ub->ub_data + ub->ub_count,
240 &blkno,
241 sizeof(uint64_t)))
242 return -EFAULT;
243 ub->ub_count += sizeof(uint64_t);
244
245 if (copy_to_user(ub->ub_data + ub->ub_count,
246 bh->b_data,
247 bh->b_size))
248 return -EFAULT;
249 ub->ub_count += bh->b_size;
250
251 return 0;
252}
253
254void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
255 unsigned int bit, int new_value)
256{
257 unsigned int c, o, b = bit;
258 int old_value;
259
260 c = b / (8 * PAGE_SIZE);
261 b %= 8 * PAGE_SIZE;
262 o = b / 8;
263 b %= 8;
264
265 old_value = (bitmap[c][o] & (1 << b));
266 gfs2_assert_withdraw(sdp, !old_value != !new_value);
267
268 if (new_value)
269 bitmap[c][o] |= 1 << b;
270 else
271 bitmap[c][o] &= ~(1 << b);
272}
273
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
new file mode 100644
index 000000000000..21466fe9bf43
--- /dev/null
+++ b/fs/gfs2/util.h
@@ -0,0 +1,180 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__
12
13uint32_t gfs2_disk_hash(const char *data, int len);
14
15
16#define fs_printk(level, fs, fmt, arg...) \
17 printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg)
18
19#define fs_info(fs, fmt, arg...) \
20 fs_printk(KERN_INFO , fs , fmt , ## arg)
21
22#define fs_warn(fs, fmt, arg...) \
23 fs_printk(KERN_WARNING , fs , fmt , ## arg)
24
25#define fs_err(fs, fmt, arg...) \
26 fs_printk(KERN_ERR, fs , fmt , ## arg)
27
28
29void gfs2_assert_i(struct gfs2_sbd *sdp);
30
31#define gfs2_assert(sdp, assertion) \
32do { \
33 if (unlikely(!(assertion))) { \
34 gfs2_assert_i(sdp); \
35 BUG(); \
36 } \
37} while (0)
38
39
40int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
41 const char *function, char *file, unsigned int line);
42
43#define gfs2_assert_withdraw(sdp, assertion) \
44((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
45 __FUNCTION__, __FILE__, __LINE__))
46
47
48int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
49 const char *function, char *file, unsigned int line);
50
51#define gfs2_assert_warn(sdp, assertion) \
52((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
53 __FUNCTION__, __FILE__, __LINE__))
54
55
56int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
57 const char *function, char *file, unsigned int line);
58
59#define gfs2_consist(sdp) \
60gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)
61
62
63int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
64 const char *function, char *file, unsigned int line);
65
66#define gfs2_consist_inode(ip) \
67gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)
68
69
70int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
71 const char *function, char *file, unsigned int line);
72
73#define gfs2_consist_rgrpd(rgd) \
74gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
75
76
77int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
78 const char *type, const char *function,
79 char *file, unsigned int line);
80
81static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
82 struct buffer_head *bh,
83 const char *function,
84 char *file, unsigned int line)
85{
86 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
87 uint32_t magic = mh->mh_magic;
88 magic = be32_to_cpu(magic);
89 if (unlikely(magic != GFS2_MAGIC))
90 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
91 file, line);
92 return 0;
93}
94
95#define gfs2_meta_check(sdp, bh) \
96gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
97
98
99int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
100 uint16_t type, uint16_t t,
101 const char *function,
102 char *file, unsigned int line);
103
104static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
105 struct buffer_head *bh,
106 uint16_t type,
107 const char *function,
108 char *file, unsigned int line)
109{
110 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
111 uint32_t magic = mh->mh_magic;
112 uint16_t t = mh->mh_type;
113 magic = be32_to_cpu(magic);
114 if (unlikely(magic != GFS2_MAGIC))
115 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
116 file, line);
117 t = be16_to_cpu(t);
118 if (unlikely(t != type))
119 return gfs2_metatype_check_ii(sdp, bh, type, t, function,
120 file, line);
121 return 0;
122}
123
124#define gfs2_metatype_check(sdp, bh, type) \
125gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
126
127static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type,
128 uint16_t format)
129{
130 struct gfs2_meta_header *mh;
131 mh = (struct gfs2_meta_header *)bh->b_data;
132 mh->mh_type = cpu_to_be16(type);
133 mh->mh_format = cpu_to_be16(format);
134}
135
136
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);

/*
 * Report an I/O error at the call site.  The macro deliberately has no
 * trailing semicolon so it can be used as an expression and as the sole
 * statement of an if/else branch.
 */
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)
142
143
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);

/*
 * Report an I/O error on a buffer at the call site.  The macro
 * deliberately has no trailing semicolon so it can be used as an
 * expression and as the sole statement of an if/else branch.
 */
#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
149
150
151extern kmem_cache_t *gfs2_glock_cachep;
152extern kmem_cache_t *gfs2_inode_cachep;
153extern kmem_cache_t *gfs2_bufdata_cachep;
154
155struct gfs2_user_buffer {
156 char __user *ub_data;
157 unsigned int ub_size;
158 unsigned int ub_count;
159};
160
161int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh);
162
163static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
164 unsigned int *p)
165{
166 unsigned int x;
167 spin_lock(&gt->gt_spin);
168 x = *p;
169 spin_unlock(&gt->gt_spin);
170 return x;
171}
172
173#define gfs2_tune_get(sdp, field) \
174gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
175
176void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
177 unsigned int bit, int new_value);
178
179#endif /* __UTIL_DOT_H__ */
180
diff --git a/include/linux/gfs2_ioctl.h b/include/linux/gfs2_ioctl.h
new file mode 100644
index 000000000000..dde9840b1c30
--- /dev/null
+++ b/include/linux/gfs2_ioctl.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GFS2_IOCTL_DOT_H__
11#define __GFS2_IOCTL_DOT_H__
12
13#define _GFS2C_(x) (('G' << 16) | ('2' << 8) | (x))
14
15/* Ioctls implemented */
16
17#define GFS2_IOCTL_IDENTIFY _GFS2C_(1)
18#define GFS2_IOCTL_SUPER _GFS2C_(2)
19#define GFS2_IOCTL_SETFLAGS _GFS2C_(3)
20#define GFS2_IOCTL_GETFLAGS _GFS2C_(4)
21
22struct gfs2_ioctl {
23 unsigned int gi_argc;
24 const char **gi_argv;
25
26 char __user *gi_data;
27 unsigned int gi_size;
28 uint64_t gi_offset;
29};
30
31#endif /* __GFS2_IOCTL_DOT_H__ */
32
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
new file mode 100644
index 000000000000..213d664d495d
--- /dev/null
+++ b/include/linux/gfs2_ondisk.h
@@ -0,0 +1,454 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GFS2_ONDISK_DOT_H__
11#define __GFS2_ONDISK_DOT_H__
12
13#define GFS2_MAGIC 0x01161970 /* NOTE(review): presumably the value stored in gfs2_meta_header.mh_magic -- confirm */
14#define GFS2_BASIC_BLOCK 512
15#define GFS2_BASIC_BLOCK_SHIFT 9 /* 1 << 9 == GFS2_BASIC_BLOCK */
16
17/* Lock numbers of the LM_TYPE_NONDISK type */
18
19#define GFS2_MOUNT_LOCK 0
20#define GFS2_LIVE_LOCK 1
21#define GFS2_TRANS_LOCK 2
22#define GFS2_RENAME_LOCK 3
23
24/* Format numbers for various metadata types */
/* Values step by 100 in declaration order; NONE through QC equal the matching GFS2_METATYPE_* value times 100. GFS2_FORMAT_FS is the one exception at 1801. */
25
26#define GFS2_FORMAT_NONE 0
27#define GFS2_FORMAT_SB 100
28#define GFS2_FORMAT_RG 200
29#define GFS2_FORMAT_RB 300
30#define GFS2_FORMAT_DI 400
31#define GFS2_FORMAT_IN 500
32#define GFS2_FORMAT_LF 600
33#define GFS2_FORMAT_JD 700
34#define GFS2_FORMAT_LH 800
35#define GFS2_FORMAT_LD 900
36#define GFS2_FORMAT_LB 1000
37#define GFS2_FORMAT_EA 1100
38#define GFS2_FORMAT_ED 1200
39#define GFS2_FORMAT_UT 1300
40#define GFS2_FORMAT_QC 1400
41/* These are format numbers for entities contained in files */
42#define GFS2_FORMAT_RI 1500
43#define GFS2_FORMAT_DE 1600
44#define GFS2_FORMAT_QU 1700
45/* These are part of the superblock */
46#define GFS2_FORMAT_FS 1801
47#define GFS2_FORMAT_MULTI 1900
48
49/*
50 * An on-disk inode number
51 */
52
/* True when both fields of the two pointed-to inode numbers match. Each argument is expanded twice, and the fields are compared as stored, so both operands must hold the same byte order. */
53#define gfs2_inum_equal(ino1, ino2) \
54 (((ino1)->no_formal_ino == (ino2)->no_formal_ino) && \
55 ((ino1)->no_addr == (ino2)->no_addr))
56
/* Two 64-bit fields declared big-endian (__be64); 16 bytes in total. */
57struct gfs2_inum {
58 __be64 no_formal_ino;
59 __be64 no_addr;
60};
61
62/*
63 * Generic metadata head structure
64 * Every inplace buffer logged in the journal must start with this.
65 */
66
/* Metadata type numbers 0..14; each matches the GFS2_FORMAT_* constant of the same suffix divided by 100. */
67#define GFS2_METATYPE_NONE 0
68#define GFS2_METATYPE_SB 1
69#define GFS2_METATYPE_RG 2
70#define GFS2_METATYPE_RB 3
71#define GFS2_METATYPE_DI 4
72#define GFS2_METATYPE_IN 5
73#define GFS2_METATYPE_LF 6
74#define GFS2_METATYPE_JD 7
75#define GFS2_METATYPE_LH 8
76#define GFS2_METATYPE_LD 9
77#define GFS2_METATYPE_LB 10
78#define GFS2_METATYPE_EA 11
79#define GFS2_METATYPE_ED 12
80#define GFS2_METATYPE_UT 13
81#define GFS2_METATYPE_QC 14
82
/* Common header embedded as the first member of the metadata structures in this file; field sizes add up to 24 bytes. */
83struct gfs2_meta_header {
84 __be32 mh_magic; /* NOTE(review): presumably GFS2_MAGIC -- confirm */
85 __be32 mh_type; /* NOTE(review): presumably a GFS2_METATYPE_* value -- confirm */
86 __be64 __pad0; /* Was generation number in gfs1 */
87 __be32 mh_format; /* NOTE(review): presumably a GFS2_FORMAT_* value -- confirm */
88 __be32 __pad1; /* Was incarnation number in gfs1 */
89};
90
91/*
92 * super-block structure
93 *
94 * It's probably good if SIZEOF_SB <= GFS2_BASIC_BLOCK (512 bytes)
95 *
96 * Order is important, need to be able to read old superblocks to do on-disk
97 * version upgrades.
98 */
99
100/* Address of superblock in GFS2 basic blocks */
101#define GFS2_SB_ADDR 128 /* 128 basic blocks of 512 bytes = byte offset 65536 */
102
103/* The lock number for the superblock (must be zero) */
104#define GFS2_SB_LOCK 0
105
106/* Requirement: GFS2_LOCKNAME_LEN % 8 == 0
107 Includes: the fencing zero at the end */
108#define GFS2_LOCKNAME_LEN 64
109
/* Field sizes add up to 224 bytes (24-byte header, six 32-bit words, three 16-byte inode numbers, two 64-byte names), which fits in one 512-byte GFS2_BASIC_BLOCK. */
110struct gfs2_sb {
111 struct gfs2_meta_header sb_header;
112
113 __be32 sb_fs_format;
114 __be32 sb_multihost_format;
115 __u32 __pad0; /* Was superblock flags in gfs1 */
116
117 __be32 sb_bsize;
118 __be32 sb_bsize_shift;
119 __u32 __pad1; /* Was journal segment size in gfs1 */
120
121 struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */
122 struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */
123 struct gfs2_inum sb_root_dir;
124
125 char sb_lockproto[GFS2_LOCKNAME_LEN];
126 char sb_locktable[GFS2_LOCKNAME_LEN];
127 /* In gfs1, quota and license dinodes followed */
128};
129
130/*
131 * resource index structure
132 */
133
/* One resource index entry; field sizes add up to 96 bytes. */
134struct gfs2_rindex {
135 __be64 ri_addr; /* grp block disk address */
136 __be32 ri_length; /* length of rgrp header in fs blocks */
137 __u32 __pad;
138
139 __be64 ri_data0; /* first data location */
140 __be32 ri_data; /* num of data blocks in rgrp */
141
142 __be32 ri_bitbytes; /* number of bytes in data bitmaps */
143
144 __u8 ri_reserved[64];
145};
146
147/*
148 * resource group header structure
149 */
150
151/* Number of blocks per byte in rgrp */
152#define GFS2_NBBY 4 /* 4 blocks * GFS2_BIT_SIZE bits = the 8 bits of one bitmap byte */
153#define GFS2_BIT_SIZE 2
154#define GFS2_BIT_MASK 0x00000003 /* selects one GFS2_BIT_SIZE-wide entry */
155
/* The four values a 2-bit bitmap entry can take. */
156#define GFS2_BLKST_FREE 0
157#define GFS2_BLKST_USED 1
158#define GFS2_BLKST_INVALID 2
159#define GFS2_BLKST_DINODE 3
160
/* NOTE(review): presumably the bits of gfs2_rgrp.rg_flags -- confirm. */
161#define GFS2_RGF_JOURNAL 0x00000001
162#define GFS2_RGF_METAONLY 0x00000002
163#define GFS2_RGF_DATAONLY 0x00000004
164#define GFS2_RGF_NOALLOC 0x00000008
165
/* Resource group header; field sizes add up to 128 bytes. */
166struct gfs2_rgrp {
167 struct gfs2_meta_header rg_header;
168
169 __be32 rg_flags;
170 __be32 rg_free;
171 __be32 rg_dinodes;
172
173 __u8 rg_reserved[92]; /* Several fields from gfs1 now reserved */
174};
175
176/*
177 * quota structure
178 */
179
/* Three big-endian 64-bit values, 24 bytes in total. NOTE(review): units and the exact meaning of limit/warn/value are not visible here -- confirm in the quota code. */
180struct gfs2_quota {
181 __be64 qu_limit;
182 __be64 qu_warn;
183 __be64 qu_value;
184};
185
186/*
187 * dinode structure
188 */
189
190#define GFS2_MAX_META_HEIGHT 10
191#define GFS2_DIR_MAX_DEPTH 17
192
/* Shift a value 12 bits into (DT2IF) or out of (IF2DT) the S_IFMT field of a mode; S_IFMT must be supplied by the including file. NOTE(review): dt is presumably a DT_* directory entry type -- confirm. */
193#define DT2IF(dt) (((dt) << 12) & S_IFMT)
194#define IF2DT(sif) (((sif) & S_IFMT) >> 12)
195
196/* Dinode flags */
/* Bits 0x00000400 through 0x10000000 are not assigned in this list. */
197#define GFS2_DIF_JDATA 0x00000001
198#define GFS2_DIF_EXHASH 0x00000002
199#define GFS2_DIF_UNUSED 0x00000004 /* only in gfs1 */
200#define GFS2_DIF_EA_INDIRECT 0x00000008
201#define GFS2_DIF_DIRECTIO 0x00000010
202#define GFS2_DIF_IMMUTABLE 0x00000020
203#define GFS2_DIF_APPENDONLY 0x00000040
204#define GFS2_DIF_NOATIME 0x00000080
205#define GFS2_DIF_SYNC 0x00000100
206#define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
207#define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
208#define GFS2_DIF_INHERIT_DIRECTIO 0x40000000
209#define GFS2_DIF_INHERIT_JDATA 0x80000000
210
/* On-disk inode; field sizes add up to 232 bytes. The __pad* members keep the remaining fields at the offsets described in the comments below. */
211struct gfs2_dinode {
212 struct gfs2_meta_header di_header;
213
214 struct gfs2_inum di_num;
215
216 __be32 di_mode; /* mode of file */
217 __be32 di_uid; /* owner's user id */
218 __be32 di_gid; /* owner's group id */
219 __be32 di_nlink; /* number of links to this file */
220 __be64 di_size; /* number of bytes in file */
221 __be64 di_blocks; /* number of blocks in file */
222 __be64 di_atime; /* time last accessed */
223 __be64 di_mtime; /* time last modified */
224 __be64 di_ctime; /* time last changed */
225 __be32 di_major; /* device major number */
226 __be32 di_minor; /* device minor number */
227
228 /* This section varies from gfs1. Padding added to align with
229 * remainder of dinode
230 */
231 __be64 di_goal_meta; /* rgrp to alloc from next */
232 __be64 di_goal_data; /* data block goal */
233 __u32 __pad[2];
234
235 __be32 di_flags; /* GFS2_DIF_... */
236 __be32 di_payload_format; /* GFS2_FORMAT_... */
237 __u16 __pad1; /* Was ditype in gfs1 */
238 __be16 di_height; /* height of metadata */
239 __u32 __pad2; /* Unused incarnation number from gfs1 */
240
241 /* These only apply to directories */
242 __u16 __pad3; /* Padding */
243 __be16 di_depth; /* Number of bits in the table */
244 __be32 di_entries; /* The number of entries in the directory */
245
246 struct gfs2_inum __pad4; /* Unused even in current gfs1 */
247
248 __be64 di_eattr; /* extended attribute block number */
249
250 __u8 di_reserved[56];
251};
252
253/*
254 * directory structure - many of these per directory file
255 */
256
257#define GFS2_FNAMESIZE 255 /* also the largest value the __u8 de_name_len can hold */
/* Size of a dirent header plus a name of name_len bytes, rounded up to the next multiple of 8. */
258#define GFS2_DIRENT_SIZE(name_len) ((sizeof(struct gfs2_dirent) + (name_len) + 7) & ~7)
259
/* Fixed-size directory entry header; field sizes add up to 32 bytes. GFS2_DIRENT_SIZE() accounts for name bytes on top of this header. */
260struct gfs2_dirent {
261 struct gfs2_inum de_inum;
262 __be32 de_hash;
263 __be32 de_rec_len;
264 __u8 de_name_len;
265 __u8 de_type;
266 __u16 __pad1;
267 __u32 __pad2;
268};
269
270/*
271 * Header of leaf directory nodes
272 */
273
/* Leaf block header; field sizes add up to 72 bytes. */
274struct gfs2_leaf {
275 struct gfs2_meta_header lf_header;
276
277 __be16 lf_depth; /* Depth of leaf */
278 __be16 lf_entries; /* Number of dirents in leaf */
279 __be32 lf_dirent_format; /* Format of the dirents */
280 __be64 lf_next; /* Next leaf, if overflow */
281
282 __u8 lf_reserved[32];
283};
284
285/*
286 * Extended attribute header format
287 */
288
289#define GFS2_EA_MAX_NAME_LEN 255 /* also the largest value the __u8 ea_name_len can hold */
290#define GFS2_EA_MAX_DATA_LEN 65536
291
292#define GFS2_EATYPE_UNUSED 0
293#define GFS2_EATYPE_USR 1
294#define GFS2_EATYPE_SYS 2
295
296#define GFS2_EATYPE_LAST 2
/* Accepts every value up to GFS2_EATYPE_LAST, which includes GFS2_EATYPE_UNUSED (0). */
297#define GFS2_EATYPE_VALID(x) ((x) <= GFS2_EATYPE_LAST)
298
299#define GFS2_EAFLAG_LAST 0x01 /* last ea in block */
300
/* Extended attribute record header; field sizes add up to 16 bytes. */
301struct gfs2_ea_header {
302 __be32 ea_rec_len;
303 __be32 ea_data_len;
304 __u8 ea_name_len; /* the name is not NUL-terminated */
305 __u8 ea_type; /* GFS2_EATYPE_... */
306 __u8 ea_flags; /* GFS2_EAFLAG_... */
307 __u8 ea_num_ptrs;
308 __u32 __pad;
309};
310
311/*
312 * Log header structure
313 */
314
315#define GFS2_LOG_HEAD_UNMOUNT 0x00000001 /* log is clean */
316
/* Log header block; field sizes add up to 48 bytes. */
317struct gfs2_log_header {
318 struct gfs2_meta_header lh_header;
319
320 __be64 lh_sequence; /* Sequence number of this transaction */
321 __be32 lh_flags; /* GFS2_LOG_HEAD_... */
322 __be32 lh_tail; /* Block number of log tail */
323 __be32 lh_blkno;
324 __be32 lh_hash;
325};
326
327/*
328 * Log type descriptor
329 */
330
331#define GFS2_LOG_DESC_METADATA 300
332/* ld_data1 is the number of metadata blocks in the descriptor.
333 ld_data2 is unused. */
334
335#define GFS2_LOG_DESC_REVOKE 301
336/* ld_data1 is the number of revoke blocks in the descriptor.
337 ld_data2 is unused. */
338
/* Log descriptor block; field sizes add up to 72 bytes. The meaning of ld_data1/ld_data2 depends on ld_type, as described next to each GFS2_LOG_DESC_* value. */
339struct gfs2_log_descriptor {
340 struct gfs2_meta_header ld_header;
341
342 __be32 ld_type; /* GFS2_LOG_DESC_... */
343 __be32 ld_length; /* Number of buffers in this chunk */
344 __be32 ld_data1; /* descriptor-specific field */
345 __be32 ld_data2; /* descriptor-specific field */
346
347 __u8 ld_reserved[32];
348};
349
350/*
351 * Inum Range
352 * Describe a range of formal inode numbers allocated to
353 * one machine to assign to inodes.
354 */
355
356#define GFS2_INUM_QUANTUM 1048576 /* 1 << 20 */
357
/* Two big-endian 64-bit values, 16 bytes in total. */
358struct gfs2_inum_range {
359 __be64 ir_start;
360 __be64 ir_length;
361};
362
363/*
364 * Statfs change
364 * Describes a change to the pool of free and allocated
366 * blocks.
367 */
368
/* Three big-endian 64-bit values, 24 bytes in total. NOTE(review): presumably deltas in total blocks, free blocks and dinodes -- confirm in the statfs code. */
369struct gfs2_statfs_change {
370 __be64 sc_total;
371 __be64 sc_free;
372 __be64 sc_dinodes;
373};
374
375/*
376 * Unlinked Tag
377 * Describes an allocated inode that isn't linked into
378 * the directory tree and might need to be deallocated.
379 */
380
381#define GFS2_UTF_UNINIT 0x00000001
382
/* Field sizes add up to 24 bytes. */
383struct gfs2_unlinked_tag {
384 struct gfs2_inum ut_inum;
385 __be32 ut_flags; /* GFS2_UTF_... */
386 __u32 __pad;
387};
388
389/*
390 * Quota change
391 * Describes an allocation change for a particular
392 * user or group.
393 */
394
395#define GFS2_QCF_USER 0x00000001 /* NOTE(review): presumably marks qc_id as a user id rather than a group id -- confirm */
396
/* Field sizes add up to 16 bytes. */
397struct gfs2_quota_change {
398 __be64 qc_change;
399 __be32 qc_flags; /* GFS2_QCF_... */
400 __be32 qc_id;
401};
402
403/* Translation functions */
404
/*
 * All functions here take the structure plus a non-const char buffer.
 * NOTE(review): by their names, *_in presumably fills the structure from
 * @buf and *_out presumably writes the structure into @buf -- confirm in
 * the defining source file.
 * This list declares no gfs2_log_header_out() and no
 * gfs2_log_descriptor_in()/_out().
 */
405extern void gfs2_inum_in(struct gfs2_inum *no, char *buf);
406extern void gfs2_inum_out(struct gfs2_inum *no, char *buf);
407extern void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf);
408extern void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf);
409extern void gfs2_sb_in(struct gfs2_sb *sb, char *buf);
410extern void gfs2_sb_out(struct gfs2_sb *sb, char *buf);
411extern void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf);
412extern void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf);
413extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf);
414extern void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf);
415extern void gfs2_quota_in(struct gfs2_quota *qu, char *buf);
416extern void gfs2_quota_out(struct gfs2_quota *qu, char *buf);
417extern void gfs2_dinode_in(struct gfs2_dinode *di, char *buf);
418extern void gfs2_dinode_out(struct gfs2_dinode *di, char *buf);
419extern void gfs2_dirent_in(struct gfs2_dirent *de, char *buf);
420extern void gfs2_dirent_out(struct gfs2_dirent *de, char *buf);
421extern void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf);
422extern void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf);
423extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf);
424extern void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf);
425extern void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf);
426extern void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf);
427extern void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf);
428extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf);
429extern void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf);
430extern void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf);
431extern void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf);
432extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf);
433extern void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf);
434
435/* Printing functions */
436
/*
 * One print routine per structure, each returning void. The dirent and
 * ea_header variants take the name as a separate argument; the structures
 * themselves have no name member.
 * NOTE(review): where the output goes is not visible here -- confirm in the
 * defining source file.
 */
437extern void gfs2_inum_print(struct gfs2_inum *no);
438extern void gfs2_meta_header_print(struct gfs2_meta_header *mh);
439extern void gfs2_sb_print(struct gfs2_sb *sb);
440extern void gfs2_rindex_print(struct gfs2_rindex *ri);
441extern void gfs2_rgrp_print(struct gfs2_rgrp *rg);
442extern void gfs2_quota_print(struct gfs2_quota *qu);
443extern void gfs2_dinode_print(struct gfs2_dinode *di);
444extern void gfs2_dirent_print(struct gfs2_dirent *de, char *name);
445extern void gfs2_leaf_print(struct gfs2_leaf *lf);
446extern void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name);
447extern void gfs2_log_header_print(struct gfs2_log_header *lh);
448extern void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld);
449extern void gfs2_inum_range_print(struct gfs2_inum_range *ir);
450extern void gfs2_statfs_change_print(struct gfs2_statfs_change *sc);
451extern void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut);
452extern void gfs2_quota_change_print(struct gfs2_quota_change *qc);
453
454#endif /* __GFS2_ONDISK_DOT_H__ */