aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2')
-rw-r--r--fs/gfs2/Kconfig44
-rw-r--r--fs/gfs2/Makefile10
-rw-r--r--fs/gfs2/acl.c309
-rw-r--r--fs/gfs2/acl.h39
-rw-r--r--fs/gfs2/bmap.c1220
-rw-r--r--fs/gfs2/bmap.h31
-rw-r--r--fs/gfs2/daemon.c196
-rw-r--r--fs/gfs2/daemon.h19
-rw-r--r--fs/gfs2/dir.c1961
-rw-r--r--fs/gfs2/dir.h79
-rw-r--r--fs/gfs2/eaops.c230
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/eattr.c1501
-rw-r--r--fs/gfs2/eattr.h100
-rw-r--r--fs/gfs2/gfs2.h31
-rw-r--r--fs/gfs2/glock.c2231
-rw-r--r--fs/gfs2/glock.h153
-rw-r--r--fs/gfs2/glops.c564
-rw-r--r--fs/gfs2/glops.h25
-rw-r--r--fs/gfs2/incore.h634
-rw-r--r--fs/gfs2/inode.c1338
-rw-r--r--fs/gfs2/inode.h56
-rw-r--r--fs/gfs2/lm.c217
-rw-r--r--fs/gfs2/lm.h42
-rw-r--r--fs/gfs2/locking.c184
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c524
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h187
-rw-r--r--fs/gfs2/locking/dlm/main.c64
-rw-r--r--fs/gfs2/locking/dlm/mount.c255
-rw-r--r--fs/gfs2/locking/dlm/plock.c301
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c226
-rw-r--r--fs/gfs2/locking/dlm/thread.c359
-rw-r--r--fs/gfs2/locking/nolock/Makefile3
-rw-r--r--fs/gfs2/locking/nolock/main.c246
-rw-r--r--fs/gfs2/log.c578
-rw-r--r--fs/gfs2/log.h65
-rw-r--r--fs/gfs2/lops.c809
-rw-r--r--fs/gfs2/lops.h99
-rw-r--r--fs/gfs2/main.c150
-rw-r--r--fs/gfs2/meta_io.c753
-rw-r--r--fs/gfs2/meta_io.h78
-rw-r--r--fs/gfs2/mount.c214
-rw-r--r--fs/gfs2/mount.h17
-rw-r--r--fs/gfs2/ondisk.c308
-rw-r--r--fs/gfs2/ops_address.c782
-rw-r--r--fs/gfs2/ops_address.h22
-rw-r--r--fs/gfs2/ops_dentry.c119
-rw-r--r--fs/gfs2/ops_dentry.h17
-rw-r--r--fs/gfs2/ops_export.c298
-rw-r--r--fs/gfs2/ops_export.h22
-rw-r--r--fs/gfs2/ops_file.c642
-rw-r--r--fs/gfs2/ops_file.h24
-rw-r--r--fs/gfs2/ops_fstype.c943
-rw-r--r--fs/gfs2/ops_fstype.h18
-rw-r--r--fs/gfs2/ops_inode.c1151
-rw-r--r--fs/gfs2/ops_inode.h20
-rw-r--r--fs/gfs2/ops_super.c468
-rw-r--r--fs/gfs2/ops_super.h17
-rw-r--r--fs/gfs2/ops_vm.c184
-rw-r--r--fs/gfs2/ops_vm.h18
-rw-r--r--fs/gfs2/quota.c1226
-rw-r--r--fs/gfs2/quota.h35
-rw-r--r--fs/gfs2/recovery.c570
-rw-r--r--fs/gfs2/recovery.h34
-rw-r--r--fs/gfs2/rgrp.c1513
-rw-r--r--fs/gfs2/rgrp.h69
-rw-r--r--fs/gfs2/super.c976
-rw-r--r--fs/gfs2/super.h54
-rw-r--r--fs/gfs2/sys.c583
-rw-r--r--fs/gfs2/sys.h27
-rw-r--r--fs/gfs2/trans.c184
-rw-r--r--fs/gfs2/trans.h39
-rw-r--r--fs/gfs2/util.c245
-rw-r--r--fs/gfs2/util.h170
75 files changed, 26953 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
new file mode 100644
index 000000000000..8c27de8b9568
--- /dev/null
+++ b/fs/gfs2/Kconfig
@@ -0,0 +1,44 @@
1config GFS2_FS
2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL
4 select FS_POSIX_ACL
5 help
6 A cluster filesystem.
7
8 Allows a cluster of computers to simultaneously use a block device
9 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
10 and writes to the block device like a local filesystem, but also uses
11 a lock module to allow the computers coordinate their I/O so
12 filesystem consistency is maintained. One of the nifty features of
13 GFS is perfect consistency -- changes made to the filesystem on one
14 machine show up immediately on all other machines in the cluster.
15
16 To use the GFS2 filesystem, you will need to enable one or more of
17 the below locking modules. Documentation and utilities for GFS2 can
18 be found here: http://sources.redhat.com/cluster
19
20config GFS2_FS_LOCKING_NOLOCK
21 tristate "GFS2 \"nolock\" locking module"
22 depends on GFS2_FS
23 help
24 Single node locking module for GFS2.
25
26 Use this module if you want to use GFS2 on a single node without
27 its clustering features. You can still take advantage of the
28 large file support, and upgrade to running a full cluster later on
29 if required.
30
31 If you will only be using GFS2 in cluster mode, you do not need this
32 module.
33
34config GFS2_FS_LOCKING_DLM
35 tristate "GFS2 DLM locking module"
36 depends on GFS2_FS
37 select DLM
38 help
39 Multiple node locking module for GFS2
40
41 Most users of GFS2 will require this module. It provides the locking
42 interface between GFS2 and the DLM, which is required to use GFS2
43 in a cluster environment.
44
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
new file mode 100644
index 000000000000..e3f1ada643ac
--- /dev/null
+++ b/fs/gfs2/Makefile
@@ -0,0 +1,10 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
4 mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o
7
8obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
9obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
10
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
new file mode 100644
index 000000000000..5f959b8ce406
--- /dev/null
+++ b/fs/gfs2/acl.c
@@ -0,0 +1,309 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "glock.h"
26#include "inode.h"
27#include "meta_io.h"
28#include "trans.h"
29#include "util.h"
30
31#define ACL_ACCESS 1
32#define ACL_DEFAULT 0
33
34int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
35 struct gfs2_ea_request *er,
36 int *remove, mode_t *mode)
37{
38 struct posix_acl *acl;
39 int error;
40
41 error = gfs2_acl_validate_remove(ip, access);
42 if (error)
43 return error;
44
45 if (!er->er_data)
46 return -EINVAL;
47
48 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
49 if (IS_ERR(acl))
50 return PTR_ERR(acl);
51 if (!acl) {
52 *remove = 1;
53 return 0;
54 }
55
56 error = posix_acl_valid(acl);
57 if (error)
58 goto out;
59
60 if (access) {
61 error = posix_acl_equiv_mode(acl, mode);
62 if (!error)
63 *remove = 1;
64 else if (error > 0)
65 error = 0;
66 }
67
68out:
69 posix_acl_release(acl);
70 return error;
71}
72
73int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
74{
75 if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
76 return -EOPNOTSUPP;
77 if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
78 return -EPERM;
79 if (S_ISLNK(ip->i_di.di_mode))
80 return -EOPNOTSUPP;
81 if (!access && !S_ISDIR(ip->i_di.di_mode))
82 return -EACCES;
83
84 return 0;
85}
86
87static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
88 struct gfs2_ea_location *el, char **data, unsigned int *len)
89{
90 struct gfs2_ea_request er;
91 struct gfs2_ea_location el_this;
92 int error;
93
94 if (!ip->i_di.di_eattr)
95 return 0;
96
97 memset(&er, 0, sizeof(struct gfs2_ea_request));
98 if (access) {
99 er.er_name = GFS2_POSIX_ACL_ACCESS;
100 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
101 } else {
102 er.er_name = GFS2_POSIX_ACL_DEFAULT;
103 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
104 }
105 er.er_type = GFS2_EATYPE_SYS;
106
107 if (!el)
108 el = &el_this;
109
110 error = gfs2_ea_find(ip, &er, el);
111 if (error)
112 return error;
113 if (!el->el_ea)
114 return 0;
115 if (!GFS2_EA_DATA_LEN(el->el_ea))
116 goto out;
117
118 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
119 er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
120 error = -ENOMEM;
121 if (!er.er_data)
122 goto out;
123
124 error = gfs2_ea_get_copy(ip, el, er.er_data);
125 if (error)
126 goto out_kfree;
127
128 if (acl) {
129 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
130 if (IS_ERR(*acl))
131 error = PTR_ERR(*acl);
132 }
133
134out_kfree:
135 if (error || !data)
136 kfree(er.er_data);
137 else {
138 *data = er.er_data;
139 *len = er.er_data_len;
140 }
141out:
142 if (error || el == &el_this)
143 brelse(el->el_bh);
144 return error;
145}
146
147/**
148 * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
149 * @inode: the file we want to do something to
150 * @mask: what we want to do
151 *
152 * Returns: errno
153 */
154
155int gfs2_check_acl_locked(struct inode *inode, int mask)
156{
157 struct posix_acl *acl = NULL;
158 int error;
159
160 error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
161 if (error)
162 return error;
163
164 if (acl) {
165 error = posix_acl_permission(inode, acl, mask);
166 posix_acl_release(acl);
167 return error;
168 }
169
170 return -EAGAIN;
171}
172
173int gfs2_check_acl(struct inode *inode, int mask)
174{
175 struct gfs2_inode *ip = GFS2_I(inode);
176 struct gfs2_holder i_gh;
177 int error;
178
179 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
180 if (!error) {
181 error = gfs2_check_acl_locked(inode, mask);
182 gfs2_glock_dq_uninit(&i_gh);
183 }
184
185 return error;
186}
187
188static int munge_mode(struct gfs2_inode *ip, mode_t mode)
189{
190 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
191 struct buffer_head *dibh;
192 int error;
193
194 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
195 if (error)
196 return error;
197
198 error = gfs2_meta_inode_buffer(ip, &dibh);
199 if (!error) {
200 gfs2_assert_withdraw(sdp,
201 (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
202 ip->i_di.di_mode = mode;
203 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
204 gfs2_dinode_out(&ip->i_di, dibh->b_data);
205 brelse(dibh);
206 }
207
208 gfs2_trans_end(sdp);
209
210 return 0;
211}
212
213int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
214{
215 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
216 struct posix_acl *acl = NULL, *clone;
217 struct gfs2_ea_request er;
218 mode_t mode = ip->i_di.di_mode;
219 int error;
220
221 if (!sdp->sd_args.ar_posix_acl)
222 return 0;
223 if (S_ISLNK(ip->i_di.di_mode))
224 return 0;
225
226 memset(&er, 0, sizeof(struct gfs2_ea_request));
227 er.er_type = GFS2_EATYPE_SYS;
228
229 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
230 &er.er_data, &er.er_data_len);
231 if (error)
232 return error;
233 if (!acl) {
234 mode &= ~current->fs->umask;
235 if (mode != ip->i_di.di_mode)
236 error = munge_mode(ip, mode);
237 return error;
238 }
239
240 clone = posix_acl_clone(acl, GFP_KERNEL);
241 error = -ENOMEM;
242 if (!clone)
243 goto out;
244 posix_acl_release(acl);
245 acl = clone;
246
247 if (S_ISDIR(ip->i_di.di_mode)) {
248 er.er_name = GFS2_POSIX_ACL_DEFAULT;
249 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
250 error = gfs2_system_eaops.eo_set(ip, &er);
251 if (error)
252 goto out;
253 }
254
255 error = posix_acl_create_masq(acl, &mode);
256 if (error < 0)
257 goto out;
258 if (error > 0) {
259 er.er_name = GFS2_POSIX_ACL_ACCESS;
260 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
261 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
262 er.er_mode = mode;
263 er.er_flags = GFS2_ERF_MODE;
264 error = gfs2_system_eaops.eo_set(ip, &er);
265 if (error)
266 goto out;
267 } else
268 munge_mode(ip, mode);
269
270out:
271 posix_acl_release(acl);
272 kfree(er.er_data);
273 return error;
274}
275
276int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
277{
278 struct posix_acl *acl = NULL, *clone;
279 struct gfs2_ea_location el;
280 char *data;
281 unsigned int len;
282 int error;
283
284 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
285 if (error)
286 return error;
287 if (!acl)
288 return gfs2_setattr_simple(ip, attr);
289
290 clone = posix_acl_clone(acl, GFP_KERNEL);
291 error = -ENOMEM;
292 if (!clone)
293 goto out;
294 posix_acl_release(acl);
295 acl = clone;
296
297 error = posix_acl_chmod_masq(acl, attr->ia_mode);
298 if (!error) {
299 posix_acl_to_xattr(acl, data, len);
300 error = gfs2_ea_acl_chmod(ip, &el, attr, data);
301 }
302
303out:
304 posix_acl_release(acl);
305 brelse(el.el_bh);
306 kfree(data);
307 return error;
308}
309
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
new file mode 100644
index 000000000000..05c294fe0d78
--- /dev/null
+++ b/fs/gfs2/acl.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __ACL_DOT_H__
11#define __ACL_DOT_H__
12
13#include "incore.h"
14
15#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
16#define GFS2_POSIX_ACL_ACCESS_LEN 16
17#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
18#define GFS2_POSIX_ACL_DEFAULT_LEN 17
19
20#define GFS2_ACL_IS_ACCESS(name, len) \
21 ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
22 !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))
23
24#define GFS2_ACL_IS_DEFAULT(name, len) \
25 ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
26 !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))
27
28struct gfs2_ea_request;
29
30int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
31 struct gfs2_ea_request *er,
32 int *remove, mode_t *mode);
33int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
34int gfs2_check_acl_locked(struct inode *inode, int mask);
35int gfs2_check_acl(struct inode *inode, int mask);
36int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
37int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
38
39#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
new file mode 100644
index 000000000000..92eef825167d
--- /dev/null
+++ b/fs/gfs2/bmap.c
@@ -0,0 +1,1220 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "inode.h"
24#include "meta_io.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "trans.h"
28#include "dir.h"
29#include "util.h"
30#include "ops_address.h"
31
32/* This doesn't need to be that large as max 64 bit pointers in a 4k
33 * block is 512, so __u16 is fine for that. It saves stack space to
34 * keep it small.
35 */
36struct metapath {
37 __u16 mp_list[GFS2_MAX_META_HEIGHT];
38};
39
40typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
41 struct buffer_head *bh, u64 *top,
42 u64 *bottom, unsigned int height,
43 void *data);
44
45struct strip_mine {
46 int sm_first;
47 unsigned int sm_height;
48};
49
50/**
51 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
52 * @ip: the inode
53 * @dibh: the dinode buffer
54 * @block: the block number that was allocated
55 * @private: any locked page held by the caller process
56 *
57 * Returns: errno
58 */
59
60static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
61 u64 block, struct page *page)
62{
63 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
64 struct inode *inode = &ip->i_inode;
65 struct buffer_head *bh;
66 int release = 0;
67
68 if (!page || page->index) {
69 page = grab_cache_page(inode->i_mapping, 0);
70 if (!page)
71 return -ENOMEM;
72 release = 1;
73 }
74
75 if (!PageUptodate(page)) {
76 void *kaddr = kmap(page);
77
78 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
79 ip->i_di.di_size);
80 memset(kaddr + ip->i_di.di_size, 0,
81 PAGE_CACHE_SIZE - ip->i_di.di_size);
82 kunmap(page);
83
84 SetPageUptodate(page);
85 }
86
87 if (!page_has_buffers(page))
88 create_empty_buffers(page, 1 << inode->i_blkbits,
89 (1 << BH_Uptodate));
90
91 bh = page_buffers(page);
92
93 if (!buffer_mapped(bh))
94 map_bh(bh, inode->i_sb, block);
95
96 set_buffer_uptodate(bh);
97 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
98 gfs2_trans_add_bh(ip->i_gl, bh, 0);
99 mark_buffer_dirty(bh);
100
101 if (release) {
102 unlock_page(page);
103 page_cache_release(page);
104 }
105
106 return 0;
107}
108
109/**
110 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
111 * @ip: The GFS2 inode to unstuff
112 * @unstuffer: the routine that handles unstuffing a non-zero length file
113 * @private: private data for the unstuffer
114 *
115 * This routine unstuffs a dinode and returns it to a "normal" state such
116 * that the height can be grown in the traditional way.
117 *
118 * Returns: errno
119 */
120
121int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
122{
123 struct buffer_head *bh, *dibh;
124 u64 block = 0;
125 int isdir = gfs2_is_dir(ip);
126 int error;
127
128 down_write(&ip->i_rw_mutex);
129
130 error = gfs2_meta_inode_buffer(ip, &dibh);
131 if (error)
132 goto out;
133
134 if (ip->i_di.di_size) {
135 /* Get a free block, fill it with the stuffed data,
136 and write it out to disk */
137
138 if (isdir) {
139 block = gfs2_alloc_meta(ip);
140
141 error = gfs2_dir_get_new_buffer(ip, block, &bh);
142 if (error)
143 goto out_brelse;
144 gfs2_buffer_copy_tail(bh,
145 sizeof(struct gfs2_meta_header),
146 dibh, sizeof(struct gfs2_dinode));
147 brelse(bh);
148 } else {
149 block = gfs2_alloc_data(ip);
150
151 error = gfs2_unstuffer_page(ip, dibh, block, page);
152 if (error)
153 goto out_brelse;
154 }
155 }
156
157 /* Set up the pointer to the new block */
158
159 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
160
161 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
162
163 if (ip->i_di.di_size) {
164 *(u64 *)(dibh->b_data + sizeof(struct gfs2_dinode)) =
165 cpu_to_be64(block);
166 ip->i_di.di_blocks++;
167 }
168
169 ip->i_di.di_height = 1;
170
171 gfs2_dinode_out(&ip->i_di, dibh->b_data);
172
173out_brelse:
174 brelse(dibh);
175out:
176 up_write(&ip->i_rw_mutex);
177 return error;
178}
179
180/**
181 * calc_tree_height - Calculate the height of a metadata tree
182 * @ip: The GFS2 inode
183 * @size: The proposed size of the file
184 *
185 * Work out how tall a metadata tree needs to be in order to accommodate a
186 * file of a particular size. If size is less than the current size of
187 * the inode, then the current size of the inode is used instead of the
188 * supplied one.
189 *
190 * Returns: the height the tree should be
191 */
192
193static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size)
194{
195 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
196 u64 *arr;
197 unsigned int max, height;
198
199 if (ip->i_di.di_size > size)
200 size = ip->i_di.di_size;
201
202 if (gfs2_is_dir(ip)) {
203 arr = sdp->sd_jheightsize;
204 max = sdp->sd_max_jheight;
205 } else {
206 arr = sdp->sd_heightsize;
207 max = sdp->sd_max_height;
208 }
209
210 for (height = 0; height < max; height++)
211 if (arr[height] >= size)
212 break;
213
214 return height;
215}
216
217/**
218 * build_height - Build a metadata tree of the requested height
219 * @ip: The GFS2 inode
220 * @height: The height to build to
221 *
222 *
223 * Returns: errno
224 */
225
226static int build_height(struct inode *inode, unsigned height)
227{
228 struct gfs2_inode *ip = GFS2_I(inode);
229 unsigned new_height = height - ip->i_di.di_height;
230 struct buffer_head *dibh;
231 struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
232 int error;
233 u64 *bp;
234 u64 bn;
235 unsigned n;
236
237 if (height <= ip->i_di.di_height)
238 return 0;
239
240 error = gfs2_meta_inode_buffer(ip, &dibh);
241 if (error)
242 return error;
243
244 for(n = 0; n < new_height; n++) {
245 bn = gfs2_alloc_meta(ip);
246 blocks[n] = gfs2_meta_new(ip->i_gl, bn);
247 gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
248 }
249
250 n = 0;
251 bn = blocks[0]->b_blocknr;
252 if (new_height > 1) {
253 for(; n < new_height-1; n++) {
254 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
255 GFS2_FORMAT_IN);
256 gfs2_buffer_clear_tail(blocks[n],
257 sizeof(struct gfs2_meta_header));
258 bp = (u64 *)(blocks[n]->b_data +
259 sizeof(struct gfs2_meta_header));
260 *bp = cpu_to_be64(blocks[n+1]->b_blocknr);
261 brelse(blocks[n]);
262 blocks[n] = NULL;
263 }
264 }
265 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
266 gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
267 dibh, sizeof(struct gfs2_dinode));
268 brelse(blocks[n]);
269 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
270 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
271 bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
272 *bp = cpu_to_be64(bn);
273 ip->i_di.di_height += new_height;
274 ip->i_di.di_blocks += new_height;
275 gfs2_dinode_out(&ip->i_di, dibh->b_data);
276 brelse(dibh);
277 return error;
278}
279
280/**
281 * find_metapath - Find path through the metadata tree
282 * @ip: The inode pointer
283 * @mp: The metapath to return the result in
284 * @block: The disk block to look up
285 *
286 * This routine returns a struct metapath structure that defines a path
287 * through the metadata of inode "ip" to get to block "block".
288 *
289 * Example:
290 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
291 * filesystem with a blocksize of 4096.
292 *
293 * find_metapath() would return a struct metapath structure set to:
294 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
295 * and mp_list[2] = 165.
296 *
297 * That means that in order to get to the block containing the byte at
298 * offset 101342453, we would load the indirect block pointed to by pointer
299 * 0 in the dinode. We would then load the indirect block pointed to by
300 * pointer 48 in that indirect block. We would then load the data block
301 * pointed to by pointer 165 in that indirect block.
302 *
303 * ----------------------------------------
304 * | Dinode | |
305 * | | 4|
306 * | |0 1 2 3 4 5 9|
307 * | | 6|
308 * ----------------------------------------
309 * |
310 * |
311 * V
312 * ----------------------------------------
313 * | Indirect Block |
314 * | 5|
315 * | 4 4 4 4 4 5 5 1|
316 * |0 5 6 7 8 9 0 1 2|
317 * ----------------------------------------
318 * |
319 * |
320 * V
321 * ----------------------------------------
322 * | Indirect Block |
323 * | 1 1 1 1 1 5|
324 * | 6 6 6 6 6 1|
325 * |0 3 4 5 6 7 2|
326 * ----------------------------------------
327 * |
328 * |
329 * V
330 * ----------------------------------------
331 * | Data block containing offset |
332 * | 101342453 |
333 * | |
334 * | |
335 * ----------------------------------------
336 *
337 */
338
339static void find_metapath(struct gfs2_inode *ip, u64 block,
340 struct metapath *mp)
341{
342 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
343 u64 b = block;
344 unsigned int i;
345
346 for (i = ip->i_di.di_height; i--;)
347 mp->mp_list[i] = do_div(b, sdp->sd_inptrs);
348
349}
350
351/**
352 * metapointer - Return pointer to start of metadata in a buffer
353 * @bh: The buffer
354 * @height: The metadata height (0 = dinode)
355 * @mp: The metapath
356 *
357 * Return a pointer to the block number of the next height of the metadata
358 * tree given a buffer containing the pointer to the current height of the
359 * metadata tree.
360 */
361
362static inline u64 *metapointer(struct buffer_head *bh, int *boundary,
363 unsigned int height, const struct metapath *mp)
364{
365 unsigned int head_size = (height > 0) ?
366 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
367 u64 *ptr;
368 *boundary = 0;
369 ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height];
370 if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size))
371 *boundary = 1;
372 return ptr;
373}
374
375/**
376 * lookup_block - Get the next metadata block in metadata tree
377 * @ip: The GFS2 inode
378 * @bh: Buffer containing the pointers to metadata blocks
379 * @height: The height of the tree (0 = dinode)
380 * @mp: The metapath
381 * @create: Non-zero if we may create a new meatdata block
382 * @new: Used to indicate if we did create a new metadata block
383 * @block: the returned disk block number
384 *
385 * Given a metatree, complete to a particular height, checks to see if the next
386 * height of the tree exists. If not the next height of the tree is created.
387 * The block number of the next height of the metadata tree is returned.
388 *
389 */
390
391static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
392 unsigned int height, struct metapath *mp, int create,
393 int *new, u64 *block)
394{
395 int boundary;
396 u64 *ptr = metapointer(bh, &boundary, height, mp);
397
398 if (*ptr) {
399 *block = be64_to_cpu(*ptr);
400 return boundary;
401 }
402
403 *block = 0;
404
405 if (!create)
406 return 0;
407
408 if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip))
409 *block = gfs2_alloc_data(ip);
410 else
411 *block = gfs2_alloc_meta(ip);
412
413 gfs2_trans_add_bh(ip->i_gl, bh, 1);
414
415 *ptr = cpu_to_be64(*block);
416 ip->i_di.di_blocks++;
417
418 *new = 1;
419 return 0;
420}
421
422/**
423 * gfs2_block_pointers - Map a block from an inode to a disk block
424 * @inode: The inode
425 * @lblock: The logical block number
426 * @map_bh: The bh to be mapped
427 * @mp: metapath to use
428 *
429 * Find the block number on the current device which corresponds to an
430 * inode's block. If the block had to be created, "new" will be set.
431 *
432 * Returns: errno
433 */
434
435static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
436 struct buffer_head *bh_map, struct metapath *mp,
437 unsigned int maxlen)
438{
439 struct gfs2_inode *ip = GFS2_I(inode);
440 struct gfs2_sbd *sdp = GFS2_SB(inode);
441 struct buffer_head *bh;
442 unsigned int bsize;
443 unsigned int height;
444 unsigned int end_of_metadata;
445 unsigned int x;
446 int error = 0;
447 int new = 0;
448 u64 dblock = 0;
449 int boundary;
450
451 BUG_ON(maxlen == 0);
452
453 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
454 return 0;
455
456 bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
457
458 height = calc_tree_height(ip, (lblock + 1) * bsize);
459 if (ip->i_di.di_height < height) {
460 if (!create)
461 return 0;
462
463 error = build_height(inode, height);
464 if (error)
465 return error;
466 }
467
468 find_metapath(ip, lblock, mp);
469 end_of_metadata = ip->i_di.di_height - 1;
470
471 error = gfs2_meta_inode_buffer(ip, &bh);
472 if (error)
473 return error;
474
475 for (x = 0; x < end_of_metadata; x++) {
476 lookup_block(ip, bh, x, mp, create, &new, &dblock);
477 brelse(bh);
478 if (!dblock)
479 return 0;
480
481 error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
482 if (error)
483 return error;
484 }
485
486 boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock);
487 clear_buffer_mapped(bh_map);
488 clear_buffer_new(bh_map);
489 clear_buffer_boundary(bh_map);
490
491 if (dblock) {
492 map_bh(bh_map, inode->i_sb, dblock);
493 if (boundary)
494 set_buffer_boundary(bh);
495 if (new) {
496 struct buffer_head *dibh;
497 error = gfs2_meta_inode_buffer(ip, &dibh);
498 if (!error) {
499 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
500 gfs2_dinode_out(&ip->i_di, dibh->b_data);
501 brelse(dibh);
502 }
503 set_buffer_new(bh_map);
504 goto out_brelse;
505 }
506 while(--maxlen && !buffer_boundary(bh_map)) {
507 u64 eblock;
508
509 mp->mp_list[end_of_metadata]++;
510 boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock);
511 if (eblock != ++dblock)
512 break;
513 bh_map->b_size += (1 << inode->i_blkbits);
514 if (boundary)
515 set_buffer_boundary(bh_map);
516 }
517 }
518out_brelse:
519 brelse(bh);
520 return 0;
521}
522
523
524static inline void bmap_lock(struct inode *inode, int create)
525{
526 struct gfs2_inode *ip = GFS2_I(inode);
527 if (create)
528 down_write(&ip->i_rw_mutex);
529 else
530 down_read(&ip->i_rw_mutex);
531}
532
533static inline void bmap_unlock(struct inode *inode, int create)
534{
535 struct gfs2_inode *ip = GFS2_I(inode);
536 if (create)
537 up_write(&ip->i_rw_mutex);
538 else
539 up_read(&ip->i_rw_mutex);
540}
541
542int gfs2_block_map(struct inode *inode, u64 lblock, int create,
543 struct buffer_head *bh, unsigned int maxlen)
544{
545 struct metapath mp;
546 int ret;
547
548 bmap_lock(inode, create);
549 ret = gfs2_block_pointers(inode, lblock, create, bh, &mp, maxlen);
550 bmap_unlock(inode, create);
551 return ret;
552}
553
554int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
555{
556 struct metapath mp;
557 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0, .b_size = 0 };
558 int ret;
559 int create = *new;
560
561 BUG_ON(!extlen);
562 BUG_ON(!dblock);
563 BUG_ON(!new);
564
565 bmap_lock(inode, create);
566 ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp, 32);
567 bmap_unlock(inode, create);
568 *extlen = bh.b_size >> inode->i_blkbits;
569 *dblock = bh.b_blocknr;
570 if (buffer_new(&bh))
571 *new = 1;
572 else
573 *new = 0;
574 return ret;
575}
576
577/**
578 * recursive_scan - recursively scan through the end of a file
579 * @ip: the inode
580 * @dibh: the dinode buffer
581 * @mp: the path through the metadata to the point to start
582 * @height: the height the recursion is at
583 * @block: the indirect block to look at
584 * @first: 1 if this is the first block
585 * @bc: the call to make for each piece of metadata
586 * @data: data opaque to this function to pass to @bc
587 *
588 * When this is first called @height and @block should be zero and
589 * @first should be 1.
590 *
591 * Returns: errno
592 */
593
594static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
595 struct metapath *mp, unsigned int height,
596 u64 block, int first, block_call_t bc,
597 void *data)
598{
599 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
600 struct buffer_head *bh = NULL;
601 u64 *top, *bottom;
602 u64 bn;
603 int error;
604 int mh_size = sizeof(struct gfs2_meta_header);
605
606 if (!height) {
607 error = gfs2_meta_inode_buffer(ip, &bh);
608 if (error)
609 return error;
610 dibh = bh;
611
612 top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
613 bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
614 } else {
615 error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
616 if (error)
617 return error;
618
619 top = (u64 *)(bh->b_data + mh_size) +
620 (first ? mp->mp_list[height] : 0);
621
622 bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
623 }
624
625 error = bc(ip, dibh, bh, top, bottom, height, data);
626 if (error)
627 goto out;
628
629 if (height < ip->i_di.di_height - 1)
630 for (; top < bottom; top++, first = 0) {
631 if (!*top)
632 continue;
633
634 bn = be64_to_cpu(*top);
635
636 error = recursive_scan(ip, dibh, mp, height + 1, bn,
637 first, bc, data);
638 if (error)
639 break;
640 }
641
642out:
643 brelse(bh);
644 return error;
645}
646
647/**
648 * do_strip - Look for a layer a particular layer of the file and strip it off
649 * @ip: the inode
650 * @dibh: the dinode buffer
651 * @bh: A buffer of pointers
652 * @top: The first pointer in the buffer
653 * @bottom: One more than the last pointer
654 * @height: the height this buffer is at
655 * @data: a pointer to a struct strip_mine
656 *
657 * Returns: errno
658 */
659
660static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
661 struct buffer_head *bh, u64 *top, u64 *bottom,
662 unsigned int height, void *data)
663{
664 struct strip_mine *sm = data;
665 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
666 struct gfs2_rgrp_list rlist;
667 u64 bn, bstart;
668 u32 blen;
669 u64 *p;
670 unsigned int rg_blocks = 0;
671 int metadata;
672 unsigned int revokes = 0;
673 int x;
674 int error;
675
676 if (!*top)
677 sm->sm_first = 0;
678
679 if (height != sm->sm_height)
680 return 0;
681
682 if (sm->sm_first) {
683 top++;
684 sm->sm_first = 0;
685 }
686
687 metadata = (height != ip->i_di.di_height - 1);
688 if (metadata)
689 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
690
691 error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
692 if (error)
693 return error;
694
695 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
696 bstart = 0;
697 blen = 0;
698
699 for (p = top; p < bottom; p++) {
700 if (!*p)
701 continue;
702
703 bn = be64_to_cpu(*p);
704
705 if (bstart + blen == bn)
706 blen++;
707 else {
708 if (bstart)
709 gfs2_rlist_add(sdp, &rlist, bstart);
710
711 bstart = bn;
712 blen = 1;
713 }
714 }
715
716 if (bstart)
717 gfs2_rlist_add(sdp, &rlist, bstart);
718 else
719 goto out; /* Nothing to do */
720
721 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
722
723 for (x = 0; x < rlist.rl_rgrps; x++) {
724 struct gfs2_rgrpd *rgd;
725 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
726 rg_blocks += rgd->rd_ri.ri_length;
727 }
728
729 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
730 if (error)
731 goto out_rlist;
732
733 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
734 RES_INDIRECT + RES_STATFS + RES_QUOTA,
735 revokes);
736 if (error)
737 goto out_rg_gunlock;
738
739 down_write(&ip->i_rw_mutex);
740
741 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
742 gfs2_trans_add_bh(ip->i_gl, bh, 1);
743
744 bstart = 0;
745 blen = 0;
746
747 for (p = top; p < bottom; p++) {
748 if (!*p)
749 continue;
750
751 bn = be64_to_cpu(*p);
752
753 if (bstart + blen == bn)
754 blen++;
755 else {
756 if (bstart) {
757 if (metadata)
758 gfs2_free_meta(ip, bstart, blen);
759 else
760 gfs2_free_data(ip, bstart, blen);
761 }
762
763 bstart = bn;
764 blen = 1;
765 }
766
767 *p = 0;
768 if (!ip->i_di.di_blocks)
769 gfs2_consist_inode(ip);
770 ip->i_di.di_blocks--;
771 }
772 if (bstart) {
773 if (metadata)
774 gfs2_free_meta(ip, bstart, blen);
775 else
776 gfs2_free_data(ip, bstart, blen);
777 }
778
779 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
780
781 gfs2_dinode_out(&ip->i_di, dibh->b_data);
782
783 up_write(&ip->i_rw_mutex);
784
785 gfs2_trans_end(sdp);
786
787out_rg_gunlock:
788 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
789out_rlist:
790 gfs2_rlist_free(&rlist);
791out:
792 gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
793 return error;
794}
795
796/**
797 * do_grow - Make a file look bigger than it is
798 * @ip: the inode
799 * @size: the size to set the file to
800 *
801 * Called with an exclusive lock on @ip.
802 *
803 * Returns: errno
804 */
805
806static int do_grow(struct gfs2_inode *ip, u64 size)
807{
808 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
809 struct gfs2_alloc *al;
810 struct buffer_head *dibh;
811 unsigned int h;
812 int error;
813
814 al = gfs2_alloc_get(ip);
815
816 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
817 if (error)
818 goto out;
819
820 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
821 if (error)
822 goto out_gunlock_q;
823
824 al->al_requested = sdp->sd_max_height + RES_DATA;
825
826 error = gfs2_inplace_reserve(ip);
827 if (error)
828 goto out_gunlock_q;
829
830 error = gfs2_trans_begin(sdp,
831 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
832 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
833 if (error)
834 goto out_ipres;
835
836 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
837 if (gfs2_is_stuffed(ip)) {
838 error = gfs2_unstuff_dinode(ip, NULL);
839 if (error)
840 goto out_end_trans;
841 }
842
843 h = calc_tree_height(ip, size);
844 if (ip->i_di.di_height < h) {
845 down_write(&ip->i_rw_mutex);
846 error = build_height(&ip->i_inode, h);
847 up_write(&ip->i_rw_mutex);
848 if (error)
849 goto out_end_trans;
850 }
851 }
852
853 ip->i_di.di_size = size;
854 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
855
856 error = gfs2_meta_inode_buffer(ip, &dibh);
857 if (error)
858 goto out_end_trans;
859
860 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
861 gfs2_dinode_out(&ip->i_di, dibh->b_data);
862 brelse(dibh);
863
864out_end_trans:
865 gfs2_trans_end(sdp);
866out_ipres:
867 gfs2_inplace_release(ip);
868out_gunlock_q:
869 gfs2_quota_unlock(ip);
870out:
871 gfs2_alloc_put(ip);
872 return error;
873}
874
875
876/**
877 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
878 *
879 * This is partly borrowed from ext3.
880 */
881static int gfs2_block_truncate_page(struct address_space *mapping)
882{
883 struct inode *inode = mapping->host;
884 struct gfs2_inode *ip = GFS2_I(inode);
885 struct gfs2_sbd *sdp = GFS2_SB(inode);
886 loff_t from = inode->i_size;
887 unsigned long index = from >> PAGE_CACHE_SHIFT;
888 unsigned offset = from & (PAGE_CACHE_SIZE-1);
889 unsigned blocksize, iblock, length, pos;
890 struct buffer_head *bh;
891 struct page *page;
892 void *kaddr;
893 int err;
894
895 page = grab_cache_page(mapping, index);
896 if (!page)
897 return 0;
898
899 blocksize = inode->i_sb->s_blocksize;
900 length = blocksize - (offset & (blocksize - 1));
901 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
902
903 if (!page_has_buffers(page))
904 create_empty_buffers(page, blocksize, 0);
905
906 /* Find the buffer that contains "offset" */
907 bh = page_buffers(page);
908 pos = blocksize;
909 while (offset >= pos) {
910 bh = bh->b_this_page;
911 iblock++;
912 pos += blocksize;
913 }
914
915 err = 0;
916
917 if (!buffer_mapped(bh)) {
918 gfs2_get_block(inode, iblock, bh, 0);
919 /* unmapped? It's a hole - nothing to do */
920 if (!buffer_mapped(bh))
921 goto unlock;
922 }
923
924 /* Ok, it's mapped. Make sure it's up-to-date */
925 if (PageUptodate(page))
926 set_buffer_uptodate(bh);
927
928 if (!buffer_uptodate(bh)) {
929 err = -EIO;
930 ll_rw_block(READ, 1, &bh);
931 wait_on_buffer(bh);
932 /* Uhhuh. Read error. Complain and punt. */
933 if (!buffer_uptodate(bh))
934 goto unlock;
935 }
936
937 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
938 gfs2_trans_add_bh(ip->i_gl, bh, 0);
939
940 kaddr = kmap_atomic(page, KM_USER0);
941 memset(kaddr + offset, 0, length);
942 flush_dcache_page(page);
943 kunmap_atomic(kaddr, KM_USER0);
944
945unlock:
946 unlock_page(page);
947 page_cache_release(page);
948 return err;
949}
950
951static int trunc_start(struct gfs2_inode *ip, u64 size)
952{
953 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
954 struct buffer_head *dibh;
955 int journaled = gfs2_is_jdata(ip);
956 int error;
957
958 error = gfs2_trans_begin(sdp,
959 RES_DINODE + (journaled ? RES_JDATA : 0), 0);
960 if (error)
961 return error;
962
963 error = gfs2_meta_inode_buffer(ip, &dibh);
964 if (error)
965 goto out;
966
967 if (gfs2_is_stuffed(ip)) {
968 ip->i_di.di_size = size;
969 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
970 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
971 gfs2_dinode_out(&ip->i_di, dibh->b_data);
972 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
973 error = 1;
974
975 } else {
976 if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
977 error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
978
979 if (!error) {
980 ip->i_di.di_size = size;
981 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
982 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
983 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
984 gfs2_dinode_out(&ip->i_di, dibh->b_data);
985 }
986 }
987
988 brelse(dibh);
989
990out:
991 gfs2_trans_end(sdp);
992 return error;
993}
994
995static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
996{
997 unsigned int height = ip->i_di.di_height;
998 u64 lblock;
999 struct metapath mp;
1000 int error;
1001
1002 if (!size)
1003 lblock = 0;
1004 else
1005 lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift;
1006
1007 find_metapath(ip, lblock, &mp);
1008 gfs2_alloc_get(ip);
1009
1010 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1011 if (error)
1012 goto out;
1013
1014 while (height--) {
1015 struct strip_mine sm;
1016 sm.sm_first = !!size;
1017 sm.sm_height = height;
1018
1019 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
1020 if (error)
1021 break;
1022 }
1023
1024 gfs2_quota_unhold(ip);
1025
1026out:
1027 gfs2_alloc_put(ip);
1028 return error;
1029}
1030
1031static int trunc_end(struct gfs2_inode *ip)
1032{
1033 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1034 struct buffer_head *dibh;
1035 int error;
1036
1037 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1038 if (error)
1039 return error;
1040
1041 down_write(&ip->i_rw_mutex);
1042
1043 error = gfs2_meta_inode_buffer(ip, &dibh);
1044 if (error)
1045 goto out;
1046
1047 if (!ip->i_di.di_size) {
1048 ip->i_di.di_height = 0;
1049 ip->i_di.di_goal_meta =
1050 ip->i_di.di_goal_data =
1051 ip->i_num.no_addr;
1052 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1053 }
1054 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1055 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1056
1057 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1058 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1059 brelse(dibh);
1060
1061out:
1062 up_write(&ip->i_rw_mutex);
1063 gfs2_trans_end(sdp);
1064 return error;
1065}
1066
1067/**
1068 * do_shrink - make a file smaller
1069 * @ip: the inode
1070 * @size: the size to make the file
1071 * @truncator: function to truncate the last partial block
1072 *
1073 * Called with an exclusive lock on @ip.
1074 *
1075 * Returns: errno
1076 */
1077
1078static int do_shrink(struct gfs2_inode *ip, u64 size)
1079{
1080 int error;
1081
1082 error = trunc_start(ip, size);
1083 if (error < 0)
1084 return error;
1085 if (error > 0)
1086 return 0;
1087
1088 error = trunc_dealloc(ip, size);
1089 if (!error)
1090 error = trunc_end(ip);
1091
1092 return error;
1093}
1094
1095/**
1096 * gfs2_truncatei - make a file a given size
1097 * @ip: the inode
1098 * @size: the size to make the file
1099 * @truncator: function to truncate the last partial block
1100 *
1101 * The file size can grow, shrink, or stay the same size.
1102 *
1103 * Returns: errno
1104 */
1105
1106int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
1107{
1108 int error;
1109
1110 if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode)))
1111 return -EINVAL;
1112
1113 if (size > ip->i_di.di_size)
1114 error = do_grow(ip, size);
1115 else
1116 error = do_shrink(ip, size);
1117
1118 return error;
1119}
1120
1121int gfs2_truncatei_resume(struct gfs2_inode *ip)
1122{
1123 int error;
1124 error = trunc_dealloc(ip, ip->i_di.di_size);
1125 if (!error)
1126 error = trunc_end(ip);
1127 return error;
1128}
1129
1130int gfs2_file_dealloc(struct gfs2_inode *ip)
1131{
1132 return trunc_dealloc(ip, 0);
1133}
1134
1135/**
1136 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1137 * @ip: the file
1138 * @len: the number of bytes to be written to the file
1139 * @data_blocks: returns the number of data blocks required
1140 * @ind_blocks: returns the number of indirect blocks required
1141 *
1142 */
1143
1144void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1145 unsigned int *data_blocks, unsigned int *ind_blocks)
1146{
1147 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1148 unsigned int tmp;
1149
1150 if (gfs2_is_dir(ip)) {
1151 *data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
1152 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1153 } else {
1154 *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1155 *ind_blocks = 3 * (sdp->sd_max_height - 1);
1156 }
1157
1158 for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1159 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
1160 *ind_blocks += tmp;
1161 }
1162}
1163
1164/**
1165 * gfs2_write_alloc_required - figure out if a write will require an allocation
1166 * @ip: the file being written to
1167 * @offset: the offset to write to
1168 * @len: the number of bytes being written
1169 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1170 *
1171 * Returns: errno
1172 */
1173
1174int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1175 unsigned int len, int *alloc_required)
1176{
1177 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1178 u64 lblock, lblock_stop, dblock;
1179 u32 extlen;
1180 int new = 0;
1181 int error = 0;
1182
1183 *alloc_required = 0;
1184
1185 if (!len)
1186 return 0;
1187
1188 if (gfs2_is_stuffed(ip)) {
1189 if (offset + len >
1190 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1191 *alloc_required = 1;
1192 return 0;
1193 }
1194
1195 if (gfs2_is_dir(ip)) {
1196 unsigned int bsize = sdp->sd_jbsize;
1197 lblock = offset;
1198 do_div(lblock, bsize);
1199 lblock_stop = offset + len + bsize - 1;
1200 do_div(lblock_stop, bsize);
1201 } else {
1202 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1203 lblock = offset >> shift;
1204 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1205 }
1206
1207 for (; lblock < lblock_stop; lblock += extlen) {
1208 error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
1209 if (error)
1210 return error;
1211
1212 if (!dblock) {
1213 *alloc_required = 1;
1214 return 0;
1215 }
1216 }
1217
1218 return 0;
1219}
1220
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
new file mode 100644
index 000000000000..0fd379b4cd9e
--- /dev/null
+++ b/fs/gfs2/bmap.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __BMAP_DOT_H__
11#define __BMAP_DOT_H__
12
13struct inode;
14struct gfs2_inode;
15struct page;
16
17int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
18int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh, unsigned int maxlen);
19int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
20
21int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
22int gfs2_truncatei_resume(struct gfs2_inode *ip);
23int gfs2_file_dealloc(struct gfs2_inode *ip);
24
25void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
26 unsigned int *data_blocks,
27 unsigned int *ind_blocks);
28int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
29 unsigned int len, int *alloc_required);
30
31#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
new file mode 100644
index 000000000000..cab1f68d4685
--- /dev/null
+++ b/fs/gfs2/daemon.c
@@ -0,0 +1,196 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/delay.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "daemon.h"
23#include "glock.h"
24#include "log.h"
25#include "quota.h"
26#include "recovery.h"
27#include "super.h"
28#include "util.h"
29
30/* This uses schedule_timeout() instead of msleep() because it's good for
31 the daemons to wake up more often than the timeout when unmounting so
32 the user's unmount doesn't sit there forever.
33
34 The kthread functions used to start these daemons block and flush signals. */
35
36/**
37 * gfs2_scand - Look for cached glocks and inodes to toss from memory
38 * @sdp: Pointer to GFS2 superblock
39 *
40 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
41 * See gfs2_glockd()
42 */
43
44int gfs2_scand(void *data)
45{
46 struct gfs2_sbd *sdp = data;
47 unsigned long t;
48
49 while (!kthread_should_stop()) {
50 gfs2_scand_internal(sdp);
51 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
52 schedule_timeout_interruptible(t);
53 }
54
55 return 0;
56}
57
58/**
59 * gfs2_glockd - Reclaim unused glock structures
60 * @sdp: Pointer to GFS2 superblock
61 *
62 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
63 * Number of daemons can be set by user, with num_glockd mount option.
64 */
65
66int gfs2_glockd(void *data)
67{
68 struct gfs2_sbd *sdp = data;
69
70 while (!kthread_should_stop()) {
71 while (atomic_read(&sdp->sd_reclaim_count))
72 gfs2_reclaim_glock(sdp);
73
74 wait_event_interruptible(sdp->sd_reclaim_wq,
75 (atomic_read(&sdp->sd_reclaim_count) ||
76 kthread_should_stop()));
77 }
78
79 return 0;
80}
81
82/**
83 * gfs2_recoverd - Recover dead machine's journals
84 * @sdp: Pointer to GFS2 superblock
85 *
86 */
87
88int gfs2_recoverd(void *data)
89{
90 struct gfs2_sbd *sdp = data;
91 unsigned long t;
92
93 while (!kthread_should_stop()) {
94 gfs2_check_journals(sdp);
95 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
96 schedule_timeout_interruptible(t);
97 }
98
99 return 0;
100}
101
102/**
103 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
104 * @sdp: Pointer to GFS2 superblock
105 *
106 * Also, periodically check to make sure that we're using the most recent
107 * journal index.
108 */
109
110int gfs2_logd(void *data)
111{
112 struct gfs2_sbd *sdp = data;
113 struct gfs2_holder ji_gh;
114 unsigned long t;
115
116 while (!kthread_should_stop()) {
117 /* Advance the log tail */
118
119 t = sdp->sd_log_flush_time +
120 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
121
122 gfs2_ail1_empty(sdp, DIO_ALL);
123
124 if (time_after_eq(jiffies, t)) {
125 gfs2_log_flush(sdp, NULL);
126 sdp->sd_log_flush_time = jiffies;
127 }
128
129 /* Check for latest journal index */
130
131 t = sdp->sd_jindex_refresh_time +
132 gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
133
134 if (time_after_eq(jiffies, t)) {
135 if (!gfs2_jindex_hold(sdp, &ji_gh))
136 gfs2_glock_dq_uninit(&ji_gh);
137 sdp->sd_jindex_refresh_time = jiffies;
138 }
139
140 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
141 schedule_timeout_interruptible(t);
142 }
143
144 return 0;
145}
146
147/**
148 * gfs2_quotad - Write cached quota changes into the quota file
149 * @sdp: Pointer to GFS2 superblock
150 *
151 */
152
153int gfs2_quotad(void *data)
154{
155 struct gfs2_sbd *sdp = data;
156 unsigned long t;
157 int error;
158
159 while (!kthread_should_stop()) {
160 /* Update the master statfs file */
161
162 t = sdp->sd_statfs_sync_time +
163 gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
164
165 if (time_after_eq(jiffies, t)) {
166 error = gfs2_statfs_sync(sdp);
167 if (error &&
168 error != -EROFS &&
169 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
170 fs_err(sdp, "quotad: (1) error=%d\n", error);
171 sdp->sd_statfs_sync_time = jiffies;
172 }
173
174 /* Update quota file */
175
176 t = sdp->sd_quota_sync_time +
177 gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
178
179 if (time_after_eq(jiffies, t)) {
180 error = gfs2_quota_sync(sdp);
181 if (error &&
182 error != -EROFS &&
183 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
184 fs_err(sdp, "quotad: (2) error=%d\n", error);
185 sdp->sd_quota_sync_time = jiffies;
186 }
187
188 gfs2_quota_scan(sdp);
189
190 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
191 schedule_timeout_interruptible(t);
192 }
193
194 return 0;
195}
196
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
new file mode 100644
index 000000000000..801007120fb2
--- /dev/null
+++ b/fs/gfs2/daemon.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __DAEMON_DOT_H__
11#define __DAEMON_DOT_H__
12
13int gfs2_scand(void *data);
14int gfs2_glockd(void *data);
15int gfs2_recoverd(void *data);
16int gfs2_logd(void *data);
17int gfs2_quotad(void *data);
18
19#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
new file mode 100644
index 000000000000..459498cac93b
--- /dev/null
+++ b/fs/gfs2/dir.c
@@ -0,0 +1,1961 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10/*
11 * Implements Extendible Hashing as described in:
12 * "Extendible Hashing" by Fagin, et al in
13 * __ACM Trans. on Database Systems__, Sept 1979.
14 *
15 *
16 * Here's the layout of dirents which is essentially the same as that of ext2
17 * within a single block. The field de_name_len is the number of bytes
18 * actually required for the name (no null terminator). The field de_rec_len
19 * is the number of bytes allocated to the dirent. The offset of the next
20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21 * deleted, the preceding dirent inherits its allocated space, ie
22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23 * by adding de_rec_len to the current dirent, this essentially causes the
24 * deleted dirent to get jumped over when iterating through all the dirents.
25 *
26 * When deleting the first dirent in a block, there is no previous dirent so
27 * the field de_ino is set to zero to designate it as deleted. When allocating
28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30 * dirent is allocated. Otherwise it must go through all the 'used' dirents
31 * searching for one in which the amount of total space minus the amount of
32 * used space will provide enough space for the new dirent.
33 *
34 * There are two types of blocks in which dirents reside. In a stuffed dinode,
35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37 * beginning of the leaf block. The dirents reside in leaves when
38 *
39 * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40 *
41 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
42 *
43 * When the dirents are in leaves, the actual contents of the directory file are
44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
45 * dirents are NOT in the directory file itself. There can be more than one
46 * block pointer in the array that points to the same leaf. In fact, when a
47 * directory is first converted from linear to exhash, all of the pointers
48 * point to the same leaf.
49 *
50 * When a leaf is completely full, the size of the hash table can be
51 * doubled unless it is already at the maximum size which is hard coded into
52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53 * but never before the maximum hash table size has been reached.
54 */
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/buffer_head.h>
60#include <linux/sort.h>
61#include <linux/gfs2_ondisk.h>
62#include <linux/crc32.h>
63#include <linux/vmalloc.h>
64#include <linux/lm_interface.h>
65
66#include "gfs2.h"
67#include "incore.h"
68#include "dir.h"
69#include "glock.h"
70#include "inode.h"
71#include "meta_io.h"
72#include "quota.h"
73#include "rgrp.h"
74#include "trans.h"
75#include "bmap.h"
76#include "util.h"
77
78#define IS_LEAF 1 /* Hashed (leaf) directory */
79#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
80
81#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
82#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
83
84typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
85 u64 leaf_no, void *data);
86typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
87 const struct qstr *name, void *opaque);
88
89
90int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
91 struct buffer_head **bhp)
92{
93 struct buffer_head *bh;
94
95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh;
100 return 0;
101}
102
103static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
104 struct buffer_head **bhp)
105{
106 struct buffer_head *bh;
107 int error;
108
109 error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh);
110 if (error)
111 return error;
112 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
113 brelse(bh);
114 return -EIO;
115 }
116 *bhp = bh;
117 return 0;
118}
119
120static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
121 unsigned int offset, unsigned int size)
122{
123 struct buffer_head *dibh;
124 int error;
125
126 error = gfs2_meta_inode_buffer(ip, &dibh);
127 if (error)
128 return error;
129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_di.di_size < offset + size)
133 ip->i_di.di_size = offset + size;
134 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
135 gfs2_dinode_out(&ip->i_di, dibh->b_data);
136
137 brelse(dibh);
138
139 return size;
140}
141
142
143
144/**
145 * gfs2_dir_write_data - Write directory information to the inode
146 * @ip: The GFS2 inode
147 * @buf: The buffer containing information to be written
148 * @offset: The file offset to start writing at
149 * @size: The amount of data to write
150 *
151 * Returns: The number of bytes correctly written or error code
152 */
153static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
154 u64 offset, unsigned int size)
155{
156 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
157 struct buffer_head *dibh;
158 u64 lblock, dblock;
159 u32 extlen = 0;
160 unsigned int o;
161 int copied = 0;
162 int error = 0;
163
164 if (!size)
165 return 0;
166
167 if (gfs2_is_stuffed(ip) &&
168 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
169 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
170 size);
171
172 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
173 return -EINVAL;
174
175 if (gfs2_is_stuffed(ip)) {
176 error = gfs2_unstuff_dinode(ip, NULL);
177 if (error)
178 return error;
179 }
180
181 lblock = offset;
182 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
183
184 while (copied < size) {
185 unsigned int amount;
186 struct buffer_head *bh;
187 int new;
188
189 amount = size - copied;
190 if (amount > sdp->sd_sb.sb_bsize - o)
191 amount = sdp->sd_sb.sb_bsize - o;
192
193 if (!extlen) {
194 new = 1;
195 error = gfs2_extent_map(&ip->i_inode, lblock, &new,
196 &dblock, &extlen);
197 if (error)
198 goto fail;
199 error = -EIO;
200 if (gfs2_assert_withdraw(sdp, dblock))
201 goto fail;
202 }
203
204 if (amount == sdp->sd_jbsize || new)
205 error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
206 else
207 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
208
209 if (error)
210 goto fail;
211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1);
213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh);
215 if (error)
216 goto fail;
217
218 buf += amount;
219 copied += amount;
220 lblock++;
221 dblock++;
222 extlen--;
223
224 o = sizeof(struct gfs2_meta_header);
225 }
226
227out:
228 error = gfs2_meta_inode_buffer(ip, &dibh);
229 if (error)
230 return error;
231
232 if (ip->i_di.di_size < offset + copied)
233 ip->i_di.di_size = offset + copied;
234 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
235
236 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
237 gfs2_dinode_out(&ip->i_di, dibh->b_data);
238 brelse(dibh);
239
240 return copied;
241fail:
242 if (copied)
243 goto out;
244 return error;
245}
246
247static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
248 u64 offset, unsigned int size)
249{
250 struct buffer_head *dibh;
251 int error;
252
253 error = gfs2_meta_inode_buffer(ip, &dibh);
254 if (!error) {
255 offset += sizeof(struct gfs2_dinode);
256 memcpy(buf, dibh->b_data + offset, size);
257 brelse(dibh);
258 }
259
260 return (error) ? error : size;
261}
262
263
264/**
265 * gfs2_dir_read_data - Read a data from a directory inode
266 * @ip: The GFS2 Inode
267 * @buf: The buffer to place result into
268 * @offset: File offset to begin jdata_readng from
269 * @size: Amount of data to transfer
270 *
271 * Returns: The amount of data actually copied or the error
272 */
273static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
274 unsigned int size, unsigned ra)
275{
276 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
277 u64 lblock, dblock;
278 u32 extlen = 0;
279 unsigned int o;
280 int copied = 0;
281 int error = 0;
282
283 if (offset >= ip->i_di.di_size)
284 return 0;
285
286 if (offset + size > ip->i_di.di_size)
287 size = ip->i_di.di_size - offset;
288
289 if (!size)
290 return 0;
291
292 if (gfs2_is_stuffed(ip))
293 return gfs2_dir_read_stuffed(ip, buf, offset, size);
294
295 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
296 return -EINVAL;
297
298 lblock = offset;
299 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
300
301 while (copied < size) {
302 unsigned int amount;
303 struct buffer_head *bh;
304 int new;
305
306 amount = size - copied;
307 if (amount > sdp->sd_sb.sb_bsize - o)
308 amount = sdp->sd_sb.sb_bsize - o;
309
310 if (!extlen) {
311 new = 0;
312 error = gfs2_extent_map(&ip->i_inode, lblock, &new,
313 &dblock, &extlen);
314 if (error || !dblock)
315 goto fail;
316 BUG_ON(extlen < 1);
317 if (!ra)
318 extlen = 1;
319 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
320 }
321 if (!bh) {
322 error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
323 if (error)
324 goto fail;
325 }
326 error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
327 if (error) {
328 brelse(bh);
329 goto fail;
330 }
331 dblock++;
332 extlen--;
333 memcpy(buf, bh->b_data + o, amount);
334 brelse(bh);
335 bh = NULL;
336 buf += amount;
337 copied += amount;
338 lblock++;
339 o = sizeof(struct gfs2_meta_header);
340 }
341
342 return copied;
343fail:
344 return (copied) ? copied : error;
345}
346
347static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
348 const struct qstr *name, int ret)
349{
350 if (dent->de_inum.no_addr != 0 &&
351 be32_to_cpu(dent->de_hash) == name->hash &&
352 be16_to_cpu(dent->de_name_len) == name->len &&
353 memcmp(dent+1, name->name, name->len) == 0)
354 return ret;
355 return 0;
356}
357
358static int gfs2_dirent_find(const struct gfs2_dirent *dent,
359 const struct qstr *name,
360 void *opaque)
361{
362 return __gfs2_dirent_find(dent, name, 1);
363}
364
365static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
366 const struct qstr *name,
367 void *opaque)
368{
369 return __gfs2_dirent_find(dent, name, 2);
370}
371
372/*
373 * name->name holds ptr to start of block.
374 * name->len holds size of block.
375 */
376static int gfs2_dirent_last(const struct gfs2_dirent *dent,
377 const struct qstr *name,
378 void *opaque)
379{
380 const char *start = name->name;
381 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
382 if (name->len == (end - start))
383 return 1;
384 return 0;
385}
386
387static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
388 const struct qstr *name,
389 void *opaque)
390{
391 unsigned required = GFS2_DIRENT_SIZE(name->len);
392 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
393 unsigned totlen = be16_to_cpu(dent->de_rec_len);
394
395 if (!dent->de_inum.no_addr)
396 actual = GFS2_DIRENT_SIZE(0);
397 if (totlen - actual >= required)
398 return 1;
399 return 0;
400}
401
402struct dirent_gather {
403 const struct gfs2_dirent **pdent;
404 unsigned offset;
405};
406
407static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
408 const struct qstr *name,
409 void *opaque)
410{
411 struct dirent_gather *g = opaque;
412 if (dent->de_inum.no_addr) {
413 g->pdent[g->offset++] = dent;
414 }
415 return 0;
416}
417
418/*
419 * Other possible things to check:
420 * - Inode located within filesystem size (and on valid block)
421 * - Valid directory entry type
422 * Not sure how heavy-weight we want to make this... could also check
423 * hash is correct for example, but that would take a lot of extra time.
424 * For now the most important thing is to check that the various sizes
425 * are correct.
426 */
427static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
428 unsigned int size, unsigned int len, int first)
429{
430 const char *msg = "gfs2_dirent too small";
431 if (unlikely(size < sizeof(struct gfs2_dirent)))
432 goto error;
433 msg = "gfs2_dirent misaligned";
434 if (unlikely(offset & 0x7))
435 goto error;
436 msg = "gfs2_dirent points beyond end of block";
437 if (unlikely(offset + size > len))
438 goto error;
439 msg = "zero inode number";
440 if (unlikely(!first && !dent->de_inum.no_addr))
441 goto error;
442 msg = "name length is greater than space in dirent";
443 if (dent->de_inum.no_addr &&
444 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
445 size))
446 goto error;
447 return 0;
448error:
449 printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg,
450 first ? "first in block" : "not first in block");
451 return -EIO;
452}
453
454static int gfs2_dirent_offset(const void *buf)
455{
456 const struct gfs2_meta_header *h = buf;
457 int offset;
458
459 BUG_ON(buf == NULL);
460
461 switch(be32_to_cpu(h->mh_type)) {
462 case GFS2_METATYPE_LF:
463 offset = sizeof(struct gfs2_leaf);
464 break;
465 case GFS2_METATYPE_DI:
466 offset = sizeof(struct gfs2_dinode);
467 break;
468 default:
469 goto wrong_type;
470 }
471 return offset;
472wrong_type:
473 printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
474 be32_to_cpu(h->mh_type));
475 return -1;
476}
477
478static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
479 unsigned int len, gfs2_dscan_t scan,
480 const struct qstr *name,
481 void *opaque)
482{
483 struct gfs2_dirent *dent, *prev;
484 unsigned offset;
485 unsigned size;
486 int ret = 0;
487
488 ret = gfs2_dirent_offset(buf);
489 if (ret < 0)
490 goto consist_inode;
491
492 offset = ret;
493 prev = NULL;
494 dent = buf + offset;
495 size = be16_to_cpu(dent->de_rec_len);
496 if (gfs2_check_dirent(dent, offset, size, len, 1))
497 goto consist_inode;
498 do {
499 ret = scan(dent, name, opaque);
500 if (ret)
501 break;
502 offset += size;
503 if (offset == len)
504 break;
505 prev = dent;
506 dent = buf + offset;
507 size = be16_to_cpu(dent->de_rec_len);
508 if (gfs2_check_dirent(dent, offset, size, len, 0))
509 goto consist_inode;
510 } while(1);
511
512 switch(ret) {
513 case 0:
514 return NULL;
515 case 1:
516 return dent;
517 case 2:
518 return prev ? prev : dent;
519 default:
520 BUG_ON(ret > 0);
521 return ERR_PTR(ret);
522 }
523
524consist_inode:
525 gfs2_consist_inode(GFS2_I(inode));
526 return ERR_PTR(-EIO);
527}
528
529
530/**
531 * dirent_first - Return the first dirent
532 * @dip: the directory
533 * @bh: The buffer
534 * @dent: Pointer to list of dirents
535 *
536 * return first dirent whether bh points to leaf or stuffed dinode
537 *
538 * Returns: IS_LEAF, IS_DINODE, or -errno
539 */
540
541static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
542 struct gfs2_dirent **dent)
543{
544 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
545
546 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
547 if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh))
548 return -EIO;
549 *dent = (struct gfs2_dirent *)(bh->b_data +
550 sizeof(struct gfs2_leaf));
551 return IS_LEAF;
552 } else {
553 if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI))
554 return -EIO;
555 *dent = (struct gfs2_dirent *)(bh->b_data +
556 sizeof(struct gfs2_dinode));
557 return IS_DINODE;
558 }
559}
560
561static int dirent_check_reclen(struct gfs2_inode *dip,
562 const struct gfs2_dirent *d, const void *end_p)
563{
564 const void *ptr = d;
565 u16 rec_len = be16_to_cpu(d->de_rec_len);
566
567 if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
568 goto broken;
569 ptr += rec_len;
570 if (ptr < end_p)
571 return rec_len;
572 if (ptr == end_p)
573 return -ENOENT;
574broken:
575 gfs2_consist_inode(dip);
576 return -EIO;
577}
578
579/**
580 * dirent_next - Next dirent
581 * @dip: the directory
582 * @bh: The buffer
583 * @dent: Pointer to list of dirents
584 *
585 * Returns: 0 on success, error code otherwise
586 */
587
588static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
589 struct gfs2_dirent **dent)
590{
591 struct gfs2_dirent *cur = *dent, *tmp;
592 char *bh_end = bh->b_data + bh->b_size;
593 int ret;
594
595 ret = dirent_check_reclen(dip, cur, bh_end);
596 if (ret < 0)
597 return ret;
598
599 tmp = (void *)cur + ret;
600 ret = dirent_check_reclen(dip, tmp, bh_end);
601 if (ret == -EIO)
602 return ret;
603
604 /* Only the first dent could ever have de_inum.no_addr == 0 */
605 if (!tmp->de_inum.no_addr) {
606 gfs2_consist_inode(dip);
607 return -EIO;
608 }
609
610 *dent = tmp;
611 return 0;
612}
613
614/**
615 * dirent_del - Delete a dirent
616 * @dip: The GFS2 inode
617 * @bh: The buffer
618 * @prev: The previous dirent
619 * @cur: The current dirent
620 *
621 */
622
623static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
624 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
625{
626 u16 cur_rec_len, prev_rec_len;
627
628 if (!cur->de_inum.no_addr) {
629 gfs2_consist_inode(dip);
630 return;
631 }
632
633 gfs2_trans_add_bh(dip->i_gl, bh, 1);
634
635 /* If there is no prev entry, this is the first entry in the block.
636 The de_rec_len is already as big as it needs to be. Just zero
637 out the inode number and return. */
638
639 if (!prev) {
640 cur->de_inum.no_addr = 0; /* No endianess worries */
641 return;
642 }
643
644 /* Combine this dentry with the previous one. */
645
646 prev_rec_len = be16_to_cpu(prev->de_rec_len);
647 cur_rec_len = be16_to_cpu(cur->de_rec_len);
648
649 if ((char *)prev + prev_rec_len != (char *)cur)
650 gfs2_consist_inode(dip);
651 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
652 gfs2_consist_inode(dip);
653
654 prev_rec_len += cur_rec_len;
655 prev->de_rec_len = cpu_to_be16(prev_rec_len);
656}
657
658/*
659 * Takes a dent from which to grab space as an argument. Returns the
660 * newly created dent.
661 */
662static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
663 struct gfs2_dirent *dent,
664 const struct qstr *name,
665 struct buffer_head *bh)
666{
667 struct gfs2_inode *ip = GFS2_I(inode);
668 struct gfs2_dirent *ndent;
669 unsigned offset = 0, totlen;
670
671 if (dent->de_inum.no_addr)
672 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
673 totlen = be16_to_cpu(dent->de_rec_len);
674 BUG_ON(offset + name->len > totlen);
675 gfs2_trans_add_bh(ip->i_gl, bh, 1);
676 ndent = (struct gfs2_dirent *)((char *)dent + offset);
677 dent->de_rec_len = cpu_to_be16(offset);
678 gfs2_qstr2dirent(name, totlen - offset, ndent);
679 return ndent;
680}
681
682static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
683 struct buffer_head *bh,
684 const struct qstr *name)
685{
686 struct gfs2_dirent *dent;
687 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
688 gfs2_dirent_find_space, name, NULL);
689 if (!dent || IS_ERR(dent))
690 return dent;
691 return gfs2_init_dirent(inode, dent, name, bh);
692}
693
694static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
695 struct buffer_head **bhp)
696{
697 int error;
698
699 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp);
700 if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) {
701 /* printk(KERN_INFO "block num=%llu\n", leaf_no); */
702 error = -EIO;
703 }
704
705 return error;
706}
707
708/**
709 * get_leaf_nr - Get a leaf number associated with the index
710 * @dip: The GFS2 inode
711 * @index:
712 * @leaf_out:
713 *
714 * Returns: 0 on success, error code otherwise
715 */
716
717static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
718 u64 *leaf_out)
719{
720 u64 leaf_no;
721 int error;
722
723 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
724 index * sizeof(u64),
725 sizeof(u64), 0);
726 if (error != sizeof(u64))
727 return (error < 0) ? error : -EIO;
728
729 *leaf_out = be64_to_cpu(leaf_no);
730
731 return 0;
732}
733
734static int get_first_leaf(struct gfs2_inode *dip, u32 index,
735 struct buffer_head **bh_out)
736{
737 u64 leaf_no;
738 int error;
739
740 error = get_leaf_nr(dip, index, &leaf_no);
741 if (!error)
742 error = get_leaf(dip, leaf_no, bh_out);
743
744 return error;
745}
746
747static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
748 const struct qstr *name,
749 gfs2_dscan_t scan,
750 struct buffer_head **pbh)
751{
752 struct buffer_head *bh;
753 struct gfs2_dirent *dent;
754 struct gfs2_inode *ip = GFS2_I(inode);
755 int error;
756
757 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
758 struct gfs2_leaf *leaf;
759 unsigned hsize = 1 << ip->i_di.di_depth;
760 unsigned index;
761 u64 ln;
762 if (hsize * sizeof(u64) != ip->i_di.di_size) {
763 gfs2_consist_inode(ip);
764 return ERR_PTR(-EIO);
765 }
766
767 index = name->hash >> (32 - ip->i_di.di_depth);
768 error = get_first_leaf(ip, index, &bh);
769 if (error)
770 return ERR_PTR(error);
771 do {
772 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
773 scan, name, NULL);
774 if (dent)
775 goto got_dent;
776 leaf = (struct gfs2_leaf *)bh->b_data;
777 ln = be64_to_cpu(leaf->lf_next);
778 brelse(bh);
779 if (!ln)
780 break;
781
782 error = get_leaf(ip, ln, &bh);
783 } while(!error);
784
785 return error ? ERR_PTR(error) : NULL;
786 }
787
788
789 error = gfs2_meta_inode_buffer(ip, &bh);
790 if (error)
791 return ERR_PTR(error);
792 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
793got_dent:
794 if (unlikely(dent == NULL || IS_ERR(dent))) {
795 brelse(bh);
796 bh = NULL;
797 }
798 *pbh = bh;
799 return dent;
800}
801
802static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
803{
804 struct gfs2_inode *ip = GFS2_I(inode);
805 u64 bn = gfs2_alloc_meta(ip);
806 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
807 struct gfs2_leaf *leaf;
808 struct gfs2_dirent *dent;
809 struct qstr name = { .name = "", .len = 0, .hash = 0 };
810 if (!bh)
811 return NULL;
812
813 gfs2_trans_add_bh(ip->i_gl, bh, 1);
814 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
815 leaf = (struct gfs2_leaf *)bh->b_data;
816 leaf->lf_depth = cpu_to_be16(depth);
817 leaf->lf_entries = 0;
818 leaf->lf_dirent_format = cpu_to_be16(GFS2_FORMAT_DE);
819 leaf->lf_next = 0;
820 memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
821 dent = (struct gfs2_dirent *)(leaf+1);
822 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
823 *pbh = bh;
824 return leaf;
825}
826
827/**
828 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
829 * @dip: The GFS2 inode
830 *
831 * Returns: 0 on success, error code otherwise
832 */
833
834static int dir_make_exhash(struct inode *inode)
835{
836 struct gfs2_inode *dip = GFS2_I(inode);
837 struct gfs2_sbd *sdp = GFS2_SB(inode);
838 struct gfs2_dirent *dent;
839 struct qstr args;
840 struct buffer_head *bh, *dibh;
841 struct gfs2_leaf *leaf;
842 int y;
843 u32 x;
844 u64 *lp, bn;
845 int error;
846
847 error = gfs2_meta_inode_buffer(dip, &dibh);
848 if (error)
849 return error;
850
851 /* Turn over a new leaf */
852
853 leaf = new_leaf(inode, &bh, 0);
854 if (!leaf)
855 return -ENOSPC;
856 bn = bh->b_blocknr;
857
858 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
859 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
860
861 /* Copy dirents */
862
863 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
864 sizeof(struct gfs2_dinode));
865
866 /* Find last entry */
867
868 x = 0;
869 args.len = bh->b_size - sizeof(struct gfs2_dinode) +
870 sizeof(struct gfs2_leaf);
871 args.name = bh->b_data;
872 dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size,
873 gfs2_dirent_last, &args, NULL);
874 if (!dent) {
875 brelse(bh);
876 brelse(dibh);
877 return -EIO;
878 }
879 if (IS_ERR(dent)) {
880 brelse(bh);
881 brelse(dibh);
882 return PTR_ERR(dent);
883 }
884
885 /* Adjust the last dirent's record length
886 (Remember that dent still points to the last entry.) */
887
888 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
889 sizeof(struct gfs2_dinode) -
890 sizeof(struct gfs2_leaf));
891
892 brelse(bh);
893
894 /* We're done with the new leaf block, now setup the new
895 hash table. */
896
897 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
898 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
899
900 lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
901
902 for (x = sdp->sd_hash_ptrs; x--; lp++)
903 *lp = cpu_to_be64(bn);
904
905 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
906 dip->i_di.di_blocks++;
907 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
908 dip->i_di.di_payload_format = 0;
909
910 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
911 dip->i_di.di_depth = y;
912
913 gfs2_dinode_out(&dip->i_di, dibh->b_data);
914
915 brelse(dibh);
916
917 return 0;
918}
919
920/**
921 * dir_split_leaf - Split a leaf block into two
922 * @dip: The GFS2 inode
923 * @index:
924 * @leaf_no:
925 *
926 * Returns: 0 on success, error code on failure
927 */
928
929static int dir_split_leaf(struct inode *inode, const struct qstr *name)
930{
931 struct gfs2_inode *dip = GFS2_I(inode);
932 struct buffer_head *nbh, *obh, *dibh;
933 struct gfs2_leaf *nleaf, *oleaf;
934 struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
935 u32 start, len, half_len, divider;
936 u64 bn, *lp, leaf_no;
937 u32 index;
938 int x, moved = 0;
939 int error;
940
941 index = name->hash >> (32 - dip->i_di.di_depth);
942 error = get_leaf_nr(dip, index, &leaf_no);
943 if (error)
944 return error;
945
946 /* Get the old leaf block */
947 error = get_leaf(dip, leaf_no, &obh);
948 if (error)
949 return error;
950
951 oleaf = (struct gfs2_leaf *)obh->b_data;
952 if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) {
953 brelse(obh);
954 return 1; /* can't split */
955 }
956
957 gfs2_trans_add_bh(dip->i_gl, obh, 1);
958
959 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
960 if (!nleaf) {
961 brelse(obh);
962 return -ENOSPC;
963 }
964 bn = nbh->b_blocknr;
965
966 /* Compute the start and len of leaf pointers in the hash table. */
967 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
968 half_len = len >> 1;
969 if (!half_len) {
970 printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index);
971 gfs2_consist_inode(dip);
972 error = -EIO;
973 goto fail_brelse;
974 }
975
976 start = (index & ~(len - 1));
977
978 /* Change the pointers.
979 Don't bother distinguishing stuffed from non-stuffed.
980 This code is complicated enough already. */
981 lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL);
982 /* Change the pointers */
983 for (x = 0; x < half_len; x++)
984 lp[x] = cpu_to_be64(bn);
985
986 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
987 half_len * sizeof(u64));
988 if (error != half_len * sizeof(u64)) {
989 if (error >= 0)
990 error = -EIO;
991 goto fail_lpfree;
992 }
993
994 kfree(lp);
995
996 /* Compute the divider */
997 divider = (start + half_len) << (32 - dip->i_di.di_depth);
998
999 /* Copy the entries */
1000 dirent_first(dip, obh, &dent);
1001
1002 do {
1003 next = dent;
1004 if (dirent_next(dip, obh, &next))
1005 next = NULL;
1006
1007 if (dent->de_inum.no_addr &&
1008 be32_to_cpu(dent->de_hash) < divider) {
1009 struct qstr str;
1010 str.name = (char*)(dent+1);
1011 str.len = be16_to_cpu(dent->de_name_len);
1012 str.hash = be32_to_cpu(dent->de_hash);
1013 new = gfs2_dirent_alloc(inode, nbh, &str);
1014 if (IS_ERR(new)) {
1015 error = PTR_ERR(new);
1016 break;
1017 }
1018
1019 new->de_inum = dent->de_inum; /* No endian worries */
1020 new->de_type = dent->de_type; /* No endian worries */
1021 nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1);
1022
1023 dirent_del(dip, obh, prev, dent);
1024
1025 if (!oleaf->lf_entries)
1026 gfs2_consist_inode(dip);
1027 oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1);
1028
1029 if (!prev)
1030 prev = dent;
1031
1032 moved = 1;
1033 } else {
1034 prev = dent;
1035 }
1036 dent = next;
1037 } while (dent);
1038
1039 oleaf->lf_depth = nleaf->lf_depth;
1040
1041 error = gfs2_meta_inode_buffer(dip, &dibh);
1042 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1043 dip->i_di.di_blocks++;
1044 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1045 brelse(dibh);
1046 }
1047
1048 brelse(obh);
1049 brelse(nbh);
1050
1051 return error;
1052
1053fail_lpfree:
1054 kfree(lp);
1055
1056fail_brelse:
1057 brelse(obh);
1058 brelse(nbh);
1059 return error;
1060}
1061
1062/**
1063 * dir_double_exhash - Double size of ExHash table
1064 * @dip: The GFS2 dinode
1065 *
1066 * Returns: 0 on success, error code on failure
1067 */
1068
1069static int dir_double_exhash(struct gfs2_inode *dip)
1070{
1071 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1072 struct buffer_head *dibh;
1073 u32 hsize;
1074 u64 *buf;
1075 u64 *from, *to;
1076 u64 block;
1077 int x;
1078 int error = 0;
1079
1080 hsize = 1 << dip->i_di.di_depth;
1081 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1082 gfs2_consist_inode(dip);
1083 return -EIO;
1084 }
1085
1086 /* Allocate both the "from" and "to" buffers in one big chunk */
1087
1088 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
1089
1090 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1091 error = gfs2_dir_read_data(dip, (char *)buf,
1092 block * sdp->sd_hash_bsize,
1093 sdp->sd_hash_bsize, 1);
1094 if (error != sdp->sd_hash_bsize) {
1095 if (error >= 0)
1096 error = -EIO;
1097 goto fail;
1098 }
1099
1100 from = buf;
1101 to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
1102
1103 for (x = sdp->sd_hash_ptrs; x--; from++) {
1104 *to++ = *from; /* No endianess worries */
1105 *to++ = *from;
1106 }
1107
1108 error = gfs2_dir_write_data(dip,
1109 (char *)buf + sdp->sd_hash_bsize,
1110 block * sdp->sd_sb.sb_bsize,
1111 sdp->sd_sb.sb_bsize);
1112 if (error != sdp->sd_sb.sb_bsize) {
1113 if (error >= 0)
1114 error = -EIO;
1115 goto fail;
1116 }
1117 }
1118
1119 kfree(buf);
1120
1121 error = gfs2_meta_inode_buffer(dip, &dibh);
1122 if (!gfs2_assert_withdraw(sdp, !error)) {
1123 dip->i_di.di_depth++;
1124 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1125 brelse(dibh);
1126 }
1127
1128 return error;
1129
1130fail:
1131 kfree(buf);
1132 return error;
1133}
1134
1135/**
1136 * compare_dents - compare directory entries by hash value
1137 * @a: first dent
1138 * @b: second dent
1139 *
1140 * When comparing the hash entries of @a to @b:
1141 * gt: returns 1
1142 * lt: returns -1
1143 * eq: returns 0
1144 */
1145
1146static int compare_dents(const void *a, const void *b)
1147{
1148 const struct gfs2_dirent *dent_a, *dent_b;
1149 u32 hash_a, hash_b;
1150 int ret = 0;
1151
1152 dent_a = *(const struct gfs2_dirent **)a;
1153 hash_a = be32_to_cpu(dent_a->de_hash);
1154
1155 dent_b = *(const struct gfs2_dirent **)b;
1156 hash_b = be32_to_cpu(dent_b->de_hash);
1157
1158 if (hash_a > hash_b)
1159 ret = 1;
1160 else if (hash_a < hash_b)
1161 ret = -1;
1162 else {
1163 unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1164 unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1165
1166 if (len_a > len_b)
1167 ret = 1;
1168 else if (len_a < len_b)
1169 ret = -1;
1170 else
1171 ret = memcmp(dent_a + 1, dent_b + 1, len_a);
1172 }
1173
1174 return ret;
1175}
1176
1177/**
1178 * do_filldir_main - read out directory entries
1179 * @dip: The GFS2 inode
1180 * @offset: The offset in the file to read from
1181 * @opaque: opaque data to pass to filldir
1182 * @filldir: The function to pass entries to
1183 * @darr: an array of struct gfs2_dirent pointers to read
1184 * @entries: the number of entries in darr
1185 * @copied: pointer to int that's non-zero if a entry has been copied out
1186 *
1187 * Jump through some hoops to make sure that if there are hash collsions,
1188 * they are read out at the beginning of a buffer. We want to minimize
1189 * the possibility that they will fall into different readdir buffers or
1190 * that someone will want to seek to that location.
1191 *
1192 * Returns: errno, >0 on exception from filldir
1193 */
1194
1195static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1196 void *opaque, gfs2_filldir_t filldir,
1197 const struct gfs2_dirent **darr, u32 entries,
1198 int *copied)
1199{
1200 const struct gfs2_dirent *dent, *dent_next;
1201 struct gfs2_inum inum;
1202 u64 off, off_next;
1203 unsigned int x, y;
1204 int run = 0;
1205 int error = 0;
1206
1207 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1208
1209 dent_next = darr[0];
1210 off_next = be32_to_cpu(dent_next->de_hash);
1211 off_next = gfs2_disk_hash2offset(off_next);
1212
1213 for (x = 0, y = 1; x < entries; x++, y++) {
1214 dent = dent_next;
1215 off = off_next;
1216
1217 if (y < entries) {
1218 dent_next = darr[y];
1219 off_next = be32_to_cpu(dent_next->de_hash);
1220 off_next = gfs2_disk_hash2offset(off_next);
1221
1222 if (off < *offset)
1223 continue;
1224 *offset = off;
1225
1226 if (off_next == off) {
1227 if (*copied && !run)
1228 return 1;
1229 run = 1;
1230 } else
1231 run = 0;
1232 } else {
1233 if (off < *offset)
1234 continue;
1235 *offset = off;
1236 }
1237
1238 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1239
1240 error = filldir(opaque, (const char *)(dent + 1),
1241 be16_to_cpu(dent->de_name_len),
1242 off, &inum,
1243 be16_to_cpu(dent->de_type));
1244 if (error)
1245 return 1;
1246
1247 *copied = 1;
1248 }
1249
1250 /* Increment the *offset by one, so the next time we come into the
1251 do_filldir fxn, we get the next entry instead of the last one in the
1252 current leaf */
1253
1254 (*offset)++;
1255
1256 return 0;
1257}
1258
1259static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1260 gfs2_filldir_t filldir, int *copied,
1261 unsigned *depth, u64 leaf_no)
1262{
1263 struct gfs2_inode *ip = GFS2_I(inode);
1264 struct buffer_head *bh;
1265 struct gfs2_leaf *lf;
1266 unsigned entries = 0;
1267 unsigned leaves = 0;
1268 const struct gfs2_dirent **darr, *dent;
1269 struct dirent_gather g;
1270 struct buffer_head **larr;
1271 int leaf = 0;
1272 int error, i;
1273 u64 lfn = leaf_no;
1274
1275 do {
1276 error = get_leaf(ip, lfn, &bh);
1277 if (error)
1278 goto out;
1279 lf = (struct gfs2_leaf *)bh->b_data;
1280 if (leaves == 0)
1281 *depth = be16_to_cpu(lf->lf_depth);
1282 entries += be16_to_cpu(lf->lf_entries);
1283 leaves++;
1284 lfn = be64_to_cpu(lf->lf_next);
1285 brelse(bh);
1286 } while(lfn);
1287
1288 if (!entries)
1289 return 0;
1290
1291 error = -ENOMEM;
1292 larr = vmalloc((leaves + entries) * sizeof(void *));
1293 if (!larr)
1294 goto out;
1295 darr = (const struct gfs2_dirent **)(larr + leaves);
1296 g.pdent = darr;
1297 g.offset = 0;
1298 lfn = leaf_no;
1299
1300 do {
1301 error = get_leaf(ip, lfn, &bh);
1302 if (error)
1303 goto out_kfree;
1304 lf = (struct gfs2_leaf *)bh->b_data;
1305 lfn = be64_to_cpu(lf->lf_next);
1306 if (lf->lf_entries) {
1307 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1308 gfs2_dirent_gather, NULL, &g);
1309 error = PTR_ERR(dent);
1310 if (IS_ERR(dent)) {
1311 goto out_kfree;
1312 }
1313 error = 0;
1314 larr[leaf++] = bh;
1315 } else {
1316 brelse(bh);
1317 }
1318 } while(lfn);
1319
1320 error = do_filldir_main(ip, offset, opaque, filldir, darr,
1321 entries, copied);
1322out_kfree:
1323 for(i = 0; i < leaf; i++)
1324 brelse(larr[i]);
1325 vfree(larr);
1326out:
1327 return error;
1328}
1329
1330/**
1331 * dir_e_read - Reads the entries from a directory into a filldir buffer
1332 * @dip: dinode pointer
1333 * @offset: the hash of the last entry read shifted to the right once
1334 * @opaque: buffer for the filldir function to fill
1335 * @filldir: points to the filldir function to use
1336 *
1337 * Returns: errno
1338 */
1339
1340static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1341 gfs2_filldir_t filldir)
1342{
1343 struct gfs2_inode *dip = GFS2_I(inode);
1344 struct gfs2_sbd *sdp = GFS2_SB(inode);
1345 u32 hsize, len = 0;
1346 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1347 u32 hash, index;
1348 u64 *lp;
1349 int copied = 0;
1350 int error = 0;
1351 unsigned depth = 0;
1352
1353 hsize = 1 << dip->i_di.di_depth;
1354 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1355 gfs2_consist_inode(dip);
1356 return -EIO;
1357 }
1358
1359 hash = gfs2_dir_offset2hash(*offset);
1360 index = hash >> (32 - dip->i_di.di_depth);
1361
1362 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1363 if (!lp)
1364 return -ENOMEM;
1365
1366 while (index < hsize) {
1367 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1368 ht_offset = index - lp_offset;
1369
1370 if (ht_offset_cur != ht_offset) {
1371 error = gfs2_dir_read_data(dip, (char *)lp,
1372 ht_offset * sizeof(u64),
1373 sdp->sd_hash_bsize, 1);
1374 if (error != sdp->sd_hash_bsize) {
1375 if (error >= 0)
1376 error = -EIO;
1377 goto out;
1378 }
1379 ht_offset_cur = ht_offset;
1380 }
1381
1382 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1383 &copied, &depth,
1384 be64_to_cpu(lp[lp_offset]));
1385 if (error)
1386 break;
1387
1388 len = 1 << (dip->i_di.di_depth - depth);
1389 index = (index & ~(len - 1)) + len;
1390 }
1391
1392out:
1393 kfree(lp);
1394 if (error > 0)
1395 error = 0;
1396 return error;
1397}
1398
1399int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1400 gfs2_filldir_t filldir)
1401{
1402 struct gfs2_inode *dip = GFS2_I(inode);
1403 struct dirent_gather g;
1404 const struct gfs2_dirent **darr, *dent;
1405 struct buffer_head *dibh;
1406 int copied = 0;
1407 int error;
1408
1409 if (!dip->i_di.di_entries)
1410 return 0;
1411
1412 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1413 return dir_e_read(inode, offset, opaque, filldir);
1414
1415 if (!gfs2_is_stuffed(dip)) {
1416 gfs2_consist_inode(dip);
1417 return -EIO;
1418 }
1419
1420 error = gfs2_meta_inode_buffer(dip, &dibh);
1421 if (error)
1422 return error;
1423
1424 error = -ENOMEM;
1425 darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *),
1426 GFP_KERNEL);
1427 if (darr) {
1428 g.pdent = darr;
1429 g.offset = 0;
1430 dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
1431 gfs2_dirent_gather, NULL, &g);
1432 if (IS_ERR(dent)) {
1433 error = PTR_ERR(dent);
1434 goto out;
1435 }
1436 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1437 dip->i_di.di_entries, &copied);
1438out:
1439 kfree(darr);
1440 }
1441
1442 if (error > 0)
1443 error = 0;
1444
1445 brelse(dibh);
1446
1447 return error;
1448}
1449
1450/**
1451 * gfs2_dir_search - Search a directory
1452 * @dip: The GFS2 inode
1453 * @filename:
1454 * @inode:
1455 *
1456 * This routine searches a directory for a file or another directory.
1457 * Assumes a glock is held on dip.
1458 *
1459 * Returns: errno
1460 */
1461
1462int gfs2_dir_search(struct inode *dir, const struct qstr *name,
1463 struct gfs2_inum *inum, unsigned int *type)
1464{
1465 struct buffer_head *bh;
1466 struct gfs2_dirent *dent;
1467
1468 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1469 if (dent) {
1470 if (IS_ERR(dent))
1471 return PTR_ERR(dent);
1472 if (inum)
1473 gfs2_inum_in(inum, (char *)&dent->de_inum);
1474 if (type)
1475 *type = be16_to_cpu(dent->de_type);
1476 brelse(bh);
1477 return 0;
1478 }
1479 return -ENOENT;
1480}
1481
1482static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1483{
1484 struct buffer_head *bh, *obh;
1485 struct gfs2_inode *ip = GFS2_I(inode);
1486 struct gfs2_leaf *leaf, *oleaf;
1487 int error;
1488 u32 index;
1489 u64 bn;
1490
1491 index = name->hash >> (32 - ip->i_di.di_depth);
1492 error = get_first_leaf(ip, index, &obh);
1493 if (error)
1494 return error;
1495 do {
1496 oleaf = (struct gfs2_leaf *)obh->b_data;
1497 bn = be64_to_cpu(oleaf->lf_next);
1498 if (!bn)
1499 break;
1500 brelse(obh);
1501 error = get_leaf(ip, bn, &obh);
1502 if (error)
1503 return error;
1504 } while(1);
1505
1506 gfs2_trans_add_bh(ip->i_gl, obh, 1);
1507
1508 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1509 if (!leaf) {
1510 brelse(obh);
1511 return -ENOSPC;
1512 }
1513 oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
1514 brelse(bh);
1515 brelse(obh);
1516
1517 error = gfs2_meta_inode_buffer(ip, &bh);
1518 if (error)
1519 return error;
1520 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1521 ip->i_di.di_blocks++;
1522 gfs2_dinode_out(&ip->i_di, bh->b_data);
1523 brelse(bh);
1524 return 0;
1525}
1526
1527/**
1528 * gfs2_dir_add - Add new filename into directory
1529 * @dip: The GFS2 inode
1530 * @filename: The new name
1531 * @inode: The inode number of the entry
1532 * @type: The type of the entry
1533 *
1534 * Returns: 0 on success, error code on failure
1535 */
1536
1537int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1538 const struct gfs2_inum *inum, unsigned type)
1539{
1540 struct gfs2_inode *ip = GFS2_I(inode);
1541 struct buffer_head *bh;
1542 struct gfs2_dirent *dent;
1543 struct gfs2_leaf *leaf;
1544 int error;
1545
1546 while(1) {
1547 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
1548 &bh);
1549 if (dent) {
1550 if (IS_ERR(dent))
1551 return PTR_ERR(dent);
1552 dent = gfs2_init_dirent(inode, dent, name, bh);
1553 gfs2_inum_out(inum, (char *)&dent->de_inum);
1554 dent->de_type = cpu_to_be16(type);
1555 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1556 leaf = (struct gfs2_leaf *)bh->b_data;
1557 leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1);
1558 }
1559 brelse(bh);
1560 error = gfs2_meta_inode_buffer(ip, &bh);
1561 if (error)
1562 break;
1563 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1564 ip->i_di.di_entries++;
1565 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1566 gfs2_dinode_out(&ip->i_di, bh->b_data);
1567 brelse(bh);
1568 error = 0;
1569 break;
1570 }
1571 if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
1572 error = dir_make_exhash(inode);
1573 if (error)
1574 break;
1575 continue;
1576 }
1577 error = dir_split_leaf(inode, name);
1578 if (error == 0)
1579 continue;
1580 if (error < 0)
1581 break;
1582 if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1583 error = dir_double_exhash(ip);
1584 if (error)
1585 break;
1586 error = dir_split_leaf(inode, name);
1587 if (error < 0)
1588 break;
1589 if (error == 0)
1590 continue;
1591 }
1592 error = dir_new_leaf(inode, name);
1593 if (!error)
1594 continue;
1595 error = -ENOSPC;
1596 break;
1597 }
1598 return error;
1599}
1600
1601
1602/**
1603 * gfs2_dir_del - Delete a directory entry
1604 * @dip: The GFS2 inode
1605 * @filename: The filename
1606 *
1607 * Returns: 0 on success, error code on failure
1608 */
1609
1610int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1611{
1612 struct gfs2_dirent *dent, *prev = NULL;
1613 struct buffer_head *bh;
1614 int error;
1615
1616 /* Returns _either_ the entry (if its first in block) or the
1617 previous entry otherwise */
1618 dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
1619 if (!dent) {
1620 gfs2_consist_inode(dip);
1621 return -EIO;
1622 }
1623 if (IS_ERR(dent)) {
1624 gfs2_consist_inode(dip);
1625 return PTR_ERR(dent);
1626 }
1627 /* If not first in block, adjust pointers accordingly */
1628 if (gfs2_dirent_find(dent, name, NULL) == 0) {
1629 prev = dent;
1630 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1631 }
1632
1633 dirent_del(dip, bh, prev, dent);
1634 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1635 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1636 u16 entries = be16_to_cpu(leaf->lf_entries);
1637 if (!entries)
1638 gfs2_consist_inode(dip);
1639 leaf->lf_entries = cpu_to_be16(--entries);
1640 }
1641 brelse(bh);
1642
1643 error = gfs2_meta_inode_buffer(dip, &bh);
1644 if (error)
1645 return error;
1646
1647 if (!dip->i_di.di_entries)
1648 gfs2_consist_inode(dip);
1649 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1650 dip->i_di.di_entries--;
1651 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1652 gfs2_dinode_out(&dip->i_di, bh->b_data);
1653 brelse(bh);
1654 mark_inode_dirty(&dip->i_inode);
1655
1656 return error;
1657}
1658
1659/**
1660 * gfs2_dir_mvino - Change inode number of directory entry
1661 * @dip: The GFS2 inode
1662 * @filename:
1663 * @new_inode:
1664 *
1665 * This routine changes the inode number of a directory entry. It's used
1666 * by rename to change ".." when a directory is moved.
1667 * Assumes a glock is held on dvp.
1668 *
1669 * Returns: errno
1670 */
1671
1672int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1673 struct gfs2_inum *inum, unsigned int new_type)
1674{
1675 struct buffer_head *bh;
1676 struct gfs2_dirent *dent;
1677 int error;
1678
1679 dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
1680 if (!dent) {
1681 gfs2_consist_inode(dip);
1682 return -EIO;
1683 }
1684 if (IS_ERR(dent))
1685 return PTR_ERR(dent);
1686
1687 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1688 gfs2_inum_out(inum, (char *)&dent->de_inum);
1689 dent->de_type = cpu_to_be16(new_type);
1690
1691 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1692 brelse(bh);
1693 error = gfs2_meta_inode_buffer(dip, &bh);
1694 if (error)
1695 return error;
1696 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1697 }
1698
1699 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1700 gfs2_dinode_out(&dip->i_di, bh->b_data);
1701 brelse(bh);
1702 return 0;
1703}
1704
1705/**
1706 * foreach_leaf - call a function for each leaf in a directory
1707 * @dip: the directory
1708 * @lc: the function to call for each each
1709 * @data: private data to pass to it
1710 *
1711 * Returns: errno
1712 */
1713
1714static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1715{
1716 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1717 struct buffer_head *bh;
1718 struct gfs2_leaf *leaf;
1719 u32 hsize, len;
1720 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1721 u32 index = 0;
1722 u64 *lp;
1723 u64 leaf_no;
1724 int error = 0;
1725
1726 hsize = 1 << dip->i_di.di_depth;
1727 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1728 gfs2_consist_inode(dip);
1729 return -EIO;
1730 }
1731
1732 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1733 if (!lp)
1734 return -ENOMEM;
1735
1736 while (index < hsize) {
1737 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1738 ht_offset = index - lp_offset;
1739
1740 if (ht_offset_cur != ht_offset) {
1741 error = gfs2_dir_read_data(dip, (char *)lp,
1742 ht_offset * sizeof(u64),
1743 sdp->sd_hash_bsize, 1);
1744 if (error != sdp->sd_hash_bsize) {
1745 if (error >= 0)
1746 error = -EIO;
1747 goto out;
1748 }
1749 ht_offset_cur = ht_offset;
1750 }
1751
1752 leaf_no = be64_to_cpu(lp[lp_offset]);
1753 if (leaf_no) {
1754 error = get_leaf(dip, leaf_no, &bh);
1755 if (error)
1756 goto out;
1757 leaf = (struct gfs2_leaf *)bh->b_data;
1758 len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth));
1759 brelse(bh);
1760
1761 error = lc(dip, index, len, leaf_no, data);
1762 if (error)
1763 goto out;
1764
1765 index = (index & ~(len - 1)) + len;
1766 } else
1767 index++;
1768 }
1769
1770 if (index != hsize) {
1771 gfs2_consist_inode(dip);
1772 error = -EIO;
1773 }
1774
1775out:
1776 kfree(lp);
1777
1778 return error;
1779}
1780
1781/**
1782 * leaf_dealloc - Deallocate a directory leaf
1783 * @dip: the directory
1784 * @index: the hash table offset in the directory
1785 * @len: the number of pointers to this leaf
1786 * @leaf_no: the leaf number
1787 * @data: not used
1788 *
1789 * Returns: errno
1790 */
1791
1792static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1793 u64 leaf_no, void *data)
1794{
1795 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1796 struct gfs2_leaf *tmp_leaf;
1797 struct gfs2_rgrp_list rlist;
1798 struct buffer_head *bh, *dibh;
1799 u64 blk, nblk;
1800 unsigned int rg_blocks = 0, l_blocks = 0;
1801 char *ht;
1802 unsigned int x, size = len * sizeof(u64);
1803 int error;
1804
1805 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1806
1807 ht = kzalloc(size, GFP_KERNEL);
1808 if (!ht)
1809 return -ENOMEM;
1810
1811 gfs2_alloc_get(dip);
1812
1813 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1814 if (error)
1815 goto out;
1816
1817 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
1818 if (error)
1819 goto out_qs;
1820
1821 /* Count the number of leaves */
1822
1823 for (blk = leaf_no; blk; blk = nblk) {
1824 error = get_leaf(dip, blk, &bh);
1825 if (error)
1826 goto out_rlist;
1827 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1828 nblk = be64_to_cpu(tmp_leaf->lf_next);
1829 brelse(bh);
1830
1831 gfs2_rlist_add(sdp, &rlist, blk);
1832 l_blocks++;
1833 }
1834
1835 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1836
1837 for (x = 0; x < rlist.rl_rgrps; x++) {
1838 struct gfs2_rgrpd *rgd;
1839 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1840 rg_blocks += rgd->rd_ri.ri_length;
1841 }
1842
1843 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1844 if (error)
1845 goto out_rlist;
1846
1847 error = gfs2_trans_begin(sdp,
1848 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
1849 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1850 if (error)
1851 goto out_rg_gunlock;
1852
1853 for (blk = leaf_no; blk; blk = nblk) {
1854 error = get_leaf(dip, blk, &bh);
1855 if (error)
1856 goto out_end_trans;
1857 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1858 nblk = be64_to_cpu(tmp_leaf->lf_next);
1859 brelse(bh);
1860
1861 gfs2_free_meta(dip, blk, 1);
1862
1863 if (!dip->i_di.di_blocks)
1864 gfs2_consist_inode(dip);
1865 dip->i_di.di_blocks--;
1866 }
1867
1868 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
1869 if (error != size) {
1870 if (error >= 0)
1871 error = -EIO;
1872 goto out_end_trans;
1873 }
1874
1875 error = gfs2_meta_inode_buffer(dip, &dibh);
1876 if (error)
1877 goto out_end_trans;
1878
1879 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1880 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1881 brelse(dibh);
1882
1883out_end_trans:
1884 gfs2_trans_end(sdp);
1885out_rg_gunlock:
1886 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1887out_rlist:
1888 gfs2_rlist_free(&rlist);
1889 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
1890out_qs:
1891 gfs2_quota_unhold(dip);
1892out:
1893 gfs2_alloc_put(dip);
1894 kfree(ht);
1895 return error;
1896}
1897
1898/**
1899 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
1900 * @dip: the directory
1901 *
1902 * Dealloc all on-disk directory leaves to FREEMETA state
1903 * Change on-disk inode type to "regular file"
1904 *
1905 * Returns: errno
1906 */
1907
1908int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1909{
1910 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1911 struct buffer_head *bh;
1912 int error;
1913
1914 /* Dealloc on-disk leaves to FREEMETA state */
1915 error = foreach_leaf(dip, leaf_dealloc, NULL);
1916 if (error)
1917 return error;
1918
1919 /* Make this a regular file in case we crash.
1920 (We don't want to free these blocks a second time.) */
1921
1922 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1923 if (error)
1924 return error;
1925
1926 error = gfs2_meta_inode_buffer(dip, &bh);
1927 if (!error) {
1928 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1929 ((struct gfs2_dinode *)bh->b_data)->di_mode =
1930 cpu_to_be32(S_IFREG);
1931 brelse(bh);
1932 }
1933
1934 gfs2_trans_end(sdp);
1935
1936 return error;
1937}
1938
1939/**
1940 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
1941 * @ip: the file being written to
1942 * @filname: the filename that's going to be added
1943 *
1944 * Returns: 1 if alloc required, 0 if not, -ve on error
1945 */
1946
1947int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name)
1948{
1949 struct gfs2_dirent *dent;
1950 struct buffer_head *bh;
1951
1952 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
1953 if (!dent) {
1954 return 1;
1955 }
1956 if (IS_ERR(dent))
1957 return PTR_ERR(dent);
1958 brelse(bh);
1959 return 0;
1960}
1961
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
new file mode 100644
index 000000000000..371233419b07
--- /dev/null
+++ b/fs/gfs2/dir.h
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __DIR_DOT_H__
11#define __DIR_DOT_H__
12
13#include <linux/dcache.h>
14
15struct inode;
16struct gfs2_inode;
17struct gfs2_inum;
18
19/**
20 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
21 * @opaque: opaque data used by the function
22 * @name: the name of the directory entry
23 * @length: the length of the name
24 * @offset: the entry's offset in the directory
25 * @inum: the inode number the entry points to
26 * @type: the type of inode the entry points to
27 *
28 * Returns: 0 on success, 1 if buffer full
29 */
30
31typedef int (*gfs2_filldir_t) (void *opaque,
32 const char *name, unsigned int length,
33 u64 offset,
34 struct gfs2_inum *inum, unsigned int type);
35
36int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
37 struct gfs2_inum *inum, unsigned int *type);
38int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
39 const struct gfs2_inum *inum, unsigned int type);
40int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
41int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
42 gfs2_filldir_t filldir);
43int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
44 struct gfs2_inum *new_inum, unsigned int new_type);
45
46int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
47
48int gfs2_diradd_alloc_required(struct inode *dir,
49 const struct qstr *filename);
50int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
51 struct buffer_head **bhp);
52
53static inline u32 gfs2_disk_hash(const char *data, int len)
54{
55 return crc32_le((u32)~0, data, len) ^ (u32)~0;
56}
57
58
59static inline void gfs2_str2qstr(struct qstr *name, const char *fname)
60{
61 name->name = fname;
62 name->len = strlen(fname);
63 name->hash = gfs2_disk_hash(name->name, name->len);
64}
65
66/* N.B. This probably ought to take inum & type as args as well */
67static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent)
68{
69 dent->de_inum.no_addr = cpu_to_be64(0);
70 dent->de_inum.no_formal_ino = cpu_to_be64(0);
71 dent->de_hash = cpu_to_be32(name->hash);
72 dent->de_rec_len = cpu_to_be16(reclen);
73 dent->de_name_len = cpu_to_be16(name->len);
74 dent->de_type = cpu_to_be16(0);
75 memset(dent->__pad, 0, sizeof(dent->__pad));
76 memcpy(dent + 1, name->name, name->len);
77}
78
79#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
new file mode 100644
index 000000000000..92c54e9b0dc3
--- /dev/null
+++ b/fs/gfs2/eaops.c
@@ -0,0 +1,230 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "util.h"
26
27/**
28 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
29 * @namep: ea name, possibly with type appended
30 *
31 * Returns: GFS2_EATYPE_XXX
32 */
33
34unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
35{
36 unsigned int type;
37
38 if (strncmp(name, "system.", 7) == 0) {
39 type = GFS2_EATYPE_SYS;
40 if (truncated_name)
41 *truncated_name = name + sizeof("system.") - 1;
42 } else if (strncmp(name, "user.", 5) == 0) {
43 type = GFS2_EATYPE_USR;
44 if (truncated_name)
45 *truncated_name = name + sizeof("user.") - 1;
46 } else if (strncmp(name, "security.", 9) == 0) {
47 type = GFS2_EATYPE_SECURITY;
48 if (truncated_name)
49 *truncated_name = name + sizeof("security.") - 1;
50 } else {
51 type = GFS2_EATYPE_UNUSED;
52 if (truncated_name)
53 *truncated_name = NULL;
54 }
55
56 return type;
57}
58
59static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
60{
61 struct inode *inode = &ip->i_inode;
62 int error = permission(inode, MAY_READ, NULL);
63 if (error)
64 return error;
65
66 return gfs2_ea_get_i(ip, er);
67}
68
69static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
70{
71 struct inode *inode = &ip->i_inode;
72
73 if (S_ISREG(inode->i_mode) ||
74 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
75 int error = permission(inode, MAY_WRITE, NULL);
76 if (error)
77 return error;
78 } else
79 return -EPERM;
80
81 return gfs2_ea_set_i(ip, er);
82}
83
84static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
85{
86 struct inode *inode = &ip->i_inode;
87
88 if (S_ISREG(inode->i_mode) ||
89 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
90 int error = permission(inode, MAY_WRITE, NULL);
91 if (error)
92 return error;
93 } else
94 return -EPERM;
95
96 return gfs2_ea_remove_i(ip, er);
97}
98
99static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
100{
101 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
102 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
103 !capable(CAP_SYS_ADMIN))
104 return -EPERM;
105
106 if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 &&
107 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
108 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
109 return -EOPNOTSUPP;
110
111
112
113 return gfs2_ea_get_i(ip, er);
114}
115
116static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
117{
118 int remove = 0;
119 int error;
120
121 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
122 if (!(er->er_flags & GFS2_ERF_MODE)) {
123 er->er_mode = ip->i_di.di_mode;
124 er->er_flags |= GFS2_ERF_MODE;
125 }
126 error = gfs2_acl_validate_set(ip, 1, er,
127 &remove, &er->er_mode);
128 if (error)
129 return error;
130 error = gfs2_ea_set_i(ip, er);
131 if (error)
132 return error;
133 if (remove)
134 gfs2_ea_remove_i(ip, er);
135 return 0;
136
137 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
138 error = gfs2_acl_validate_set(ip, 0, er,
139 &remove, NULL);
140 if (error)
141 return error;
142 if (!remove)
143 error = gfs2_ea_set_i(ip, er);
144 else {
145 error = gfs2_ea_remove_i(ip, er);
146 if (error == -ENODATA)
147 error = 0;
148 }
149 return error;
150 }
151
152 return -EPERM;
153}
154
155static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
156{
157 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
158 int error = gfs2_acl_validate_remove(ip, 1);
159 if (error)
160 return error;
161
162 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
163 int error = gfs2_acl_validate_remove(ip, 0);
164 if (error)
165 return error;
166
167 } else
168 return -EPERM;
169
170 return gfs2_ea_remove_i(ip, er);
171}
172
173static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
174{
175 struct inode *inode = &ip->i_inode;
176 int error = permission(inode, MAY_READ, NULL);
177 if (error)
178 return error;
179
180 return gfs2_ea_get_i(ip, er);
181}
182
183static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
184{
185 struct inode *inode = &ip->i_inode;
186 int error = permission(inode, MAY_WRITE, NULL);
187 if (error)
188 return error;
189
190 return gfs2_ea_set_i(ip, er);
191}
192
193static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
194{
195 struct inode *inode = &ip->i_inode;
196 int error = permission(inode, MAY_WRITE, NULL);
197 if (error)
198 return error;
199
200 return gfs2_ea_remove_i(ip, er);
201}
202
203static struct gfs2_eattr_operations gfs2_user_eaops = {
204 .eo_get = user_eo_get,
205 .eo_set = user_eo_set,
206 .eo_remove = user_eo_remove,
207 .eo_name = "user",
208};
209
210struct gfs2_eattr_operations gfs2_system_eaops = {
211 .eo_get = system_eo_get,
212 .eo_set = system_eo_set,
213 .eo_remove = system_eo_remove,
214 .eo_name = "system",
215};
216
217static struct gfs2_eattr_operations gfs2_security_eaops = {
218 .eo_get = security_eo_get,
219 .eo_set = security_eo_set,
220 .eo_remove = security_eo_remove,
221 .eo_name = "security",
222};
223
224struct gfs2_eattr_operations *gfs2_ea_ops[] = {
225 NULL,
226 &gfs2_user_eaops,
227 &gfs2_system_eaops,
228 &gfs2_security_eaops,
229};
230
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
new file mode 100644
index 000000000000..508b4f7a2449
--- /dev/null
+++ b/fs/gfs2/eaops.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14struct gfs2_inode;
15
16struct gfs2_eattr_operations {
17 int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
18 int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
19 int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
20 char *eo_name;
21};
22
23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
24
25extern struct gfs2_eattr_operations gfs2_system_eaops;
26
27extern struct gfs2_eattr_operations *gfs2_ea_ops[];
28
29#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
new file mode 100644
index 000000000000..a65a4ccfd4dd
--- /dev/null
+++ b/fs/gfs2/eattr.c
@@ -0,0 +1,1501 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "glock.h"
26#include "inode.h"
27#include "meta_io.h"
28#include "quota.h"
29#include "rgrp.h"
30#include "trans.h"
31#include "util.h"
32
33/**
34 * ea_calc_size - returns the acutal number of bytes the request will take up
35 * (not counting any unstuffed data blocks)
36 * @sdp:
37 * @er:
38 * @size:
39 *
40 * Returns: 1 if the EA should be stuffed
41 */
42
43static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
44 unsigned int *size)
45{
46 *size = GFS2_EAREQ_SIZE_STUFFED(er);
47 if (*size <= sdp->sd_jbsize)
48 return 1;
49
50 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
51
52 return 0;
53}
54
55static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
56{
57 unsigned int size;
58
59 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
60 return -ERANGE;
61
62 ea_calc_size(sdp, er, &size);
63
64 /* This can only happen with 512 byte blocks */
65 if (size > sdp->sd_jbsize)
66 return -ERANGE;
67
68 return 0;
69}
70
71typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
72 struct gfs2_ea_header *ea,
73 struct gfs2_ea_header *prev, void *private);
74
75static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
76 ea_call_t ea_call, void *data)
77{
78 struct gfs2_ea_header *ea, *prev = NULL;
79 int error = 0;
80
81 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_EA))
82 return -EIO;
83
84 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
85 if (!GFS2_EA_REC_LEN(ea))
86 goto fail;
87 if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
88 bh->b_data + bh->b_size))
89 goto fail;
90 if (!GFS2_EATYPE_VALID(ea->ea_type))
91 goto fail;
92
93 error = ea_call(ip, bh, ea, prev, data);
94 if (error)
95 return error;
96
97 if (GFS2_EA_IS_LAST(ea)) {
98 if ((char *)GFS2_EA2NEXT(ea) !=
99 bh->b_data + bh->b_size)
100 goto fail;
101 break;
102 }
103 }
104
105 return error;
106
107fail:
108 gfs2_consist_inode(ip);
109 return -EIO;
110}
111
112static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
113{
114 struct buffer_head *bh, *eabh;
115 u64 *eablk, *end;
116 int error;
117
118 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
119 if (error)
120 return error;
121
122 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
123 error = ea_foreach_i(ip, bh, ea_call, data);
124 goto out;
125 }
126
127 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_IN)) {
128 error = -EIO;
129 goto out;
130 }
131
132 eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
133 end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;
134
135 for (; eablk < end; eablk++) {
136 u64 bn;
137
138 if (!*eablk)
139 break;
140 bn = be64_to_cpu(*eablk);
141
142 error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh);
143 if (error)
144 break;
145 error = ea_foreach_i(ip, eabh, ea_call, data);
146 brelse(eabh);
147 if (error)
148 break;
149 }
150out:
151 brelse(bh);
152 return error;
153}
154
155struct ea_find {
156 struct gfs2_ea_request *ef_er;
157 struct gfs2_ea_location *ef_el;
158};
159
160static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
161 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
162 void *private)
163{
164 struct ea_find *ef = private;
165 struct gfs2_ea_request *er = ef->ef_er;
166
167 if (ea->ea_type == GFS2_EATYPE_UNUSED)
168 return 0;
169
170 if (ea->ea_type == er->er_type) {
171 if (ea->ea_name_len == er->er_name_len &&
172 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
173 struct gfs2_ea_location *el = ef->ef_el;
174 get_bh(bh);
175 el->el_bh = bh;
176 el->el_ea = ea;
177 el->el_prev = prev;
178 return 1;
179 }
180 }
181
182 return 0;
183}
184
185int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
186 struct gfs2_ea_location *el)
187{
188 struct ea_find ef;
189 int error;
190
191 ef.ef_er = er;
192 ef.ef_el = el;
193
194 memset(el, 0, sizeof(struct gfs2_ea_location));
195
196 error = ea_foreach(ip, ea_find_i, &ef);
197 if (error > 0)
198 return 0;
199
200 return error;
201}
202
203/**
204 * ea_dealloc_unstuffed -
205 * @ip:
206 * @bh:
207 * @ea:
208 * @prev:
209 * @private:
210 *
211 * Take advantage of the fact that all unstuffed blocks are
212 * allocated from the same RG. But watch, this may not always
213 * be true.
214 *
215 * Returns: errno
216 */
217
218static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
219 struct gfs2_ea_header *ea,
220 struct gfs2_ea_header *prev, void *private)
221{
222 int *leave = private;
223 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
224 struct gfs2_rgrpd *rgd;
225 struct gfs2_holder rg_gh;
226 struct buffer_head *dibh;
227 u64 *dataptrs, bn = 0;
228 u64 bstart = 0;
229 unsigned int blen = 0;
230 unsigned int blks = 0;
231 unsigned int x;
232 int error;
233
234 if (GFS2_EA_IS_STUFFED(ea))
235 return 0;
236
237 dataptrs = GFS2_EA2DATAPTRS(ea);
238 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
239 if (*dataptrs) {
240 blks++;
241 bn = be64_to_cpu(*dataptrs);
242 }
243 }
244 if (!blks)
245 return 0;
246
247 rgd = gfs2_blk2rgrpd(sdp, bn);
248 if (!rgd) {
249 gfs2_consist_inode(ip);
250 return -EIO;
251 }
252
253 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
254 if (error)
255 return error;
256
257 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
258 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
259 if (error)
260 goto out_gunlock;
261
262 gfs2_trans_add_bh(ip->i_gl, bh, 1);
263
264 dataptrs = GFS2_EA2DATAPTRS(ea);
265 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
266 if (!*dataptrs)
267 break;
268 bn = be64_to_cpu(*dataptrs);
269
270 if (bstart + blen == bn)
271 blen++;
272 else {
273 if (bstart)
274 gfs2_free_meta(ip, bstart, blen);
275 bstart = bn;
276 blen = 1;
277 }
278
279 *dataptrs = 0;
280 if (!ip->i_di.di_blocks)
281 gfs2_consist_inode(ip);
282 ip->i_di.di_blocks--;
283 }
284 if (bstart)
285 gfs2_free_meta(ip, bstart, blen);
286
287 if (prev && !leave) {
288 u32 len;
289
290 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
291 prev->ea_rec_len = cpu_to_be32(len);
292
293 if (GFS2_EA_IS_LAST(ea))
294 prev->ea_flags |= GFS2_EAFLAG_LAST;
295 } else {
296 ea->ea_type = GFS2_EATYPE_UNUSED;
297 ea->ea_num_ptrs = 0;
298 }
299
300 error = gfs2_meta_inode_buffer(ip, &dibh);
301 if (!error) {
302 ip->i_di.di_ctime = get_seconds();
303 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
304 gfs2_dinode_out(&ip->i_di, dibh->b_data);
305 brelse(dibh);
306 }
307
308 gfs2_trans_end(sdp);
309
310out_gunlock:
311 gfs2_glock_dq_uninit(&rg_gh);
312 return error;
313}
314
315static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
316 struct gfs2_ea_header *ea,
317 struct gfs2_ea_header *prev, int leave)
318{
319 struct gfs2_alloc *al;
320 int error;
321
322 al = gfs2_alloc_get(ip);
323
324 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
325 if (error)
326 goto out_alloc;
327
328 error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
329 if (error)
330 goto out_quota;
331
332 error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL);
333
334 gfs2_glock_dq_uninit(&al->al_ri_gh);
335
336out_quota:
337 gfs2_quota_unhold(ip);
338out_alloc:
339 gfs2_alloc_put(ip);
340 return error;
341}
342
343struct ea_list {
344 struct gfs2_ea_request *ei_er;
345 unsigned int ei_size;
346};
347
348static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
349 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
350 void *private)
351{
352 struct ea_list *ei = private;
353 struct gfs2_ea_request *er = ei->ei_er;
354 unsigned int ea_size = gfs2_ea_strlen(ea);
355
356 if (ea->ea_type == GFS2_EATYPE_UNUSED)
357 return 0;
358
359 if (er->er_data_len) {
360 char *prefix = NULL;
361 unsigned int l = 0;
362 char c = 0;
363
364 if (ei->ei_size + ea_size > er->er_data_len)
365 return -ERANGE;
366
367 switch (ea->ea_type) {
368 case GFS2_EATYPE_USR:
369 prefix = "user.";
370 l = 5;
371 break;
372 case GFS2_EATYPE_SYS:
373 prefix = "system.";
374 l = 7;
375 break;
376 case GFS2_EATYPE_SECURITY:
377 prefix = "security.";
378 l = 9;
379 break;
380 }
381
382 BUG_ON(l == 0);
383
384 memcpy(er->er_data + ei->ei_size, prefix, l);
385 memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
386 ea->ea_name_len);
387 memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
388 }
389
390 ei->ei_size += ea_size;
391
392 return 0;
393}
394
395/**
396 * gfs2_ea_list -
397 * @ip:
398 * @er:
399 *
400 * Returns: actual size of data on success, -errno on error
401 */
402
403int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
404{
405 struct gfs2_holder i_gh;
406 int error;
407
408 if (!er->er_data || !er->er_data_len) {
409 er->er_data = NULL;
410 er->er_data_len = 0;
411 }
412
413 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
414 if (error)
415 return error;
416
417 if (ip->i_di.di_eattr) {
418 struct ea_list ei = { .ei_er = er, .ei_size = 0 };
419
420 error = ea_foreach(ip, ea_list_i, &ei);
421 if (!error)
422 error = ei.ei_size;
423 }
424
425 gfs2_glock_dq_uninit(&i_gh);
426
427 return error;
428}
429
430/**
431 * ea_get_unstuffed - actually copies the unstuffed data into the
432 * request buffer
433 * @ip: The GFS2 inode
434 * @ea: The extended attribute header structure
435 * @data: The data to be copied
436 *
437 * Returns: errno
438 */
439
440static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
441 char *data)
442{
443 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
444 struct buffer_head **bh;
445 unsigned int amount = GFS2_EA_DATA_LEN(ea);
446 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
447 u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
448 unsigned int x;
449 int error = 0;
450
451 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
452 if (!bh)
453 return -ENOMEM;
454
455 for (x = 0; x < nptrs; x++) {
456 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
457 bh + x);
458 if (error) {
459 while (x--)
460 brelse(bh[x]);
461 goto out;
462 }
463 dataptrs++;
464 }
465
466 for (x = 0; x < nptrs; x++) {
467 error = gfs2_meta_wait(sdp, bh[x]);
468 if (error) {
469 for (; x < nptrs; x++)
470 brelse(bh[x]);
471 goto out;
472 }
473 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
474 for (; x < nptrs; x++)
475 brelse(bh[x]);
476 error = -EIO;
477 goto out;
478 }
479
480 memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header),
481 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
482
483 amount -= sdp->sd_jbsize;
484 data += sdp->sd_jbsize;
485
486 brelse(bh[x]);
487 }
488
489out:
490 kfree(bh);
491 return error;
492}
493
494int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
495 char *data)
496{
497 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
498 memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea));
499 return 0;
500 } else
501 return ea_get_unstuffed(ip, el->el_ea, data);
502}
503
504/**
505 * gfs2_ea_get_i -
506 * @ip: The GFS2 inode
507 * @er: The request structure
508 *
509 * Returns: actual size of data on success, -errno on error
510 */
511
512int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
513{
514 struct gfs2_ea_location el;
515 int error;
516
517 if (!ip->i_di.di_eattr)
518 return -ENODATA;
519
520 error = gfs2_ea_find(ip, er, &el);
521 if (error)
522 return error;
523 if (!el.el_ea)
524 return -ENODATA;
525
526 if (er->er_data_len) {
527 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
528 error = -ERANGE;
529 else
530 error = gfs2_ea_get_copy(ip, &el, er->er_data);
531 }
532 if (!error)
533 error = GFS2_EA_DATA_LEN(el.el_ea);
534
535 brelse(el.el_bh);
536
537 return error;
538}
539
540/**
541 * gfs2_ea_get -
542 * @ip: The GFS2 inode
543 * @er: The request structure
544 *
545 * Returns: actual size of data on success, -errno on error
546 */
547
548int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
549{
550 struct gfs2_holder i_gh;
551 int error;
552
553 if (!er->er_name_len ||
554 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
555 return -EINVAL;
556 if (!er->er_data || !er->er_data_len) {
557 er->er_data = NULL;
558 er->er_data_len = 0;
559 }
560
561 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
562 if (error)
563 return error;
564
565 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
566
567 gfs2_glock_dq_uninit(&i_gh);
568
569 return error;
570}
571
572/**
573 * ea_alloc_blk - allocates a new block for extended attributes.
574 * @ip: A pointer to the inode that's getting extended attributes
575 * @bhp: Pointer to pointer to a struct buffer_head
576 *
577 * Returns: errno
578 */
579
580static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
581{
582 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
583 struct gfs2_ea_header *ea;
584 u64 block;
585
586 block = gfs2_alloc_meta(ip);
587
588 *bhp = gfs2_meta_new(ip->i_gl, block);
589 gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
590 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
591 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
592
593 ea = GFS2_EA_BH2FIRST(*bhp);
594 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
595 ea->ea_type = GFS2_EATYPE_UNUSED;
596 ea->ea_flags = GFS2_EAFLAG_LAST;
597 ea->ea_num_ptrs = 0;
598
599 ip->i_di.di_blocks++;
600
601 return 0;
602}
603
604/**
605 * ea_write - writes the request info to an ea, creating new blocks if
606 * necessary
607 * @ip: inode that is being modified
608 * @ea: the location of the new ea in a block
609 * @er: the write request
610 *
611 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
612 *
613 * returns : errno
614 */
615
616static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
617 struct gfs2_ea_request *er)
618{
619 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
620
621 ea->ea_data_len = cpu_to_be32(er->er_data_len);
622 ea->ea_name_len = er->er_name_len;
623 ea->ea_type = er->er_type;
624 ea->__pad = 0;
625
626 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
627
628 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
629 ea->ea_num_ptrs = 0;
630 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
631 } else {
632 u64 *dataptr = GFS2_EA2DATAPTRS(ea);
633 const char *data = er->er_data;
634 unsigned int data_len = er->er_data_len;
635 unsigned int copy;
636 unsigned int x;
637
638 ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize);
639 for (x = 0; x < ea->ea_num_ptrs; x++) {
640 struct buffer_head *bh;
641 u64 block;
642 int mh_size = sizeof(struct gfs2_meta_header);
643
644 block = gfs2_alloc_meta(ip);
645
646 bh = gfs2_meta_new(ip->i_gl, block);
647 gfs2_trans_add_bh(ip->i_gl, bh, 1);
648 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
649
650 ip->i_di.di_blocks++;
651
652 copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
653 data_len;
654 memcpy(bh->b_data + mh_size, data, copy);
655 if (copy < sdp->sd_jbsize)
656 memset(bh->b_data + mh_size + copy, 0,
657 sdp->sd_jbsize - copy);
658
659 *dataptr++ = cpu_to_be64(bh->b_blocknr);
660 data += copy;
661 data_len -= copy;
662
663 brelse(bh);
664 }
665
666 gfs2_assert_withdraw(sdp, !data_len);
667 }
668
669 return 0;
670}
671
672typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
673 struct gfs2_ea_request *er, void *private);
674
675static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
676 unsigned int blks,
677 ea_skeleton_call_t skeleton_call, void *private)
678{
679 struct gfs2_alloc *al;
680 struct buffer_head *dibh;
681 int error;
682
683 al = gfs2_alloc_get(ip);
684
685 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
686 if (error)
687 goto out;
688
689 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
690 if (error)
691 goto out_gunlock_q;
692
693 al->al_requested = blks;
694
695 error = gfs2_inplace_reserve(ip);
696 if (error)
697 goto out_gunlock_q;
698
699 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
700 blks + al->al_rgd->rd_ri.ri_length +
701 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
702 if (error)
703 goto out_ipres;
704
705 error = skeleton_call(ip, er, private);
706 if (error)
707 goto out_end_trans;
708
709 error = gfs2_meta_inode_buffer(ip, &dibh);
710 if (!error) {
711 if (er->er_flags & GFS2_ERF_MODE) {
712 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
713 (ip->i_di.di_mode & S_IFMT) ==
714 (er->er_mode & S_IFMT));
715 ip->i_di.di_mode = er->er_mode;
716 }
717 ip->i_di.di_ctime = get_seconds();
718 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
719 gfs2_dinode_out(&ip->i_di, dibh->b_data);
720 brelse(dibh);
721 }
722
723out_end_trans:
724 gfs2_trans_end(GFS2_SB(&ip->i_inode));
725out_ipres:
726 gfs2_inplace_release(ip);
727out_gunlock_q:
728 gfs2_quota_unlock(ip);
729out:
730 gfs2_alloc_put(ip);
731 return error;
732}
733
734static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
735 void *private)
736{
737 struct buffer_head *bh;
738 int error;
739
740 error = ea_alloc_blk(ip, &bh);
741 if (error)
742 return error;
743
744 ip->i_di.di_eattr = bh->b_blocknr;
745 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
746
747 brelse(bh);
748
749 return error;
750}
751
752/**
753 * ea_init - initializes a new eattr block
754 * @ip:
755 * @er:
756 *
757 * Returns: errno
758 */
759
760static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
761{
762 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize;
763 unsigned int blks = 1;
764
765 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
766 blks += DIV_ROUND_UP(er->er_data_len, jbsize);
767
768 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
769}
770
771static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
772{
773 u32 ea_size = GFS2_EA_SIZE(ea);
774 struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea +
775 ea_size);
776 u32 new_size = GFS2_EA_REC_LEN(ea) - ea_size;
777 int last = ea->ea_flags & GFS2_EAFLAG_LAST;
778
779 ea->ea_rec_len = cpu_to_be32(ea_size);
780 ea->ea_flags ^= last;
781
782 new->ea_rec_len = cpu_to_be32(new_size);
783 new->ea_flags = last;
784
785 return new;
786}
787
788static void ea_set_remove_stuffed(struct gfs2_inode *ip,
789 struct gfs2_ea_location *el)
790{
791 struct gfs2_ea_header *ea = el->el_ea;
792 struct gfs2_ea_header *prev = el->el_prev;
793 u32 len;
794
795 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
796
797 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
798 ea->ea_type = GFS2_EATYPE_UNUSED;
799 return;
800 } else if (GFS2_EA2NEXT(prev) != ea) {
801 prev = GFS2_EA2NEXT(prev);
802 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), GFS2_EA2NEXT(prev) == ea);
803 }
804
805 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
806 prev->ea_rec_len = cpu_to_be32(len);
807
808 if (GFS2_EA_IS_LAST(ea))
809 prev->ea_flags |= GFS2_EAFLAG_LAST;
810}
811
812struct ea_set {
813 int ea_split;
814
815 struct gfs2_ea_request *es_er;
816 struct gfs2_ea_location *es_el;
817
818 struct buffer_head *es_bh;
819 struct gfs2_ea_header *es_ea;
820};
821
822static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
823 struct gfs2_ea_header *ea, struct ea_set *es)
824{
825 struct gfs2_ea_request *er = es->es_er;
826 struct buffer_head *dibh;
827 int error;
828
829 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + 2 * RES_EATTR, 0);
830 if (error)
831 return error;
832
833 gfs2_trans_add_bh(ip->i_gl, bh, 1);
834
835 if (es->ea_split)
836 ea = ea_split_ea(ea);
837
838 ea_write(ip, ea, er);
839
840 if (es->es_el)
841 ea_set_remove_stuffed(ip, es->es_el);
842
843 error = gfs2_meta_inode_buffer(ip, &dibh);
844 if (error)
845 goto out;
846
847 if (er->er_flags & GFS2_ERF_MODE) {
848 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
849 (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
850 ip->i_di.di_mode = er->er_mode;
851 }
852 ip->i_di.di_ctime = get_seconds();
853 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
854 gfs2_dinode_out(&ip->i_di, dibh->b_data);
855 brelse(dibh);
856out:
857 gfs2_trans_end(GFS2_SB(&ip->i_inode));
858 return error;
859}
860
861static int ea_set_simple_alloc(struct gfs2_inode *ip,
862 struct gfs2_ea_request *er, void *private)
863{
864 struct ea_set *es = private;
865 struct gfs2_ea_header *ea = es->es_ea;
866 int error;
867
868 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);
869
870 if (es->ea_split)
871 ea = ea_split_ea(ea);
872
873 error = ea_write(ip, ea, er);
874 if (error)
875 return error;
876
877 if (es->es_el)
878 ea_set_remove_stuffed(ip, es->es_el);
879
880 return 0;
881}
882
883static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
884 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
885 void *private)
886{
887 struct ea_set *es = private;
888 unsigned int size;
889 int stuffed;
890 int error;
891
892 stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size);
893
894 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
895 if (GFS2_EA_REC_LEN(ea) < size)
896 return 0;
897 if (!GFS2_EA_IS_STUFFED(ea)) {
898 error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
899 if (error)
900 return error;
901 }
902 es->ea_split = 0;
903 } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
904 es->ea_split = 1;
905 else
906 return 0;
907
908 if (stuffed) {
909 error = ea_set_simple_noalloc(ip, bh, ea, es);
910 if (error)
911 return error;
912 } else {
913 unsigned int blks;
914
915 es->es_bh = bh;
916 es->es_ea = ea;
917 blks = 2 + DIV_ROUND_UP(es->es_er->er_data_len,
918 GFS2_SB(&ip->i_inode)->sd_jbsize);
919
920 error = ea_alloc_skeleton(ip, es->es_er, blks,
921 ea_set_simple_alloc, es);
922 if (error)
923 return error;
924 }
925
926 return 1;
927}
928
929static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
930 void *private)
931{
932 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
933 struct buffer_head *indbh, *newbh;
934 u64 *eablk;
935 int error;
936 int mh_size = sizeof(struct gfs2_meta_header);
937
938 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
939 u64 *end;
940
941 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
942 &indbh);
943 if (error)
944 return error;
945
946 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
947 error = -EIO;
948 goto out;
949 }
950
951 eablk = (u64 *)(indbh->b_data + mh_size);
952 end = eablk + sdp->sd_inptrs;
953
954 for (; eablk < end; eablk++)
955 if (!*eablk)
956 break;
957
958 if (eablk == end) {
959 error = -ENOSPC;
960 goto out;
961 }
962
963 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
964 } else {
965 u64 blk;
966
967 blk = gfs2_alloc_meta(ip);
968
969 indbh = gfs2_meta_new(ip->i_gl, blk);
970 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
971 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
972 gfs2_buffer_clear_tail(indbh, mh_size);
973
974 eablk = (u64 *)(indbh->b_data + mh_size);
975 *eablk = cpu_to_be64(ip->i_di.di_eattr);
976 ip->i_di.di_eattr = blk;
977 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
978 ip->i_di.di_blocks++;
979
980 eablk++;
981 }
982
983 error = ea_alloc_blk(ip, &newbh);
984 if (error)
985 goto out;
986
987 *eablk = cpu_to_be64((u64)newbh->b_blocknr);
988 error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
989 brelse(newbh);
990 if (error)
991 goto out;
992
993 if (private)
994 ea_set_remove_stuffed(ip, private);
995
996out:
997 brelse(indbh);
998 return error;
999}
1000
1001static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1002 struct gfs2_ea_location *el)
1003{
1004 struct ea_set es;
1005 unsigned int blks = 2;
1006 int error;
1007
1008 memset(&es, 0, sizeof(struct ea_set));
1009 es.es_er = er;
1010 es.es_el = el;
1011
1012 error = ea_foreach(ip, ea_set_simple, &es);
1013 if (error > 0)
1014 return 0;
1015 if (error)
1016 return error;
1017
1018 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
1019 blks++;
1020 if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
1021 blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
1022
1023 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
1024}
1025
1026static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1027 struct gfs2_ea_location *el)
1028{
1029 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1030 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1031 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
1032 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1033 }
1034
1035 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
1036}
1037
1038int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1039{
1040 struct gfs2_ea_location el;
1041 int error;
1042
1043 if (!ip->i_di.di_eattr) {
1044 if (er->er_flags & XATTR_REPLACE)
1045 return -ENODATA;
1046 return ea_init(ip, er);
1047 }
1048
1049 error = gfs2_ea_find(ip, er, &el);
1050 if (error)
1051 return error;
1052
1053 if (el.el_ea) {
1054 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
1055 brelse(el.el_bh);
1056 return -EPERM;
1057 }
1058
1059 error = -EEXIST;
1060 if (!(er->er_flags & XATTR_CREATE)) {
1061 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1062 error = ea_set_i(ip, er, &el);
1063 if (!error && unstuffed)
1064 ea_set_remove_unstuffed(ip, &el);
1065 }
1066
1067 brelse(el.el_bh);
1068 } else {
1069 error = -ENODATA;
1070 if (!(er->er_flags & XATTR_REPLACE))
1071 error = ea_set_i(ip, er, NULL);
1072 }
1073
1074 return error;
1075}
1076
1077int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1078{
1079 struct gfs2_holder i_gh;
1080 int error;
1081
1082 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1083 return -EINVAL;
1084 if (!er->er_data || !er->er_data_len) {
1085 er->er_data = NULL;
1086 er->er_data_len = 0;
1087 }
1088 error = ea_check_size(GFS2_SB(&ip->i_inode), er);
1089 if (error)
1090 return error;
1091
1092 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1093 if (error)
1094 return error;
1095
1096 if (IS_IMMUTABLE(&ip->i_inode))
1097 error = -EPERM;
1098 else
1099 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1100
1101 gfs2_glock_dq_uninit(&i_gh);
1102
1103 return error;
1104}
1105
1106static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1107{
1108 struct gfs2_ea_header *ea = el->el_ea;
1109 struct gfs2_ea_header *prev = el->el_prev;
1110 struct buffer_head *dibh;
1111 int error;
1112
1113 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
1114 if (error)
1115 return error;
1116
1117 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1118
1119 if (prev) {
1120 u32 len;
1121
1122 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
1123 prev->ea_rec_len = cpu_to_be32(len);
1124
1125 if (GFS2_EA_IS_LAST(ea))
1126 prev->ea_flags |= GFS2_EAFLAG_LAST;
1127 } else
1128 ea->ea_type = GFS2_EATYPE_UNUSED;
1129
1130 error = gfs2_meta_inode_buffer(ip, &dibh);
1131 if (!error) {
1132 ip->i_di.di_ctime = get_seconds();
1133 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1134 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1135 brelse(dibh);
1136 }
1137
1138 gfs2_trans_end(GFS2_SB(&ip->i_inode));
1139
1140 return error;
1141}
1142
1143int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1144{
1145 struct gfs2_ea_location el;
1146 int error;
1147
1148 if (!ip->i_di.di_eattr)
1149 return -ENODATA;
1150
1151 error = gfs2_ea_find(ip, er, &el);
1152 if (error)
1153 return error;
1154 if (!el.el_ea)
1155 return -ENODATA;
1156
1157 if (GFS2_EA_IS_STUFFED(el.el_ea))
1158 error = ea_remove_stuffed(ip, &el);
1159 else
1160 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
1161 0);
1162
1163 brelse(el.el_bh);
1164
1165 return error;
1166}
1167
1168/**
1169 * gfs2_ea_remove - sets (or creates or replaces) an extended attribute
1170 * @ip: pointer to the inode of the target file
1171 * @er: request information
1172 *
1173 * Returns: errno
1174 */
1175
1176int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1177{
1178 struct gfs2_holder i_gh;
1179 int error;
1180
1181 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1182 return -EINVAL;
1183
1184 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1185 if (error)
1186 return error;
1187
1188 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
1189 error = -EPERM;
1190 else
1191 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
1192
1193 gfs2_glock_dq_uninit(&i_gh);
1194
1195 return error;
1196}
1197
1198static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1199 struct gfs2_ea_header *ea, char *data)
1200{
1201 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1202 struct buffer_head **bh;
1203 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1204 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
1205 u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
1206 unsigned int x;
1207 int error;
1208
1209 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
1210 if (!bh)
1211 return -ENOMEM;
1212
1213 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1214 if (error)
1215 goto out;
1216
1217 for (x = 0; x < nptrs; x++) {
1218 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
1219 bh + x);
1220 if (error) {
1221 while (x--)
1222 brelse(bh[x]);
1223 goto fail;
1224 }
1225 dataptrs++;
1226 }
1227
1228 for (x = 0; x < nptrs; x++) {
1229 error = gfs2_meta_wait(sdp, bh[x]);
1230 if (error) {
1231 for (; x < nptrs; x++)
1232 brelse(bh[x]);
1233 goto fail;
1234 }
1235 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1236 for (; x < nptrs; x++)
1237 brelse(bh[x]);
1238 error = -EIO;
1239 goto fail;
1240 }
1241
1242 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
1243
1244 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
1245 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1246
1247 amount -= sdp->sd_jbsize;
1248 data += sdp->sd_jbsize;
1249
1250 brelse(bh[x]);
1251 }
1252
1253out:
1254 kfree(bh);
1255 return error;
1256
1257fail:
1258 gfs2_trans_end(sdp);
1259 kfree(bh);
1260 return error;
1261}
1262
1263int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
1264 struct iattr *attr, char *data)
1265{
1266 struct buffer_head *dibh;
1267 int error;
1268
1269 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
1270 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
1271 if (error)
1272 return error;
1273
1274 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1275 memcpy(GFS2_EA2DATA(el->el_ea), data,
1276 GFS2_EA_DATA_LEN(el->el_ea));
1277 } else
1278 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
1279
1280 if (error)
1281 return error;
1282
1283 error = gfs2_meta_inode_buffer(ip, &dibh);
1284 if (!error) {
1285 error = inode_setattr(&ip->i_inode, attr);
1286 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
1287 gfs2_inode_attr_out(ip);
1288 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1289 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1290 brelse(dibh);
1291 }
1292
1293 gfs2_trans_end(GFS2_SB(&ip->i_inode));
1294
1295 return error;
1296}
1297
1298static int ea_dealloc_indirect(struct gfs2_inode *ip)
1299{
1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1301 struct gfs2_rgrp_list rlist;
1302 struct buffer_head *indbh, *dibh;
1303 u64 *eablk, *end;
1304 unsigned int rg_blocks = 0;
1305 u64 bstart = 0;
1306 unsigned int blen = 0;
1307 unsigned int blks = 0;
1308 unsigned int x;
1309 int error;
1310
1311 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1312
1313 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &indbh);
1314 if (error)
1315 return error;
1316
1317 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
1318 error = -EIO;
1319 goto out;
1320 }
1321
1322 eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1323 end = eablk + sdp->sd_inptrs;
1324
1325 for (; eablk < end; eablk++) {
1326 u64 bn;
1327
1328 if (!*eablk)
1329 break;
1330 bn = be64_to_cpu(*eablk);
1331
1332 if (bstart + blen == bn)
1333 blen++;
1334 else {
1335 if (bstart)
1336 gfs2_rlist_add(sdp, &rlist, bstart);
1337 bstart = bn;
1338 blen = 1;
1339 }
1340 blks++;
1341 }
1342 if (bstart)
1343 gfs2_rlist_add(sdp, &rlist, bstart);
1344 else
1345 goto out;
1346
1347 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1348
1349 for (x = 0; x < rlist.rl_rgrps; x++) {
1350 struct gfs2_rgrpd *rgd;
1351 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1352 rg_blocks += rgd->rd_ri.ri_length;
1353 }
1354
1355 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1356 if (error)
1357 goto out_rlist_free;
1358
1359 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT +
1360 RES_STATFS + RES_QUOTA, blks);
1361 if (error)
1362 goto out_gunlock;
1363
1364 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1365
1366 eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1367 bstart = 0;
1368 blen = 0;
1369
1370 for (; eablk < end; eablk++) {
1371 u64 bn;
1372
1373 if (!*eablk)
1374 break;
1375 bn = be64_to_cpu(*eablk);
1376
1377 if (bstart + blen == bn)
1378 blen++;
1379 else {
1380 if (bstart)
1381 gfs2_free_meta(ip, bstart, blen);
1382 bstart = bn;
1383 blen = 1;
1384 }
1385
1386 *eablk = 0;
1387 if (!ip->i_di.di_blocks)
1388 gfs2_consist_inode(ip);
1389 ip->i_di.di_blocks--;
1390 }
1391 if (bstart)
1392 gfs2_free_meta(ip, bstart, blen);
1393
1394 ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
1395
1396 error = gfs2_meta_inode_buffer(ip, &dibh);
1397 if (!error) {
1398 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1399 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1400 brelse(dibh);
1401 }
1402
1403 gfs2_trans_end(sdp);
1404
1405out_gunlock:
1406 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1407out_rlist_free:
1408 gfs2_rlist_free(&rlist);
1409out:
1410 brelse(indbh);
1411 return error;
1412}
1413
1414static int ea_dealloc_block(struct gfs2_inode *ip)
1415{
1416 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1417 struct gfs2_alloc *al = &ip->i_alloc;
1418 struct gfs2_rgrpd *rgd;
1419 struct buffer_head *dibh;
1420 int error;
1421
1422 rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
1423 if (!rgd) {
1424 gfs2_consist_inode(ip);
1425 return -EIO;
1426 }
1427
1428 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1429 &al->al_rgd_gh);
1430 if (error)
1431 return error;
1432
1433 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE + RES_STATFS +
1434 RES_QUOTA, 1);
1435 if (error)
1436 goto out_gunlock;
1437
1438 gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
1439
1440 ip->i_di.di_eattr = 0;
1441 if (!ip->i_di.di_blocks)
1442 gfs2_consist_inode(ip);
1443 ip->i_di.di_blocks--;
1444
1445 error = gfs2_meta_inode_buffer(ip, &dibh);
1446 if (!error) {
1447 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1448 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1449 brelse(dibh);
1450 }
1451
1452 gfs2_trans_end(sdp);
1453
1454out_gunlock:
1455 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1456 return error;
1457}
1458
1459/**
1460 * gfs2_ea_dealloc - deallocate the extended attribute fork
1461 * @ip: the inode
1462 *
1463 * Returns: errno
1464 */
1465
1466int gfs2_ea_dealloc(struct gfs2_inode *ip)
1467{
1468 struct gfs2_alloc *al;
1469 int error;
1470
1471 al = gfs2_alloc_get(ip);
1472
1473 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1474 if (error)
1475 goto out_alloc;
1476
1477 error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
1478 if (error)
1479 goto out_quota;
1480
1481 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
1482 if (error)
1483 goto out_rindex;
1484
1485 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
1486 error = ea_dealloc_indirect(ip);
1487 if (error)
1488 goto out_rindex;
1489 }
1490
1491 error = ea_dealloc_block(ip);
1492
1493out_rindex:
1494 gfs2_glock_dq_uninit(&al->al_ri_gh);
1495out_quota:
1496 gfs2_quota_unhold(ip);
1497out_alloc:
1498 gfs2_alloc_put(ip);
1499 return error;
1500}
1501
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
new file mode 100644
index 000000000000..ffa65947d686
--- /dev/null
+++ b/fs/gfs2/eattr.h
@@ -0,0 +1,100 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __EATTR_DOT_H__
11#define __EATTR_DOT_H__
12
13struct gfs2_inode;
14struct iattr;
15
16#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
17#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)
18
19#define GFS2_EA_SIZE(ea) \
20ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
21 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
22 (sizeof(u64) * (ea)->ea_num_ptrs)), 8)
23
24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
26
27#define GFS2_EAREQ_SIZE_STUFFED(er) \
28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
29
30#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
31ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
32 sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
33
34#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
35#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
36
37#define GFS2_EA2DATAPTRS(ea) \
38((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
39
40#define GFS2_EA2NEXT(ea) \
41((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
42
43#define GFS2_EA_BH2FIRST(bh) \
44((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
45
46#define GFS2_ERF_MODE 0x80000000
47
48struct gfs2_ea_request {
49 const char *er_name;
50 char *er_data;
51 unsigned int er_name_len;
52 unsigned int er_data_len;
53 unsigned int er_type; /* GFS2_EATYPE_... */
54 int er_flags;
55 mode_t er_mode;
56};
57
58struct gfs2_ea_location {
59 struct buffer_head *el_bh;
60 struct gfs2_ea_header *el_ea;
61 struct gfs2_ea_header *el_prev;
62};
63
64int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
65int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
66int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
67
68int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
69int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
70int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
71int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
72
73int gfs2_ea_dealloc(struct gfs2_inode *ip);
74
75/* Exported to acl.c */
76
77int gfs2_ea_find(struct gfs2_inode *ip,
78 struct gfs2_ea_request *er,
79 struct gfs2_ea_location *el);
80int gfs2_ea_get_copy(struct gfs2_inode *ip,
81 struct gfs2_ea_location *el,
82 char *data);
83int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
84 struct iattr *attr, char *data);
85
86static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
87{
88 switch (ea->ea_type) {
89 case GFS2_EATYPE_USR:
90 return 5 + ea->ea_name_len + 1;
91 case GFS2_EATYPE_SYS:
92 return 7 + ea->ea_name_len + 1;
93 case GFS2_EATYPE_SECURITY:
94 return 9 + ea->ea_name_len + 1;
95 default:
96 return 0;
97 }
98}
99
100#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
new file mode 100644
index 000000000000..3bb11c0f8b56
--- /dev/null
+++ b/fs/gfs2/gfs2.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __GFS2_DOT_H__
11#define __GFS2_DOT_H__
12
13enum {
14 NO_CREATE = 0,
15 CREATE = 1,
16};
17
18enum {
19 NO_WAIT = 0,
20 WAIT = 1,
21};
22
23enum {
24 NO_FORCE = 0,
25 FORCE = 1,
26};
27
28#define GFS2_FAST_NAME_SIZE 8
29
30#endif /* __GFS2_DOT_H__ */
31
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
new file mode 100644
index 000000000000..78fe0fae23ff
--- /dev/null
+++ b/fs/gfs2/glock.c
@@ -0,0 +1,2231 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/sort.h>
17#include <linux/jhash.h>
18#include <linux/kallsyms.h>
19#include <linux/gfs2_ondisk.h>
20#include <linux/list.h>
21#include <linux/lm_interface.h>
22#include <asm/uaccess.h>
23
24#include "gfs2.h"
25#include "incore.h"
26#include "glock.h"
27#include "glops.h"
28#include "inode.h"
29#include "lm.h"
30#include "lops.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "super.h"
34#include "util.h"
35
36struct greedy {
37 struct gfs2_holder gr_gh;
38 struct work_struct gr_work;
39};
40
41struct gfs2_gl_hash_bucket {
42 struct hlist_head hb_list;
43};
44
45typedef void (*glock_examiner) (struct gfs2_glock * gl);
46
47static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
48static int dump_glock(struct gfs2_glock *gl);
49static int dump_inode(struct gfs2_inode *ip);
50
51#define GFS2_GL_HASH_SHIFT 15
52#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
53#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
54
55static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
56
57/*
58 * Despite what you might think, the numbers below are not arbitrary :-)
59 * They are taken from the ipv4 routing hash code, which is well tested
60 * and thus should be nearly optimal. Later on we might tweek the numbers
61 * but for now this should be fine.
62 *
63 * The reason for putting the locks in a separate array from the list heads
64 * is that we can have fewer locks than list heads and save memory. We use
65 * the same hash function for both, but with a different hash mask.
66 */
67#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
68 defined(CONFIG_PROVE_LOCKING)
69
70#ifdef CONFIG_LOCKDEP
71# define GL_HASH_LOCK_SZ 256
72#else
73# if NR_CPUS >= 32
74# define GL_HASH_LOCK_SZ 4096
75# elif NR_CPUS >= 16
76# define GL_HASH_LOCK_SZ 2048
77# elif NR_CPUS >= 8
78# define GL_HASH_LOCK_SZ 1024
79# elif NR_CPUS >= 4
80# define GL_HASH_LOCK_SZ 512
81# else
82# define GL_HASH_LOCK_SZ 256
83# endif
84#endif
85
86/* We never want more locks than chains */
87#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
88# undef GL_HASH_LOCK_SZ
89# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
90#endif
91
92static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
93
94static inline rwlock_t *gl_lock_addr(unsigned int x)
95{
96 return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
97}
98#else /* not SMP, so no spinlocks required */
99static inline rwlock_t *gl_lock_addr(x)
100{
101 return NULL;
102}
103#endif
104
105/**
106 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
107 * @actual: the current state of the lock
108 * @requested: the lock state that was requested by the caller
109 * @flags: the modifier flags passed in by the caller
110 *
111 * Returns: 1 if the locks are compatible, 0 otherwise
112 */
113
114static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
115 int flags)
116{
117 if (actual == requested)
118 return 1;
119
120 if (flags & GL_EXACT)
121 return 0;
122
123 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
124 return 1;
125
126 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
127 return 1;
128
129 return 0;
130}
131
132/**
133 * gl_hash() - Turn glock number into hash bucket number
134 * @lock: The glock number
135 *
136 * Returns: The number of the corresponding hash bucket
137 */
138
139static unsigned int gl_hash(const struct gfs2_sbd *sdp,
140 const struct lm_lockname *name)
141{
142 unsigned int h;
143
144 h = jhash(&name->ln_number, sizeof(u64), 0);
145 h = jhash(&name->ln_type, sizeof(unsigned int), h);
146 h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
147 h &= GFS2_GL_HASH_MASK;
148
149 return h;
150}
151
152/**
153 * glock_free() - Perform a few checks and then release struct gfs2_glock
154 * @gl: The glock to release
155 *
156 * Also calls lock module to release its internal structure for this glock.
157 *
158 */
159
160static void glock_free(struct gfs2_glock *gl)
161{
162 struct gfs2_sbd *sdp = gl->gl_sbd;
163 struct inode *aspace = gl->gl_aspace;
164
165 gfs2_lm_put_lock(sdp, gl->gl_lock);
166
167 if (aspace)
168 gfs2_aspace_put(aspace);
169
170 kmem_cache_free(gfs2_glock_cachep, gl);
171}
172
173/**
174 * gfs2_glock_hold() - increment reference count on glock
175 * @gl: The glock to hold
176 *
177 */
178
179void gfs2_glock_hold(struct gfs2_glock *gl)
180{
181 atomic_inc(&gl->gl_ref);
182}
183
184/**
185 * gfs2_glock_put() - Decrement reference count on glock
186 * @gl: The glock to put
187 *
188 */
189
190int gfs2_glock_put(struct gfs2_glock *gl)
191{
192 int rv = 0;
193 struct gfs2_sbd *sdp = gl->gl_sbd;
194
195 write_lock(gl_lock_addr(gl->gl_hash));
196 if (atomic_dec_and_test(&gl->gl_ref)) {
197 hlist_del(&gl->gl_list);
198 write_unlock(gl_lock_addr(gl->gl_hash));
199 BUG_ON(spin_is_locked(&gl->gl_spin));
200 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
201 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
202 gfs2_assert(sdp, list_empty(&gl->gl_holders));
203 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
204 gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
205 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
206 glock_free(gl);
207 rv = 1;
208 goto out;
209 }
210 write_unlock(gl_lock_addr(gl->gl_hash));
211out:
212 return rv;
213}
214
215/**
216 * queue_empty - check to see if a glock's queue is empty
217 * @gl: the glock
218 * @head: the head of the queue to check
219 *
220 * This function protects the list in the event that a process already
221 * has a holder on the list and is adding a second holder for itself.
222 * The glmutex lock is what generally prevents processes from working
223 * on the same glock at once, but the special case of adding a second
224 * holder for yourself ("recursive" locking) doesn't involve locking
225 * glmutex, making the spin lock necessary.
226 *
227 * Returns: 1 if the queue is empty
228 */
229
230static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
231{
232 int empty;
233 spin_lock(&gl->gl_spin);
234 empty = list_empty(head);
235 spin_unlock(&gl->gl_spin);
236 return empty;
237}
238
239/**
240 * search_bucket() - Find struct gfs2_glock by lock number
241 * @bucket: the bucket to search
242 * @name: The lock name
243 *
244 * Returns: NULL, or the struct gfs2_glock with the requested number
245 */
246
247static struct gfs2_glock *search_bucket(unsigned int hash,
248 const struct gfs2_sbd *sdp,
249 const struct lm_lockname *name)
250{
251 struct gfs2_glock *gl;
252 struct hlist_node *h;
253
254 hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
255 if (!lm_name_equal(&gl->gl_name, name))
256 continue;
257 if (gl->gl_sbd != sdp)
258 continue;
259
260 atomic_inc(&gl->gl_ref);
261
262 return gl;
263 }
264
265 return NULL;
266}
267
268/**
269 * gfs2_glock_find() - Find glock by lock number
270 * @sdp: The GFS2 superblock
271 * @name: The lock name
272 *
273 * Returns: NULL, or the struct gfs2_glock with the requested number
274 */
275
276static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
277 const struct lm_lockname *name)
278{
279 unsigned int hash = gl_hash(sdp, name);
280 struct gfs2_glock *gl;
281
282 read_lock(gl_lock_addr(hash));
283 gl = search_bucket(hash, sdp, name);
284 read_unlock(gl_lock_addr(hash));
285
286 return gl;
287}
288
289/**
290 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
291 * @sdp: The GFS2 superblock
292 * @number: the lock number
293 * @glops: The glock_operations to use
294 * @create: If 0, don't create the glock if it doesn't exist
295 * @glp: the glock is returned here
296 *
297 * This does not lock a glock, just finds/creates structures for one.
298 *
299 * Returns: errno
300 */
301
302int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
303 const struct gfs2_glock_operations *glops, int create,
304 struct gfs2_glock **glp)
305{
306 struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type };
307 struct gfs2_glock *gl, *tmp;
308 unsigned int hash = gl_hash(sdp, &name);
309 int error;
310
311 read_lock(gl_lock_addr(hash));
312 gl = search_bucket(hash, sdp, &name);
313 read_unlock(gl_lock_addr(hash));
314
315 if (gl || !create) {
316 *glp = gl;
317 return 0;
318 }
319
320 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
321 if (!gl)
322 return -ENOMEM;
323
324 gl->gl_flags = 0;
325 gl->gl_name = name;
326 atomic_set(&gl->gl_ref, 1);
327 gl->gl_state = LM_ST_UNLOCKED;
328 gl->gl_hash = hash;
329 gl->gl_owner = NULL;
330 gl->gl_ip = 0;
331 gl->gl_ops = glops;
332 gl->gl_req_gh = NULL;
333 gl->gl_req_bh = NULL;
334 gl->gl_vn = 0;
335 gl->gl_stamp = jiffies;
336 gl->gl_object = NULL;
337 gl->gl_sbd = sdp;
338 gl->gl_aspace = NULL;
339 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
340
341 /* If this glock protects actual on-disk data or metadata blocks,
342 create a VFS inode to manage the pages/buffers holding them. */
343 if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) {
344 gl->gl_aspace = gfs2_aspace_get(sdp);
345 if (!gl->gl_aspace) {
346 error = -ENOMEM;
347 goto fail;
348 }
349 }
350
351 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
352 if (error)
353 goto fail_aspace;
354
355 write_lock(gl_lock_addr(hash));
356 tmp = search_bucket(hash, sdp, &name);
357 if (tmp) {
358 write_unlock(gl_lock_addr(hash));
359 glock_free(gl);
360 gl = tmp;
361 } else {
362 hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
363 write_unlock(gl_lock_addr(hash));
364 }
365
366 *glp = gl;
367
368 return 0;
369
370fail_aspace:
371 if (gl->gl_aspace)
372 gfs2_aspace_put(gl->gl_aspace);
373fail:
374 kmem_cache_free(gfs2_glock_cachep, gl);
375 return error;
376}
377
378/**
379 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
380 * @gl: the glock
381 * @state: the state we're requesting
382 * @flags: the modifier flags
383 * @gh: the holder structure
384 *
385 */
386
387void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
388 struct gfs2_holder *gh)
389{
390 INIT_LIST_HEAD(&gh->gh_list);
391 gh->gh_gl = gl;
392 gh->gh_ip = (unsigned long)__builtin_return_address(0);
393 gh->gh_owner = current;
394 gh->gh_state = state;
395 gh->gh_flags = flags;
396 gh->gh_error = 0;
397 gh->gh_iflags = 0;
398 init_completion(&gh->gh_wait);
399
400 if (gh->gh_state == LM_ST_EXCLUSIVE)
401 gh->gh_flags |= GL_LOCAL_EXCL;
402
403 gfs2_glock_hold(gl);
404}
405
406/**
407 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
408 * @state: the state we're requesting
409 * @flags: the modifier flags
410 * @gh: the holder structure
411 *
412 * Don't mess with the glock.
413 *
414 */
415
416void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh)
417{
418 gh->gh_state = state;
419 gh->gh_flags = flags;
420 if (gh->gh_state == LM_ST_EXCLUSIVE)
421 gh->gh_flags |= GL_LOCAL_EXCL;
422
423 gh->gh_iflags &= 1 << HIF_ALLOCED;
424 gh->gh_ip = (unsigned long)__builtin_return_address(0);
425}
426
427/**
428 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
429 * @gh: the holder structure
430 *
431 */
432
433void gfs2_holder_uninit(struct gfs2_holder *gh)
434{
435 gfs2_glock_put(gh->gh_gl);
436 gh->gh_gl = NULL;
437 gh->gh_ip = 0;
438}
439
440/**
441 * gfs2_holder_get - get a struct gfs2_holder structure
442 * @gl: the glock
443 * @state: the state we're requesting
444 * @flags: the modifier flags
445 * @gfp_flags:
446 *
447 * Figure out how big an impact this function has. Either:
448 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
449 * 2) Leave it like it is
450 *
451 * Returns: the holder structure, NULL on ENOMEM
452 */
453
454static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl,
455 unsigned int state,
456 int flags, gfp_t gfp_flags)
457{
458 struct gfs2_holder *gh;
459
460 gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
461 if (!gh)
462 return NULL;
463
464 gfs2_holder_init(gl, state, flags, gh);
465 set_bit(HIF_ALLOCED, &gh->gh_iflags);
466 gh->gh_ip = (unsigned long)__builtin_return_address(0);
467 return gh;
468}
469
470/**
471 * gfs2_holder_put - get rid of a struct gfs2_holder structure
472 * @gh: the holder structure
473 *
474 */
475
476static void gfs2_holder_put(struct gfs2_holder *gh)
477{
478 gfs2_holder_uninit(gh);
479 kfree(gh);
480}
481
482/**
483 * rq_mutex - process a mutex request in the queue
484 * @gh: the glock holder
485 *
486 * Returns: 1 if the queue is blocked
487 */
488
489static int rq_mutex(struct gfs2_holder *gh)
490{
491 struct gfs2_glock *gl = gh->gh_gl;
492
493 list_del_init(&gh->gh_list);
494 /* gh->gh_error never examined. */
495 set_bit(GLF_LOCK, &gl->gl_flags);
496 complete(&gh->gh_wait);
497
498 return 1;
499}
500
501/**
502 * rq_promote - process a promote request in the queue
503 * @gh: the glock holder
504 *
505 * Acquire a new inter-node lock, or change a lock state to more restrictive.
506 *
507 * Returns: 1 if the queue is blocked
508 */
509
510static int rq_promote(struct gfs2_holder *gh)
511{
512 struct gfs2_glock *gl = gh->gh_gl;
513 struct gfs2_sbd *sdp = gl->gl_sbd;
514 const struct gfs2_glock_operations *glops = gl->gl_ops;
515
516 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
517 if (list_empty(&gl->gl_holders)) {
518 gl->gl_req_gh = gh;
519 set_bit(GLF_LOCK, &gl->gl_flags);
520 spin_unlock(&gl->gl_spin);
521
522 if (atomic_read(&sdp->sd_reclaim_count) >
523 gfs2_tune_get(sdp, gt_reclaim_limit) &&
524 !(gh->gh_flags & LM_FLAG_PRIORITY)) {
525 gfs2_reclaim_glock(sdp);
526 gfs2_reclaim_glock(sdp);
527 }
528
529 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
530 spin_lock(&gl->gl_spin);
531 }
532 return 1;
533 }
534
535 if (list_empty(&gl->gl_holders)) {
536 set_bit(HIF_FIRST, &gh->gh_iflags);
537 set_bit(GLF_LOCK, &gl->gl_flags);
538 } else {
539 struct gfs2_holder *next_gh;
540 if (gh->gh_flags & GL_LOCAL_EXCL)
541 return 1;
542 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
543 gh_list);
544 if (next_gh->gh_flags & GL_LOCAL_EXCL)
545 return 1;
546 }
547
548 list_move_tail(&gh->gh_list, &gl->gl_holders);
549 gh->gh_error = 0;
550 set_bit(HIF_HOLDER, &gh->gh_iflags);
551
552 complete(&gh->gh_wait);
553
554 return 0;
555}
556
557/**
558 * rq_demote - process a demote request in the queue
559 * @gh: the glock holder
560 *
561 * Returns: 1 if the queue is blocked
562 */
563
564static int rq_demote(struct gfs2_holder *gh)
565{
566 struct gfs2_glock *gl = gh->gh_gl;
567 const struct gfs2_glock_operations *glops = gl->gl_ops;
568
569 if (!list_empty(&gl->gl_holders))
570 return 1;
571
572 if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
573 list_del_init(&gh->gh_list);
574 gh->gh_error = 0;
575 spin_unlock(&gl->gl_spin);
576 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
577 gfs2_holder_put(gh);
578 else
579 complete(&gh->gh_wait);
580 spin_lock(&gl->gl_spin);
581 } else {
582 gl->gl_req_gh = gh;
583 set_bit(GLF_LOCK, &gl->gl_flags);
584 spin_unlock(&gl->gl_spin);
585
586 if (gh->gh_state == LM_ST_UNLOCKED ||
587 gl->gl_state != LM_ST_EXCLUSIVE)
588 glops->go_drop_th(gl);
589 else
590 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
591
592 spin_lock(&gl->gl_spin);
593 }
594
595 return 0;
596}
597
598/**
599 * rq_greedy - process a queued request to drop greedy status
600 * @gh: the glock holder
601 *
602 * Returns: 1 if the queue is blocked
603 */
604
605static int rq_greedy(struct gfs2_holder *gh)
606{
607 struct gfs2_glock *gl = gh->gh_gl;
608
609 list_del_init(&gh->gh_list);
610 /* gh->gh_error never examined. */
611 clear_bit(GLF_GREEDY, &gl->gl_flags);
612 spin_unlock(&gl->gl_spin);
613
614 gfs2_holder_uninit(gh);
615 kfree(container_of(gh, struct greedy, gr_gh));
616
617 spin_lock(&gl->gl_spin);
618
619 return 0;
620}
621
622/**
623 * run_queue - process holder structures on a glock
624 * @gl: the glock
625 *
626 */
627static void run_queue(struct gfs2_glock *gl)
628{
629 struct gfs2_holder *gh;
630 int blocked = 1;
631
632 for (;;) {
633 if (test_bit(GLF_LOCK, &gl->gl_flags))
634 break;
635
636 if (!list_empty(&gl->gl_waiters1)) {
637 gh = list_entry(gl->gl_waiters1.next,
638 struct gfs2_holder, gh_list);
639
640 if (test_bit(HIF_MUTEX, &gh->gh_iflags))
641 blocked = rq_mutex(gh);
642 else
643 gfs2_assert_warn(gl->gl_sbd, 0);
644
645 } else if (!list_empty(&gl->gl_waiters2) &&
646 !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
647 gh = list_entry(gl->gl_waiters2.next,
648 struct gfs2_holder, gh_list);
649
650 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
651 blocked = rq_demote(gh);
652 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
653 blocked = rq_greedy(gh);
654 else
655 gfs2_assert_warn(gl->gl_sbd, 0);
656
657 } else if (!list_empty(&gl->gl_waiters3)) {
658 gh = list_entry(gl->gl_waiters3.next,
659 struct gfs2_holder, gh_list);
660
661 if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
662 blocked = rq_promote(gh);
663 else
664 gfs2_assert_warn(gl->gl_sbd, 0);
665
666 } else
667 break;
668
669 if (blocked)
670 break;
671 }
672}
673
674/**
675 * gfs2_glmutex_lock - acquire a local lock on a glock
676 * @gl: the glock
677 *
678 * Gives caller exclusive access to manipulate a glock structure.
679 */
680
681static void gfs2_glmutex_lock(struct gfs2_glock *gl)
682{
683 struct gfs2_holder gh;
684
685 gfs2_holder_init(gl, 0, 0, &gh);
686 set_bit(HIF_MUTEX, &gh.gh_iflags);
687
688 spin_lock(&gl->gl_spin);
689 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
690 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
691 } else {
692 gl->gl_owner = current;
693 gl->gl_ip = (unsigned long)__builtin_return_address(0);
694 complete(&gh.gh_wait);
695 }
696 spin_unlock(&gl->gl_spin);
697
698 wait_for_completion(&gh.gh_wait);
699 gfs2_holder_uninit(&gh);
700}
701
702/**
703 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
704 * @gl: the glock
705 *
706 * Returns: 1 if the glock is acquired
707 */
708
709static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
710{
711 int acquired = 1;
712
713 spin_lock(&gl->gl_spin);
714 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
715 acquired = 0;
716 } else {
717 gl->gl_owner = current;
718 gl->gl_ip = (unsigned long)__builtin_return_address(0);
719 }
720 spin_unlock(&gl->gl_spin);
721
722 return acquired;
723}
724
725/**
726 * gfs2_glmutex_unlock - release a local lock on a glock
727 * @gl: the glock
728 *
729 */
730
731static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
732{
733 spin_lock(&gl->gl_spin);
734 clear_bit(GLF_LOCK, &gl->gl_flags);
735 gl->gl_owner = NULL;
736 gl->gl_ip = 0;
737 run_queue(gl);
738 BUG_ON(!spin_is_locked(&gl->gl_spin));
739 spin_unlock(&gl->gl_spin);
740}
741
742/**
743 * handle_callback - add a demote request to a lock's queue
744 * @gl: the glock
745 * @state: the state the caller wants us to change to
746 *
747 * Note: This may fail sliently if we are out of memory.
748 */
749
750static void handle_callback(struct gfs2_glock *gl, unsigned int state)
751{
752 struct gfs2_holder *gh, *new_gh = NULL;
753
754restart:
755 spin_lock(&gl->gl_spin);
756
757 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
758 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
759 gl->gl_req_gh != gh) {
760 if (gh->gh_state != state)
761 gh->gh_state = LM_ST_UNLOCKED;
762 goto out;
763 }
764 }
765
766 if (new_gh) {
767 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
768 new_gh = NULL;
769 } else {
770 spin_unlock(&gl->gl_spin);
771
772 new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL);
773 if (!new_gh)
774 return;
775 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
776 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
777
778 goto restart;
779 }
780
781out:
782 spin_unlock(&gl->gl_spin);
783
784 if (new_gh)
785 gfs2_holder_put(new_gh);
786}
787
788void gfs2_glock_inode_squish(struct inode *inode)
789{
790 struct gfs2_holder gh;
791 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
792 gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
793 set_bit(HIF_DEMOTE, &gh.gh_iflags);
794 spin_lock(&gl->gl_spin);
795 gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
796 list_add_tail(&gh.gh_list, &gl->gl_waiters2);
797 run_queue(gl);
798 spin_unlock(&gl->gl_spin);
799 wait_for_completion(&gh.gh_wait);
800 gfs2_holder_uninit(&gh);
801}
802
803/**
804 * state_change - record that the glock is now in a different state
805 * @gl: the glock
806 * @new_state the new state
807 *
808 */
809
810static void state_change(struct gfs2_glock *gl, unsigned int new_state)
811{
812 int held1, held2;
813
814 held1 = (gl->gl_state != LM_ST_UNLOCKED);
815 held2 = (new_state != LM_ST_UNLOCKED);
816
817 if (held1 != held2) {
818 if (held2)
819 gfs2_glock_hold(gl);
820 else
821 gfs2_glock_put(gl);
822 }
823
824 gl->gl_state = new_state;
825}
826
827/**
828 * xmote_bh - Called after the lock module is done acquiring a lock
829 * @gl: The glock in question
830 * @ret: the int returned from the lock module
831 *
832 */
833
834static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
835{
836 struct gfs2_sbd *sdp = gl->gl_sbd;
837 const struct gfs2_glock_operations *glops = gl->gl_ops;
838 struct gfs2_holder *gh = gl->gl_req_gh;
839 int prev_state = gl->gl_state;
840 int op_done = 1;
841
842 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
843 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
844 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
845
846 state_change(gl, ret & LM_OUT_ST_MASK);
847
848 if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
849 if (glops->go_inval)
850 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
851 } else if (gl->gl_state == LM_ST_DEFERRED) {
852 /* We might not want to do this here.
853 Look at moving to the inode glops. */
854 if (glops->go_inval)
855 glops->go_inval(gl, DIO_DATA);
856 }
857
858 /* Deal with each possible exit condition */
859
860 if (!gh)
861 gl->gl_stamp = jiffies;
862 else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
863 spin_lock(&gl->gl_spin);
864 list_del_init(&gh->gh_list);
865 gh->gh_error = -EIO;
866 spin_unlock(&gl->gl_spin);
867 } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
868 spin_lock(&gl->gl_spin);
869 list_del_init(&gh->gh_list);
870 if (gl->gl_state == gh->gh_state ||
871 gl->gl_state == LM_ST_UNLOCKED) {
872 gh->gh_error = 0;
873 } else {
874 if (gfs2_assert_warn(sdp, gh->gh_flags &
875 (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
876 fs_warn(sdp, "ret = 0x%.8X\n", ret);
877 gh->gh_error = GLR_TRYFAILED;
878 }
879 spin_unlock(&gl->gl_spin);
880
881 if (ret & LM_OUT_CANCELED)
882 handle_callback(gl, LM_ST_UNLOCKED);
883
884 } else if (ret & LM_OUT_CANCELED) {
885 spin_lock(&gl->gl_spin);
886 list_del_init(&gh->gh_list);
887 gh->gh_error = GLR_CANCELED;
888 spin_unlock(&gl->gl_spin);
889
890 } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
891 spin_lock(&gl->gl_spin);
892 list_move_tail(&gh->gh_list, &gl->gl_holders);
893 gh->gh_error = 0;
894 set_bit(HIF_HOLDER, &gh->gh_iflags);
895 spin_unlock(&gl->gl_spin);
896
897 set_bit(HIF_FIRST, &gh->gh_iflags);
898
899 op_done = 0;
900
901 } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
902 spin_lock(&gl->gl_spin);
903 list_del_init(&gh->gh_list);
904 gh->gh_error = GLR_TRYFAILED;
905 spin_unlock(&gl->gl_spin);
906
907 } else {
908 if (gfs2_assert_withdraw(sdp, 0) == -1)
909 fs_err(sdp, "ret = 0x%.8X\n", ret);
910 }
911
912 if (glops->go_xmote_bh)
913 glops->go_xmote_bh(gl);
914
915 if (op_done) {
916 spin_lock(&gl->gl_spin);
917 gl->gl_req_gh = NULL;
918 gl->gl_req_bh = NULL;
919 clear_bit(GLF_LOCK, &gl->gl_flags);
920 run_queue(gl);
921 spin_unlock(&gl->gl_spin);
922 }
923
924 gfs2_glock_put(gl);
925
926 if (gh) {
927 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
928 gfs2_holder_put(gh);
929 else
930 complete(&gh->gh_wait);
931 }
932}
933
934/**
935 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
936 * @gl: The glock in question
937 * @state: the requested state
938 * @flags: modifier flags to the lock call
939 *
940 */
941
942void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
943{
944 struct gfs2_sbd *sdp = gl->gl_sbd;
945 const struct gfs2_glock_operations *glops = gl->gl_ops;
946 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
947 LM_FLAG_NOEXP | LM_FLAG_ANY |
948 LM_FLAG_PRIORITY);
949 unsigned int lck_ret;
950
951 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
952 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
953 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
954 gfs2_assert_warn(sdp, state != gl->gl_state);
955
956 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
957 glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
958
959 gfs2_glock_hold(gl);
960 gl->gl_req_bh = xmote_bh;
961
962 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags);
963
964 if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
965 return;
966
967 if (lck_ret & LM_OUT_ASYNC)
968 gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
969 else
970 xmote_bh(gl, lck_ret);
971}
972
973/**
974 * drop_bh - Called after a lock module unlock completes
975 * @gl: the glock
976 * @ret: the return status
977 *
978 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
979 * Doesn't drop the reference on the glock the top half took out
980 *
981 */
982
983static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
984{
985 struct gfs2_sbd *sdp = gl->gl_sbd;
986 const struct gfs2_glock_operations *glops = gl->gl_ops;
987 struct gfs2_holder *gh = gl->gl_req_gh;
988
989 clear_bit(GLF_PREFETCH, &gl->gl_flags);
990
991 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
992 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
993 gfs2_assert_warn(sdp, !ret);
994
995 state_change(gl, LM_ST_UNLOCKED);
996
997 if (glops->go_inval)
998 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
999
1000 if (gh) {
1001 spin_lock(&gl->gl_spin);
1002 list_del_init(&gh->gh_list);
1003 gh->gh_error = 0;
1004 spin_unlock(&gl->gl_spin);
1005 }
1006
1007 if (glops->go_drop_bh)
1008 glops->go_drop_bh(gl);
1009
1010 spin_lock(&gl->gl_spin);
1011 gl->gl_req_gh = NULL;
1012 gl->gl_req_bh = NULL;
1013 clear_bit(GLF_LOCK, &gl->gl_flags);
1014 run_queue(gl);
1015 spin_unlock(&gl->gl_spin);
1016
1017 gfs2_glock_put(gl);
1018
1019 if (gh) {
1020 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
1021 gfs2_holder_put(gh);
1022 else
1023 complete(&gh->gh_wait);
1024 }
1025}
1026
1027/**
1028 * gfs2_glock_drop_th - call into the lock module to unlock a lock
1029 * @gl: the glock
1030 *
1031 */
1032
1033void gfs2_glock_drop_th(struct gfs2_glock *gl)
1034{
1035 struct gfs2_sbd *sdp = gl->gl_sbd;
1036 const struct gfs2_glock_operations *glops = gl->gl_ops;
1037 unsigned int ret;
1038
1039 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1040 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1041 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1042
1043 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
1044 glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
1045
1046 gfs2_glock_hold(gl);
1047 gl->gl_req_bh = drop_bh;
1048
1049 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
1050
1051 if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
1052 return;
1053
1054 if (!ret)
1055 drop_bh(gl, ret);
1056 else
1057 gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
1058}
1059
1060/**
1061 * do_cancels - cancel requests for locks stuck waiting on an expire flag
1062 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
1063 *
1064 * Don't cancel GL_NOCANCEL requests.
1065 */
1066
1067static void do_cancels(struct gfs2_holder *gh)
1068{
1069 struct gfs2_glock *gl = gh->gh_gl;
1070
1071 spin_lock(&gl->gl_spin);
1072
1073 while (gl->gl_req_gh != gh &&
1074 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1075 !list_empty(&gh->gh_list)) {
1076 if (gl->gl_req_bh && !(gl->gl_req_gh &&
1077 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
1078 spin_unlock(&gl->gl_spin);
1079 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
1080 msleep(100);
1081 spin_lock(&gl->gl_spin);
1082 } else {
1083 spin_unlock(&gl->gl_spin);
1084 msleep(100);
1085 spin_lock(&gl->gl_spin);
1086 }
1087 }
1088
1089 spin_unlock(&gl->gl_spin);
1090}
1091
1092/**
1093 * glock_wait_internal - wait on a glock acquisition
1094 * @gh: the glock holder
1095 *
1096 * Returns: 0 on success
1097 */
1098
1099static int glock_wait_internal(struct gfs2_holder *gh)
1100{
1101 struct gfs2_glock *gl = gh->gh_gl;
1102 struct gfs2_sbd *sdp = gl->gl_sbd;
1103 const struct gfs2_glock_operations *glops = gl->gl_ops;
1104
1105 if (test_bit(HIF_ABORTED, &gh->gh_iflags))
1106 return -EIO;
1107
1108 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1109 spin_lock(&gl->gl_spin);
1110 if (gl->gl_req_gh != gh &&
1111 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1112 !list_empty(&gh->gh_list)) {
1113 list_del_init(&gh->gh_list);
1114 gh->gh_error = GLR_TRYFAILED;
1115 run_queue(gl);
1116 spin_unlock(&gl->gl_spin);
1117 return gh->gh_error;
1118 }
1119 spin_unlock(&gl->gl_spin);
1120 }
1121
1122 if (gh->gh_flags & LM_FLAG_PRIORITY)
1123 do_cancels(gh);
1124
1125 wait_for_completion(&gh->gh_wait);
1126
1127 if (gh->gh_error)
1128 return gh->gh_error;
1129
1130 gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
1131 gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state,
1132 gh->gh_flags));
1133
1134 if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
1135 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1136
1137 if (glops->go_lock) {
1138 gh->gh_error = glops->go_lock(gh);
1139 if (gh->gh_error) {
1140 spin_lock(&gl->gl_spin);
1141 list_del_init(&gh->gh_list);
1142 spin_unlock(&gl->gl_spin);
1143 }
1144 }
1145
1146 spin_lock(&gl->gl_spin);
1147 gl->gl_req_gh = NULL;
1148 gl->gl_req_bh = NULL;
1149 clear_bit(GLF_LOCK, &gl->gl_flags);
1150 run_queue(gl);
1151 spin_unlock(&gl->gl_spin);
1152 }
1153
1154 return gh->gh_error;
1155}
1156
1157static inline struct gfs2_holder *
1158find_holder_by_owner(struct list_head *head, struct task_struct *owner)
1159{
1160 struct gfs2_holder *gh;
1161
1162 list_for_each_entry(gh, head, gh_list) {
1163 if (gh->gh_owner == owner)
1164 return gh;
1165 }
1166
1167 return NULL;
1168}
1169
1170/**
1171 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1172 * @gh: the holder structure to add
1173 *
1174 */
1175
1176static void add_to_queue(struct gfs2_holder *gh)
1177{
1178 struct gfs2_glock *gl = gh->gh_gl;
1179 struct gfs2_holder *existing;
1180
1181 BUG_ON(!gh->gh_owner);
1182
1183 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1184 if (existing) {
1185 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1186 printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid);
1187 printk(KERN_INFO "lock type : %d lock state : %d\n",
1188 existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
1189 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1190 printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid);
1191 printk(KERN_INFO "lock type : %d lock state : %d\n",
1192 gl->gl_name.ln_type, gl->gl_state);
1193 BUG();
1194 }
1195
1196 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
1197 if (existing) {
1198 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1199 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1200 BUG();
1201 }
1202
1203 if (gh->gh_flags & LM_FLAG_PRIORITY)
1204 list_add(&gh->gh_list, &gl->gl_waiters3);
1205 else
1206 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1207}
1208
1209/**
1210 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1211 * @gh: the holder structure
1212 *
1213 * if (gh->gh_flags & GL_ASYNC), this never returns an error
1214 *
1215 * Returns: 0, GLR_TRYFAILED, or errno on failure
1216 */
1217
1218int gfs2_glock_nq(struct gfs2_holder *gh)
1219{
1220 struct gfs2_glock *gl = gh->gh_gl;
1221 struct gfs2_sbd *sdp = gl->gl_sbd;
1222 int error = 0;
1223
1224restart:
1225 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1226 set_bit(HIF_ABORTED, &gh->gh_iflags);
1227 return -EIO;
1228 }
1229
1230 set_bit(HIF_PROMOTE, &gh->gh_iflags);
1231
1232 spin_lock(&gl->gl_spin);
1233 add_to_queue(gh);
1234 run_queue(gl);
1235 spin_unlock(&gl->gl_spin);
1236
1237 if (!(gh->gh_flags & GL_ASYNC)) {
1238 error = glock_wait_internal(gh);
1239 if (error == GLR_CANCELED) {
1240 msleep(100);
1241 goto restart;
1242 }
1243 }
1244
1245 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1246
1247 if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
1248 dump_glock(gl);
1249
1250 return error;
1251}
1252
1253/**
1254 * gfs2_glock_poll - poll to see if an async request has been completed
1255 * @gh: the holder
1256 *
1257 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1258 */
1259
1260int gfs2_glock_poll(struct gfs2_holder *gh)
1261{
1262 struct gfs2_glock *gl = gh->gh_gl;
1263 int ready = 0;
1264
1265 spin_lock(&gl->gl_spin);
1266
1267 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1268 ready = 1;
1269 else if (list_empty(&gh->gh_list)) {
1270 if (gh->gh_error == GLR_CANCELED) {
1271 spin_unlock(&gl->gl_spin);
1272 msleep(100);
1273 if (gfs2_glock_nq(gh))
1274 return 1;
1275 return 0;
1276 } else
1277 ready = 1;
1278 }
1279
1280 spin_unlock(&gl->gl_spin);
1281
1282 return ready;
1283}
1284
1285/**
1286 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1287 * @gh: the holder structure
1288 *
1289 * Returns: 0, GLR_TRYFAILED, or errno on failure
1290 */
1291
1292int gfs2_glock_wait(struct gfs2_holder *gh)
1293{
1294 int error;
1295
1296 error = glock_wait_internal(gh);
1297 if (error == GLR_CANCELED) {
1298 msleep(100);
1299 gh->gh_flags &= ~GL_ASYNC;
1300 error = gfs2_glock_nq(gh);
1301 }
1302
1303 return error;
1304}
1305
1306/**
1307 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1308 * @gh: the glock holder
1309 *
1310 */
1311
1312void gfs2_glock_dq(struct gfs2_holder *gh)
1313{
1314 struct gfs2_glock *gl = gh->gh_gl;
1315 const struct gfs2_glock_operations *glops = gl->gl_ops;
1316
1317 if (gh->gh_flags & GL_NOCACHE)
1318 handle_callback(gl, LM_ST_UNLOCKED);
1319
1320 gfs2_glmutex_lock(gl);
1321
1322 spin_lock(&gl->gl_spin);
1323 list_del_init(&gh->gh_list);
1324
1325 if (list_empty(&gl->gl_holders)) {
1326 spin_unlock(&gl->gl_spin);
1327
1328 if (glops->go_unlock)
1329 glops->go_unlock(gh);
1330
1331 gl->gl_stamp = jiffies;
1332
1333 spin_lock(&gl->gl_spin);
1334 }
1335
1336 clear_bit(GLF_LOCK, &gl->gl_flags);
1337 run_queue(gl);
1338 spin_unlock(&gl->gl_spin);
1339}
1340
1341/**
1342 * gfs2_glock_prefetch - Try to prefetch a glock
1343 * @gl: the glock
1344 * @state: the state to prefetch in
1345 * @flags: flags passed to go_xmote_th()
1346 *
1347 */
1348
1349static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
1350 int flags)
1351{
1352 const struct gfs2_glock_operations *glops = gl->gl_ops;
1353
1354 spin_lock(&gl->gl_spin);
1355
1356 if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
1357 !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
1358 !list_empty(&gl->gl_waiters3) ||
1359 relaxed_state_ok(gl->gl_state, state, flags)) {
1360 spin_unlock(&gl->gl_spin);
1361 return;
1362 }
1363
1364 set_bit(GLF_PREFETCH, &gl->gl_flags);
1365 set_bit(GLF_LOCK, &gl->gl_flags);
1366 spin_unlock(&gl->gl_spin);
1367
1368 glops->go_xmote_th(gl, state, flags);
1369}
1370
1371static void greedy_work(void *data)
1372{
1373 struct greedy *gr = data;
1374 struct gfs2_holder *gh = &gr->gr_gh;
1375 struct gfs2_glock *gl = gh->gh_gl;
1376 const struct gfs2_glock_operations *glops = gl->gl_ops;
1377
1378 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1379
1380 if (glops->go_greedy)
1381 glops->go_greedy(gl);
1382
1383 spin_lock(&gl->gl_spin);
1384
1385 if (list_empty(&gl->gl_waiters2)) {
1386 clear_bit(GLF_GREEDY, &gl->gl_flags);
1387 spin_unlock(&gl->gl_spin);
1388 gfs2_holder_uninit(gh);
1389 kfree(gr);
1390 } else {
1391 gfs2_glock_hold(gl);
1392 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1393 run_queue(gl);
1394 spin_unlock(&gl->gl_spin);
1395 gfs2_glock_put(gl);
1396 }
1397}
1398
1399/**
1400 * gfs2_glock_be_greedy -
1401 * @gl:
1402 * @time:
1403 *
1404 * Returns: 0 if go_greedy will be called, 1 otherwise
1405 */
1406
1407int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1408{
1409 struct greedy *gr;
1410 struct gfs2_holder *gh;
1411
1412 if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
1413 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1414 return 1;
1415
1416 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1417 if (!gr) {
1418 clear_bit(GLF_GREEDY, &gl->gl_flags);
1419 return 1;
1420 }
1421 gh = &gr->gr_gh;
1422
1423 gfs2_holder_init(gl, 0, 0, gh);
1424 set_bit(HIF_GREEDY, &gh->gh_iflags);
1425 INIT_WORK(&gr->gr_work, greedy_work, gr);
1426
1427 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1428 schedule_delayed_work(&gr->gr_work, time);
1429
1430 return 0;
1431}
1432
1433/**
1434 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1435 * @gh: the holder structure
1436 *
1437 */
1438
1439void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1440{
1441 gfs2_glock_dq(gh);
1442 gfs2_holder_uninit(gh);
1443}
1444
1445/**
1446 * gfs2_glock_nq_num - acquire a glock based on lock number
1447 * @sdp: the filesystem
1448 * @number: the lock number
1449 * @glops: the glock operations for the type of glock
1450 * @state: the state to acquire the glock in
1451 * @flags: modifier flags for the aquisition
1452 * @gh: the struct gfs2_holder
1453 *
1454 * Returns: errno
1455 */
1456
1457int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
1458 const struct gfs2_glock_operations *glops,
1459 unsigned int state, int flags, struct gfs2_holder *gh)
1460{
1461 struct gfs2_glock *gl;
1462 int error;
1463
1464 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1465 if (!error) {
1466 error = gfs2_glock_nq_init(gl, state, flags, gh);
1467 gfs2_glock_put(gl);
1468 }
1469
1470 return error;
1471}
1472
1473/**
1474 * glock_compare - Compare two struct gfs2_glock structures for sorting
1475 * @arg_a: the first structure
1476 * @arg_b: the second structure
1477 *
1478 */
1479
1480static int glock_compare(const void *arg_a, const void *arg_b)
1481{
1482 const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1483 const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1484 const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1485 const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1486
1487 if (a->ln_number > b->ln_number)
1488 return 1;
1489 if (a->ln_number < b->ln_number)
1490 return -1;
1491 if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
1492 return 1;
1493 if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL))
1494 return 1;
1495 return 0;
1496}
1497
1498/**
1499 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
1500 * @num_gh: the number of structures
1501 * @ghs: an array of struct gfs2_holder structures
1502 *
1503 * Returns: 0 on success (all glocks acquired),
1504 * errno on failure (no glocks acquired)
1505 */
1506
1507static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1508 struct gfs2_holder **p)
1509{
1510 unsigned int x;
1511 int error = 0;
1512
1513 for (x = 0; x < num_gh; x++)
1514 p[x] = &ghs[x];
1515
1516 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1517
1518 for (x = 0; x < num_gh; x++) {
1519 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1520
1521 error = gfs2_glock_nq(p[x]);
1522 if (error) {
1523 while (x--)
1524 gfs2_glock_dq(p[x]);
1525 break;
1526 }
1527 }
1528
1529 return error;
1530}
1531
1532/**
1533 * gfs2_glock_nq_m - acquire multiple glocks
1534 * @num_gh: the number of structures
1535 * @ghs: an array of struct gfs2_holder structures
1536 *
1537 * Figure out how big an impact this function has. Either:
1538 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1539 * 2) Forget async stuff and just call nq_m_sync()
1540 * 3) Leave it like it is
1541 *
1542 * Returns: 0 on success (all glocks acquired),
1543 * errno on failure (no glocks acquired)
1544 */
1545
1546int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1547{
1548 int *e;
1549 unsigned int x;
1550 int borked = 0, serious = 0;
1551 int error = 0;
1552
1553 if (!num_gh)
1554 return 0;
1555
1556 if (num_gh == 1) {
1557 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1558 return gfs2_glock_nq(ghs);
1559 }
1560
1561 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1562 if (!e)
1563 return -ENOMEM;
1564
1565 for (x = 0; x < num_gh; x++) {
1566 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1567 error = gfs2_glock_nq(&ghs[x]);
1568 if (error) {
1569 borked = 1;
1570 serious = error;
1571 num_gh = x;
1572 break;
1573 }
1574 }
1575
1576 for (x = 0; x < num_gh; x++) {
1577 error = e[x] = glock_wait_internal(&ghs[x]);
1578 if (error) {
1579 borked = 1;
1580 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1581 serious = error;
1582 }
1583 }
1584
1585 if (!borked) {
1586 kfree(e);
1587 return 0;
1588 }
1589
1590 for (x = 0; x < num_gh; x++)
1591 if (!e[x])
1592 gfs2_glock_dq(&ghs[x]);
1593
1594 if (serious)
1595 error = serious;
1596 else {
1597 for (x = 0; x < num_gh; x++)
1598 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1599 &ghs[x]);
1600 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1601 }
1602
1603 kfree(e);
1604
1605 return error;
1606}
1607
1608/**
1609 * gfs2_glock_dq_m - release multiple glocks
1610 * @num_gh: the number of structures
1611 * @ghs: an array of struct gfs2_holder structures
1612 *
1613 */
1614
1615void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1616{
1617 unsigned int x;
1618
1619 for (x = 0; x < num_gh; x++)
1620 gfs2_glock_dq(&ghs[x]);
1621}
1622
1623/**
1624 * gfs2_glock_dq_uninit_m - release multiple glocks
1625 * @num_gh: the number of structures
1626 * @ghs: an array of struct gfs2_holder structures
1627 *
1628 */
1629
1630void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1631{
1632 unsigned int x;
1633
1634 for (x = 0; x < num_gh; x++)
1635 gfs2_glock_dq_uninit(&ghs[x]);
1636}
1637
1638/**
1639 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1640 * @sdp: the filesystem
1641 * @number: the lock number
1642 * @glops: the glock operations for the type of glock
1643 * @state: the state to acquire the glock in
1644 * @flags: modifier flags for the aquisition
1645 *
1646 * Returns: errno
1647 */
1648
1649void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
1650 const struct gfs2_glock_operations *glops,
1651 unsigned int state, int flags)
1652{
1653 struct gfs2_glock *gl;
1654 int error;
1655
1656 if (atomic_read(&sdp->sd_reclaim_count) <
1657 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1658 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1659 if (!error) {
1660 gfs2_glock_prefetch(gl, state, flags);
1661 gfs2_glock_put(gl);
1662 }
1663 }
1664}
1665
1666/**
1667 * gfs2_lvb_hold - attach a LVB from a glock
1668 * @gl: The glock in question
1669 *
1670 */
1671
1672int gfs2_lvb_hold(struct gfs2_glock *gl)
1673{
1674 int error;
1675
1676 gfs2_glmutex_lock(gl);
1677
1678 if (!atomic_read(&gl->gl_lvb_count)) {
1679 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1680 if (error) {
1681 gfs2_glmutex_unlock(gl);
1682 return error;
1683 }
1684 gfs2_glock_hold(gl);
1685 }
1686 atomic_inc(&gl->gl_lvb_count);
1687
1688 gfs2_glmutex_unlock(gl);
1689
1690 return 0;
1691}
1692
1693/**
1694 * gfs2_lvb_unhold - detach a LVB from a glock
1695 * @gl: The glock in question
1696 *
1697 */
1698
1699void gfs2_lvb_unhold(struct gfs2_glock *gl)
1700{
1701 gfs2_glock_hold(gl);
1702 gfs2_glmutex_lock(gl);
1703
1704 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1705 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1706 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1707 gl->gl_lvb = NULL;
1708 gfs2_glock_put(gl);
1709 }
1710
1711 gfs2_glmutex_unlock(gl);
1712 gfs2_glock_put(gl);
1713}
1714
1715static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1716 unsigned int state)
1717{
1718 struct gfs2_glock *gl;
1719
1720 gl = gfs2_glock_find(sdp, name);
1721 if (!gl)
1722 return;
1723
1724 if (gl->gl_ops->go_callback)
1725 gl->gl_ops->go_callback(gl, state);
1726 handle_callback(gl, state);
1727
1728 spin_lock(&gl->gl_spin);
1729 run_queue(gl);
1730 spin_unlock(&gl->gl_spin);
1731
1732 gfs2_glock_put(gl);
1733}
1734
1735/**
1736 * gfs2_glock_cb - Callback used by locking module
1737 * @sdp: Pointer to the superblock
1738 * @type: Type of callback
1739 * @data: Type dependent data pointer
1740 *
1741 * Called by the locking module when it wants to tell us something.
1742 * Either we need to drop a lock, one of our ASYNC requests completed, or
1743 * a journal from another client needs to be recovered.
1744 */
1745
1746void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1747{
1748 struct gfs2_sbd *sdp = cb_data;
1749
1750 switch (type) {
1751 case LM_CB_NEED_E:
1752 blocking_cb(sdp, data, LM_ST_UNLOCKED);
1753 return;
1754
1755 case LM_CB_NEED_D:
1756 blocking_cb(sdp, data, LM_ST_DEFERRED);
1757 return;
1758
1759 case LM_CB_NEED_S:
1760 blocking_cb(sdp, data, LM_ST_SHARED);
1761 return;
1762
1763 case LM_CB_ASYNC: {
1764 struct lm_async_cb *async = data;
1765 struct gfs2_glock *gl;
1766
1767 gl = gfs2_glock_find(sdp, &async->lc_name);
1768 if (gfs2_assert_warn(sdp, gl))
1769 return;
1770 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1771 gl->gl_req_bh(gl, async->lc_ret);
1772 gfs2_glock_put(gl);
1773 return;
1774 }
1775
1776 case LM_CB_NEED_RECOVERY:
1777 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1778 if (sdp->sd_recoverd_process)
1779 wake_up_process(sdp->sd_recoverd_process);
1780 return;
1781
1782 case LM_CB_DROPLOCKS:
1783 gfs2_gl_hash_clear(sdp, NO_WAIT);
1784 gfs2_quota_scan(sdp);
1785 return;
1786
1787 default:
1788 gfs2_assert_warn(sdp, 0);
1789 return;
1790 }
1791}
1792
1793/**
1794 * demote_ok - Check to see if it's ok to unlock a glock
1795 * @gl: the glock
1796 *
1797 * Returns: 1 if it's ok
1798 */
1799
1800static int demote_ok(struct gfs2_glock *gl)
1801{
1802 struct gfs2_sbd *sdp = gl->gl_sbd;
1803 const struct gfs2_glock_operations *glops = gl->gl_ops;
1804 int demote = 1;
1805
1806 if (test_bit(GLF_STICKY, &gl->gl_flags))
1807 demote = 0;
1808 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
1809 demote = time_after_eq(jiffies, gl->gl_stamp +
1810 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
1811 else if (glops->go_demote_ok)
1812 demote = glops->go_demote_ok(gl);
1813
1814 return demote;
1815}
1816
1817/**
1818 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
1819 * @gl: the glock
1820 *
1821 */
1822
1823void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
1824{
1825 struct gfs2_sbd *sdp = gl->gl_sbd;
1826
1827 spin_lock(&sdp->sd_reclaim_lock);
1828 if (list_empty(&gl->gl_reclaim)) {
1829 gfs2_glock_hold(gl);
1830 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
1831 atomic_inc(&sdp->sd_reclaim_count);
1832 }
1833 spin_unlock(&sdp->sd_reclaim_lock);
1834
1835 wake_up(&sdp->sd_reclaim_wq);
1836}
1837
1838/**
1839 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
1840 * @sdp: the filesystem
1841 *
1842 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
1843 * different glock and we notice that there are a lot of glocks in the
1844 * reclaim list.
1845 *
1846 */
1847
1848void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1849{
1850 struct gfs2_glock *gl;
1851
1852 spin_lock(&sdp->sd_reclaim_lock);
1853 if (list_empty(&sdp->sd_reclaim_list)) {
1854 spin_unlock(&sdp->sd_reclaim_lock);
1855 return;
1856 }
1857 gl = list_entry(sdp->sd_reclaim_list.next,
1858 struct gfs2_glock, gl_reclaim);
1859 list_del_init(&gl->gl_reclaim);
1860 spin_unlock(&sdp->sd_reclaim_lock);
1861
1862 atomic_dec(&sdp->sd_reclaim_count);
1863 atomic_inc(&sdp->sd_reclaimed);
1864
1865 if (gfs2_glmutex_trylock(gl)) {
1866 if (queue_empty(gl, &gl->gl_holders) &&
1867 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1868 handle_callback(gl, LM_ST_UNLOCKED);
1869 gfs2_glmutex_unlock(gl);
1870 }
1871
1872 gfs2_glock_put(gl);
1873}
1874
1875/**
1876 * examine_bucket - Call a function for glock in a hash bucket
1877 * @examiner: the function
1878 * @sdp: the filesystem
1879 * @bucket: the bucket
1880 *
1881 * Returns: 1 if the bucket has entries
1882 */
1883
1884static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
1885 unsigned int hash)
1886{
1887 struct gfs2_glock *gl, *prev = NULL;
1888 int has_entries = 0;
1889 struct hlist_head *head = &gl_hash_table[hash].hb_list;
1890
1891 read_lock(gl_lock_addr(hash));
1892 /* Can't use hlist_for_each_entry - don't want prefetch here */
1893 if (hlist_empty(head))
1894 goto out;
1895 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1896 while(1) {
1897 if (gl->gl_sbd == sdp) {
1898 gfs2_glock_hold(gl);
1899 read_unlock(gl_lock_addr(hash));
1900 if (prev)
1901 gfs2_glock_put(prev);
1902 prev = gl;
1903 examiner(gl);
1904 has_entries = 1;
1905 read_lock(gl_lock_addr(hash));
1906 }
1907 if (gl->gl_list.next == NULL)
1908 break;
1909 gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
1910 }
1911out:
1912 read_unlock(gl_lock_addr(hash));
1913 if (prev)
1914 gfs2_glock_put(prev);
1915 return has_entries;
1916}
1917
1918/**
1919 * scan_glock - look at a glock and see if we can reclaim it
1920 * @gl: the glock to look at
1921 *
1922 */
1923
1924static void scan_glock(struct gfs2_glock *gl)
1925{
1926 if (gl->gl_ops == &gfs2_inode_glops)
1927 return;
1928
1929 if (gfs2_glmutex_trylock(gl)) {
1930 if (queue_empty(gl, &gl->gl_holders) &&
1931 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1932 goto out_schedule;
1933 gfs2_glmutex_unlock(gl);
1934 }
1935 return;
1936
1937out_schedule:
1938 gfs2_glmutex_unlock(gl);
1939 gfs2_glock_schedule_for_reclaim(gl);
1940}
1941
1942/**
1943 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
1944 * @sdp: the filesystem
1945 *
1946 */
1947
1948void gfs2_scand_internal(struct gfs2_sbd *sdp)
1949{
1950 unsigned int x;
1951
1952 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1953 examine_bucket(scan_glock, sdp, x);
1954}
1955
1956/**
1957 * clear_glock - look at a glock and see if we can free it from glock cache
1958 * @gl: the glock to look at
1959 *
1960 */
1961
1962static void clear_glock(struct gfs2_glock *gl)
1963{
1964 struct gfs2_sbd *sdp = gl->gl_sbd;
1965 int released;
1966
1967 spin_lock(&sdp->sd_reclaim_lock);
1968 if (!list_empty(&gl->gl_reclaim)) {
1969 list_del_init(&gl->gl_reclaim);
1970 atomic_dec(&sdp->sd_reclaim_count);
1971 spin_unlock(&sdp->sd_reclaim_lock);
1972 released = gfs2_glock_put(gl);
1973 gfs2_assert(sdp, !released);
1974 } else {
1975 spin_unlock(&sdp->sd_reclaim_lock);
1976 }
1977
1978 if (gfs2_glmutex_trylock(gl)) {
1979 if (queue_empty(gl, &gl->gl_holders) &&
1980 gl->gl_state != LM_ST_UNLOCKED)
1981 handle_callback(gl, LM_ST_UNLOCKED);
1982 gfs2_glmutex_unlock(gl);
1983 }
1984}
1985
1986/**
1987 * gfs2_gl_hash_clear - Empty out the glock hash table
1988 * @sdp: the filesystem
1989 * @wait: wait until it's all gone
1990 *
1991 * Called when unmounting the filesystem, or when inter-node lock manager
1992 * requests DROPLOCKS because it is running out of capacity.
1993 */
1994
1995void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
1996{
1997 unsigned long t;
1998 unsigned int x;
1999 int cont;
2000
2001 t = jiffies;
2002
2003 for (;;) {
2004 cont = 0;
2005 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2006 if (examine_bucket(clear_glock, sdp, x))
2007 cont = 1;
2008 }
2009
2010 if (!wait || !cont)
2011 break;
2012
2013 if (time_after_eq(jiffies,
2014 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
2015 fs_warn(sdp, "Unmount seems to be stalled. "
2016 "Dumping lock state...\n");
2017 gfs2_dump_lockstate(sdp);
2018 t = jiffies;
2019 }
2020
2021 invalidate_inodes(sdp->sd_vfs);
2022 msleep(10);
2023 }
2024}
2025
2026/*
2027 * Diagnostic routines to help debug distributed deadlock
2028 */
2029
2030/**
2031 * dump_holder - print information about a glock holder
2032 * @str: a string naming the type of holder
2033 * @gh: the glock holder
2034 *
2035 * Returns: 0 on success, -ENOBUFS when we run out of space
2036 */
2037
2038static int dump_holder(char *str, struct gfs2_holder *gh)
2039{
2040 unsigned int x;
2041 int error = -ENOBUFS;
2042
2043 printk(KERN_INFO " %s\n", str);
2044 printk(KERN_INFO " owner = %ld\n",
2045 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
2046 printk(KERN_INFO " gh_state = %u\n", gh->gh_state);
2047 printk(KERN_INFO " gh_flags =");
2048 for (x = 0; x < 32; x++)
2049 if (gh->gh_flags & (1 << x))
2050 printk(" %u", x);
2051 printk(" \n");
2052 printk(KERN_INFO " error = %d\n", gh->gh_error);
2053 printk(KERN_INFO " gh_iflags =");
2054 for (x = 0; x < 32; x++)
2055 if (test_bit(x, &gh->gh_iflags))
2056 printk(" %u", x);
2057 printk(" \n");
2058 print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip);
2059
2060 error = 0;
2061
2062 return error;
2063}
2064
2065/**
2066 * dump_inode - print information about an inode
2067 * @ip: the inode
2068 *
2069 * Returns: 0 on success, -ENOBUFS when we run out of space
2070 */
2071
2072static int dump_inode(struct gfs2_inode *ip)
2073{
2074 unsigned int x;
2075 int error = -ENOBUFS;
2076
2077 printk(KERN_INFO " Inode:\n");
2078 printk(KERN_INFO " num = %llu %llu\n",
2079 (unsigned long long)ip->i_num.no_formal_ino,
2080 (unsigned long long)ip->i_num.no_addr);
2081 printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode));
2082 printk(KERN_INFO " i_flags =");
2083 for (x = 0; x < 32; x++)
2084 if (test_bit(x, &ip->i_flags))
2085 printk(" %u", x);
2086 printk(" \n");
2087
2088 error = 0;
2089
2090 return error;
2091}
2092
2093/**
2094 * dump_glock - print information about a glock
2095 * @gl: the glock
2096 * @count: where we are in the buffer
2097 *
2098 * Returns: 0 on success, -ENOBUFS when we run out of space
2099 */
2100
2101static int dump_glock(struct gfs2_glock *gl)
2102{
2103 struct gfs2_holder *gh;
2104 unsigned int x;
2105 int error = -ENOBUFS;
2106
2107 spin_lock(&gl->gl_spin);
2108
2109 printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
2110 (unsigned long long)gl->gl_name.ln_number);
2111 printk(KERN_INFO " gl_flags =");
2112 for (x = 0; x < 32; x++) {
2113 if (test_bit(x, &gl->gl_flags))
2114 printk(" %u", x);
2115 }
2116 printk(" \n");
2117 printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref));
2118 printk(KERN_INFO " gl_state = %u\n", gl->gl_state);
2119 printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm);
2120 print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip);
2121 printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
2122 printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
2123 printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
2124 printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no");
2125 printk(KERN_INFO " le = %s\n",
2126 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
2127 printk(KERN_INFO " reclaim = %s\n",
2128 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
2129 if (gl->gl_aspace)
2130 printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
2131 gl->gl_aspace->i_mapping->nrpages);
2132 else
2133 printk(KERN_INFO " aspace = no\n");
2134 printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count));
2135 if (gl->gl_req_gh) {
2136 error = dump_holder("Request", gl->gl_req_gh);
2137 if (error)
2138 goto out;
2139 }
2140 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
2141 error = dump_holder("Holder", gh);
2142 if (error)
2143 goto out;
2144 }
2145 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
2146 error = dump_holder("Waiter1", gh);
2147 if (error)
2148 goto out;
2149 }
2150 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
2151 error = dump_holder("Waiter2", gh);
2152 if (error)
2153 goto out;
2154 }
2155 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
2156 error = dump_holder("Waiter3", gh);
2157 if (error)
2158 goto out;
2159 }
2160 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
2161 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
2162 list_empty(&gl->gl_holders)) {
2163 error = dump_inode(gl->gl_object);
2164 if (error)
2165 goto out;
2166 } else {
2167 error = -ENOBUFS;
2168 printk(KERN_INFO " Inode: busy\n");
2169 }
2170 }
2171
2172 error = 0;
2173
2174out:
2175 spin_unlock(&gl->gl_spin);
2176 return error;
2177}
2178
2179/**
2180 * gfs2_dump_lockstate - print out the current lockstate
2181 * @sdp: the filesystem
2182 * @ub: the buffer to copy the information into
2183 *
2184 * If @ub is NULL, dump the lockstate to the console.
2185 *
2186 */
2187
2188static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2189{
2190 struct gfs2_glock *gl;
2191 struct hlist_node *h;
2192 unsigned int x;
2193 int error = 0;
2194
2195 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2196
2197 read_lock(gl_lock_addr(x));
2198
2199 hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
2200 if (gl->gl_sbd != sdp)
2201 continue;
2202
2203 error = dump_glock(gl);
2204 if (error)
2205 break;
2206 }
2207
2208 read_unlock(gl_lock_addr(x));
2209
2210 if (error)
2211 break;
2212 }
2213
2214
2215 return error;
2216}
2217
2218int __init gfs2_glock_init(void)
2219{
2220 unsigned i;
2221 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
2222 INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
2223 }
2224#ifdef GL_HASH_LOCK_SZ
2225 for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
2226 rwlock_init(&gl_hash_locks[i]);
2227 }
2228#endif
2229 return 0;
2230}
2231
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
new file mode 100644
index 000000000000..2b2a889ee2cc
--- /dev/null
+++ b/fs/gfs2/glock.h
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__
12
13#include "incore.h"
14
15/* Flags for lock requests; used in gfs2_holder gh_flag field.
16 From lm_interface.h:
17#define LM_FLAG_TRY 0x00000001
18#define LM_FLAG_TRY_1CB 0x00000002
19#define LM_FLAG_NOEXP 0x00000004
20#define LM_FLAG_ANY 0x00000008
21#define LM_FLAG_PRIORITY 0x00000010 */
22
23#define GL_LOCAL_EXCL 0x00000020
24#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080
26#define GL_SKIP 0x00000100
27#define GL_ATIME 0x00000200
28#define GL_NOCACHE 0x00000400
29#define GL_NOCANCEL 0x00001000
30#define GL_AOP 0x00004000
31#define GL_DUMP 0x00008000
32
33#define GLR_TRYFAILED 13
34#define GLR_CANCELED 14
35
36static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
37{
38 struct gfs2_holder *gh;
39 int locked = 0;
40
41 /* Look in glock's list of holders for one with current task as owner */
42 spin_lock(&gl->gl_spin);
43 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
44 if (gh->gh_owner == current) {
45 locked = 1;
46 break;
47 }
48 }
49 spin_unlock(&gl->gl_spin);
50
51 return locked;
52}
53
54static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
55{
56 return gl->gl_state == LM_ST_EXCLUSIVE;
57}
58
59static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
60{
61 return gl->gl_state == LM_ST_DEFERRED;
62}
63
64static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
65{
66 return gl->gl_state == LM_ST_SHARED;
67}
68
69static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
70{
71 int ret;
72 spin_lock(&gl->gl_spin);
73 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
74 spin_unlock(&gl->gl_spin);
75 return ret;
76}
77
78int gfs2_glock_get(struct gfs2_sbd *sdp,
79 u64 number, const struct gfs2_glock_operations *glops,
80 int create, struct gfs2_glock **glp);
81void gfs2_glock_hold(struct gfs2_glock *gl);
82int gfs2_glock_put(struct gfs2_glock *gl);
83void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
84 struct gfs2_holder *gh);
85void gfs2_holder_reinit(unsigned int state, unsigned flags,
86 struct gfs2_holder *gh);
87void gfs2_holder_uninit(struct gfs2_holder *gh);
88
89void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
90void gfs2_glock_drop_th(struct gfs2_glock *gl);
91
92int gfs2_glock_nq(struct gfs2_holder *gh);
93int gfs2_glock_poll(struct gfs2_holder *gh);
94int gfs2_glock_wait(struct gfs2_holder *gh);
95void gfs2_glock_dq(struct gfs2_holder *gh);
96
97int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
98
99void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
100int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
101 u64 number, const struct gfs2_glock_operations *glops,
102 unsigned int state, int flags, struct gfs2_holder *gh);
103
104int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
105void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
106void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
107
108void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
109 const struct gfs2_glock_operations *glops,
110 unsigned int state, int flags);
111void gfs2_glock_inode_squish(struct inode *inode);
112
113/**
114 * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
115 * @gl: the glock
116 * @state: the state we're requesting
117 * @flags: the modifier flags
118 * @gh: the holder structure
119 *
120 * Returns: 0, GLR_*, or errno
121 */
122
123static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
124 unsigned int state, int flags,
125 struct gfs2_holder *gh)
126{
127 int error;
128
129 gfs2_holder_init(gl, state, flags, gh);
130
131 error = gfs2_glock_nq(gh);
132 if (error)
133 gfs2_holder_uninit(gh);
134
135 return error;
136}
137
138/* Lock Value Block functions */
139
140int gfs2_lvb_hold(struct gfs2_glock *gl);
141void gfs2_lvb_unhold(struct gfs2_glock *gl);
142
143void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
144
145void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
146void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
147
148void gfs2_scand_internal(struct gfs2_sbd *sdp);
149void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
150
151int __init gfs2_glock_init(void);
152
153#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
new file mode 100644
index 000000000000..ef1492e2d445
--- /dev/null
+++ b/fs/gfs2/glops.c
@@ -0,0 +1,564 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "bmap.h"
21#include "glock.h"
22#include "glops.h"
23#include "inode.h"
24#include "log.h"
25#include "meta_io.h"
26#include "recovery.h"
27#include "rgrp.h"
28#include "util.h"
29
30
31/**
32 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
33 * @gl: the glock
34 *
35 */
36
37static void gfs2_pte_inval(struct gfs2_glock *gl)
38{
39 struct gfs2_inode *ip;
40 struct inode *inode;
41
42 ip = gl->gl_object;
43 inode = &ip->i_inode;
44 if (!ip || !S_ISREG(ip->i_di.di_mode))
45 return;
46
47 if (!test_bit(GIF_PAGED, &ip->i_flags))
48 return;
49
50 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
51
52 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
53 set_bit(GLF_DIRTY, &gl->gl_flags);
54
55 clear_bit(GIF_SW_PAGED, &ip->i_flags);
56}
57
58/**
59 * gfs2_page_inval - Invalidate all pages associated with a glock
60 * @gl: the glock
61 *
62 */
63
64static void gfs2_page_inval(struct gfs2_glock *gl)
65{
66 struct gfs2_inode *ip;
67 struct inode *inode;
68
69 ip = gl->gl_object;
70 inode = &ip->i_inode;
71 if (!ip || !S_ISREG(ip->i_di.di_mode))
72 return;
73
74 truncate_inode_pages(inode->i_mapping, 0);
75 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
76 clear_bit(GIF_PAGED, &ip->i_flags);
77}
78
79/**
80 * gfs2_page_wait - Wait for writeback of data
81 * @gl: the glock
82 *
83 * Syncs data (not metadata) for a regular file.
84 * No-op for all other types.
85 */
86
87static void gfs2_page_wait(struct gfs2_glock *gl)
88{
89 struct gfs2_inode *ip = gl->gl_object;
90 struct inode *inode = &ip->i_inode;
91 struct address_space *mapping = inode->i_mapping;
92 int error;
93
94 if (!S_ISREG(ip->i_di.di_mode))
95 return;
96
97 error = filemap_fdatawait(mapping);
98
99 /* Put back any errors cleared by filemap_fdatawait()
100 so they can be caught by someone who can pass them
101 up to user space. */
102
103 if (error == -ENOSPC)
104 set_bit(AS_ENOSPC, &mapping->flags);
105 else if (error)
106 set_bit(AS_EIO, &mapping->flags);
107
108}
109
110static void gfs2_page_writeback(struct gfs2_glock *gl)
111{
112 struct gfs2_inode *ip = gl->gl_object;
113 struct inode *inode = &ip->i_inode;
114 struct address_space *mapping = inode->i_mapping;
115
116 if (!S_ISREG(ip->i_di.di_mode))
117 return;
118
119 filemap_fdatawrite(mapping);
120}
121
122/**
123 * meta_go_sync - sync out the metadata for this glock
124 * @gl: the glock
125 * @flags: DIO_*
126 *
127 * Called when demoting or unlocking an EX glock. We must flush
128 * to disk all dirty buffers/pages relating to this glock, and must not
129 * not return to caller to demote/unlock the glock until I/O is complete.
130 */
131
132static void meta_go_sync(struct gfs2_glock *gl, int flags)
133{
134 if (!(flags & DIO_METADATA))
135 return;
136
137 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
138 gfs2_log_flush(gl->gl_sbd, gl);
139 gfs2_meta_sync(gl);
140 if (flags & DIO_RELEASE)
141 gfs2_ail_empty_gl(gl);
142 }
143
144}
145
146/**
147 * meta_go_inval - invalidate the metadata for this glock
148 * @gl: the glock
149 * @flags:
150 *
151 */
152
153static void meta_go_inval(struct gfs2_glock *gl, int flags)
154{
155 if (!(flags & DIO_METADATA))
156 return;
157
158 gfs2_meta_inval(gl);
159 gl->gl_vn++;
160}
161
162/**
163 * inode_go_xmote_th - promote/demote a glock
164 * @gl: the glock
165 * @state: the requested state
166 * @flags:
167 *
168 */
169
170static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
171 int flags)
172{
173 if (gl->gl_state != LM_ST_UNLOCKED)
174 gfs2_pte_inval(gl);
175 gfs2_glock_xmote_th(gl, state, flags);
176}
177
178/**
179 * inode_go_xmote_bh - After promoting/demoting a glock
180 * @gl: the glock
181 *
182 */
183
184static void inode_go_xmote_bh(struct gfs2_glock *gl)
185{
186 struct gfs2_holder *gh = gl->gl_req_gh;
187 struct buffer_head *bh;
188 int error;
189
190 if (gl->gl_state != LM_ST_UNLOCKED &&
191 (!gh || !(gh->gh_flags & GL_SKIP))) {
192 error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh);
193 if (!error)
194 brelse(bh);
195 }
196}
197
198/**
199 * inode_go_drop_th - unlock a glock
200 * @gl: the glock
201 *
202 * Invoked from rq_demote().
203 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
204 * is being purged from our node's glock cache; we're dropping lock.
205 */
206
207static void inode_go_drop_th(struct gfs2_glock *gl)
208{
209 gfs2_pte_inval(gl);
210 gfs2_glock_drop_th(gl);
211}
212
213/**
214 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
215 * @gl: the glock protecting the inode
216 * @flags:
217 *
218 */
219
220static void inode_go_sync(struct gfs2_glock *gl, int flags)
221{
222 int meta = (flags & DIO_METADATA);
223 int data = (flags & DIO_DATA);
224
225 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
226 if (meta && data) {
227 gfs2_page_writeback(gl);
228 gfs2_log_flush(gl->gl_sbd, gl);
229 gfs2_meta_sync(gl);
230 gfs2_page_wait(gl);
231 clear_bit(GLF_DIRTY, &gl->gl_flags);
232 } else if (meta) {
233 gfs2_log_flush(gl->gl_sbd, gl);
234 gfs2_meta_sync(gl);
235 } else if (data) {
236 gfs2_page_writeback(gl);
237 gfs2_page_wait(gl);
238 }
239 if (flags & DIO_RELEASE)
240 gfs2_ail_empty_gl(gl);
241 }
242}
243
244/**
245 * inode_go_inval - prepare a inode glock to be released
246 * @gl: the glock
247 * @flags:
248 *
249 */
250
251static void inode_go_inval(struct gfs2_glock *gl, int flags)
252{
253 int meta = (flags & DIO_METADATA);
254 int data = (flags & DIO_DATA);
255
256 if (meta) {
257 gfs2_meta_inval(gl);
258 gl->gl_vn++;
259 }
260 if (data)
261 gfs2_page_inval(gl);
262}
263
264/**
265 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
266 * @gl: the glock
267 *
268 * Returns: 1 if it's ok
269 */
270
271static int inode_go_demote_ok(struct gfs2_glock *gl)
272{
273 struct gfs2_sbd *sdp = gl->gl_sbd;
274 int demote = 0;
275
276 if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
277 demote = 1;
278 else if (!sdp->sd_args.ar_localcaching &&
279 time_after_eq(jiffies, gl->gl_stamp +
280 gfs2_tune_get(sdp, gt_demote_secs) * HZ))
281 demote = 1;
282
283 return demote;
284}
285
286/**
287 * inode_go_lock - operation done after an inode lock is locked by a process
288 * @gl: the glock
289 * @flags:
290 *
291 * Returns: errno
292 */
293
294static int inode_go_lock(struct gfs2_holder *gh)
295{
296 struct gfs2_glock *gl = gh->gh_gl;
297 struct gfs2_inode *ip = gl->gl_object;
298 int error = 0;
299
300 if (!ip)
301 return 0;
302
303 if (ip->i_vn != gl->gl_vn) {
304 error = gfs2_inode_refresh(ip);
305 if (error)
306 return error;
307 gfs2_inode_attr_in(ip);
308 }
309
310 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
311 (gl->gl_state == LM_ST_EXCLUSIVE) &&
312 (gh->gh_flags & GL_LOCAL_EXCL))
313 error = gfs2_truncatei_resume(ip);
314
315 return error;
316}
317
318/**
319 * inode_go_unlock - operation done before an inode lock is unlocked by a
320 * process
321 * @gl: the glock
322 * @flags:
323 *
324 */
325
326static void inode_go_unlock(struct gfs2_holder *gh)
327{
328 struct gfs2_glock *gl = gh->gh_gl;
329 struct gfs2_inode *ip = gl->gl_object;
330
331 if (ip == NULL)
332 return;
333 if (test_bit(GLF_DIRTY, &gl->gl_flags))
334 gfs2_inode_attr_in(ip);
335 gfs2_meta_cache_flush(ip);
336}
337
338/**
339 * inode_greedy -
340 * @gl: the glock
341 *
342 */
343
344static void inode_greedy(struct gfs2_glock *gl)
345{
346 struct gfs2_sbd *sdp = gl->gl_sbd;
347 struct gfs2_inode *ip = gl->gl_object;
348 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
349 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
350 unsigned int new_time;
351
352 spin_lock(&ip->i_spin);
353
354 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
355 new_time = ip->i_greedy + quantum;
356 if (new_time > max)
357 new_time = max;
358 } else {
359 new_time = ip->i_greedy - quantum;
360 if (!new_time || new_time > max)
361 new_time = 1;
362 }
363
364 ip->i_greedy = new_time;
365
366 spin_unlock(&ip->i_spin);
367
368 iput(&ip->i_inode);
369}
370
371/**
372 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
373 * @gl: the glock
374 *
375 * Returns: 1 if it's ok
376 */
377
378static int rgrp_go_demote_ok(struct gfs2_glock *gl)
379{
380 return !gl->gl_aspace->i_mapping->nrpages;
381}
382
383/**
384 * rgrp_go_lock - operation done after an rgrp lock is locked by
385 * a first holder on this node.
386 * @gl: the glock
387 * @flags:
388 *
389 * Returns: errno
390 */
391
392static int rgrp_go_lock(struct gfs2_holder *gh)
393{
394 return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
395}
396
397/**
398 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
399 * a last holder on this node.
400 * @gl: the glock
401 * @flags:
402 *
403 */
404
405static void rgrp_go_unlock(struct gfs2_holder *gh)
406{
407 gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
408}
409
410/**
411 * trans_go_xmote_th - promote/demote the transaction glock
412 * @gl: the glock
413 * @state: the requested state
414 * @flags:
415 *
416 */
417
418static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
419 int flags)
420{
421 struct gfs2_sbd *sdp = gl->gl_sbd;
422
423 if (gl->gl_state != LM_ST_UNLOCKED &&
424 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
425 gfs2_meta_syncfs(sdp);
426 gfs2_log_shutdown(sdp);
427 }
428
429 gfs2_glock_xmote_th(gl, state, flags);
430}
431
432/**
433 * trans_go_xmote_bh - After promoting/demoting the transaction glock
434 * @gl: the glock
435 *
436 */
437
438static void trans_go_xmote_bh(struct gfs2_glock *gl)
439{
440 struct gfs2_sbd *sdp = gl->gl_sbd;
441 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
442 struct gfs2_glock *j_gl = ip->i_gl;
443 struct gfs2_log_header head;
444 int error;
445
446 if (gl->gl_state != LM_ST_UNLOCKED &&
447 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
448 gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
449 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
450
451 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
452 if (error)
453 gfs2_consist(sdp);
454 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
455 gfs2_consist(sdp);
456
457 /* Initialize some head of the log stuff */
458 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
459 sdp->sd_log_sequence = head.lh_sequence + 1;
460 gfs2_log_pointers_init(sdp, head.lh_blkno);
461 }
462 }
463}
464
465/**
466 * trans_go_drop_th - unlock the transaction glock
467 * @gl: the glock
468 *
469 * We want to sync the device even with localcaching. Remember
470 * that localcaching journal replay only marks buffers dirty.
471 */
472
473static void trans_go_drop_th(struct gfs2_glock *gl)
474{
475 struct gfs2_sbd *sdp = gl->gl_sbd;
476
477 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
478 gfs2_meta_syncfs(sdp);
479 gfs2_log_shutdown(sdp);
480 }
481
482 gfs2_glock_drop_th(gl);
483}
484
485/**
486 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
487 * @gl: the glock
488 *
489 * Returns: 1 if it's ok
490 */
491
492static int quota_go_demote_ok(struct gfs2_glock *gl)
493{
494 return !atomic_read(&gl->gl_lvb_count);
495}
496
497const struct gfs2_glock_operations gfs2_meta_glops = {
498 .go_xmote_th = gfs2_glock_xmote_th,
499 .go_drop_th = gfs2_glock_drop_th,
500 .go_type = LM_TYPE_META,
501};
502
503const struct gfs2_glock_operations gfs2_inode_glops = {
504 .go_xmote_th = inode_go_xmote_th,
505 .go_xmote_bh = inode_go_xmote_bh,
506 .go_drop_th = inode_go_drop_th,
507 .go_sync = inode_go_sync,
508 .go_inval = inode_go_inval,
509 .go_demote_ok = inode_go_demote_ok,
510 .go_lock = inode_go_lock,
511 .go_unlock = inode_go_unlock,
512 .go_greedy = inode_greedy,
513 .go_type = LM_TYPE_INODE,
514};
515
516const struct gfs2_glock_operations gfs2_rgrp_glops = {
517 .go_xmote_th = gfs2_glock_xmote_th,
518 .go_drop_th = gfs2_glock_drop_th,
519 .go_sync = meta_go_sync,
520 .go_inval = meta_go_inval,
521 .go_demote_ok = rgrp_go_demote_ok,
522 .go_lock = rgrp_go_lock,
523 .go_unlock = rgrp_go_unlock,
524 .go_type = LM_TYPE_RGRP,
525};
526
527const struct gfs2_glock_operations gfs2_trans_glops = {
528 .go_xmote_th = trans_go_xmote_th,
529 .go_xmote_bh = trans_go_xmote_bh,
530 .go_drop_th = trans_go_drop_th,
531 .go_type = LM_TYPE_NONDISK,
532};
533
534const struct gfs2_glock_operations gfs2_iopen_glops = {
535 .go_xmote_th = gfs2_glock_xmote_th,
536 .go_drop_th = gfs2_glock_drop_th,
537 .go_type = LM_TYPE_IOPEN,
538};
539
540const struct gfs2_glock_operations gfs2_flock_glops = {
541 .go_xmote_th = gfs2_glock_xmote_th,
542 .go_drop_th = gfs2_glock_drop_th,
543 .go_type = LM_TYPE_FLOCK,
544};
545
546const struct gfs2_glock_operations gfs2_nondisk_glops = {
547 .go_xmote_th = gfs2_glock_xmote_th,
548 .go_drop_th = gfs2_glock_drop_th,
549 .go_type = LM_TYPE_NONDISK,
550};
551
552const struct gfs2_glock_operations gfs2_quota_glops = {
553 .go_xmote_th = gfs2_glock_xmote_th,
554 .go_drop_th = gfs2_glock_drop_th,
555 .go_demote_ok = quota_go_demote_ok,
556 .go_type = LM_TYPE_QUOTA,
557};
558
559const struct gfs2_glock_operations gfs2_journal_glops = {
560 .go_xmote_th = gfs2_glock_xmote_th,
561 .go_drop_th = gfs2_glock_drop_th,
562 .go_type = LM_TYPE_JOURNAL,
563};
564
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
new file mode 100644
index 000000000000..a1d9b5b024e6
--- /dev/null
+++ b/fs/gfs2/glops.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __GLOPS_DOT_H__
11#define __GLOPS_DOT_H__
12
13#include "incore.h"
14
15extern const struct gfs2_glock_operations gfs2_meta_glops;
16extern const struct gfs2_glock_operations gfs2_inode_glops;
17extern const struct gfs2_glock_operations gfs2_rgrp_glops;
18extern const struct gfs2_glock_operations gfs2_trans_glops;
19extern const struct gfs2_glock_operations gfs2_iopen_glops;
20extern const struct gfs2_glock_operations gfs2_flock_glops;
21extern const struct gfs2_glock_operations gfs2_nondisk_glops;
22extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops;
24
25#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
new file mode 100644
index 000000000000..118dc693d111
--- /dev/null
+++ b/fs/gfs2/incore.h
@@ -0,0 +1,634 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
13#include <linux/fs.h>
14
15#define DIO_WAIT 0x00000010
16#define DIO_METADATA 0x00000020
17#define DIO_DATA 0x00000040
18#define DIO_RELEASE 0x00000080
19#define DIO_ALL 0x00000100
20
21struct gfs2_log_operations;
22struct gfs2_log_element;
23struct gfs2_holder;
24struct gfs2_glock;
25struct gfs2_quota_data;
26struct gfs2_trans;
27struct gfs2_ail;
28struct gfs2_jdesc;
29struct gfs2_sbd;
30
31typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
32
33/*
34 * Structure of operations that are associated with each
35 * type of element in the log.
36 */
37
38struct gfs2_log_operations {
39 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
40 void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
41 void (*lo_before_commit) (struct gfs2_sbd *sdp);
42 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
43 void (*lo_before_scan) (struct gfs2_jdesc *jd,
44 struct gfs2_log_header *head, int pass);
45 int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
46 struct gfs2_log_descriptor *ld, __be64 *ptr,
47 int pass);
48 void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
49 const char *lo_name;
50};
51
52struct gfs2_log_element {
53 struct list_head le_list;
54 const struct gfs2_log_operations *le_ops;
55};
56
57struct gfs2_bitmap {
58 struct buffer_head *bi_bh;
59 char *bi_clone;
60 u32 bi_offset;
61 u32 bi_start;
62 u32 bi_len;
63};
64
65struct gfs2_rgrpd {
66 struct list_head rd_list; /* Link with superblock */
67 struct list_head rd_list_mru;
68 struct list_head rd_recent; /* Recently used rgrps */
69 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
70 struct gfs2_rindex rd_ri;
71 struct gfs2_rgrp rd_rg;
72 u64 rd_rg_vn;
73 struct gfs2_bitmap *rd_bits;
74 unsigned int rd_bh_count;
75 struct mutex rd_mutex;
76 u32 rd_free_clone;
77 struct gfs2_log_element rd_le;
78 u32 rd_last_alloc_data;
79 u32 rd_last_alloc_meta;
80 struct gfs2_sbd *rd_sbd;
81};
82
83enum gfs2_state_bits {
84 BH_Pinned = BH_PrivateStart,
85 BH_Escaped = BH_PrivateStart + 1,
86};
87
88BUFFER_FNS(Pinned, pinned)
89TAS_BUFFER_FNS(Pinned, pinned)
90BUFFER_FNS(Escaped, escaped)
91TAS_BUFFER_FNS(Escaped, escaped)
92
93struct gfs2_bufdata {
94 struct buffer_head *bd_bh;
95 struct gfs2_glock *bd_gl;
96
97 struct list_head bd_list_tr;
98 struct gfs2_log_element bd_le;
99
100 struct gfs2_ail *bd_ail;
101 struct list_head bd_ail_st_list;
102 struct list_head bd_ail_gl_list;
103};
104
105struct gfs2_glock_operations {
106 void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
107 int flags);
108 void (*go_xmote_bh) (struct gfs2_glock * gl);
109 void (*go_drop_th) (struct gfs2_glock * gl);
110 void (*go_drop_bh) (struct gfs2_glock * gl);
111 void (*go_sync) (struct gfs2_glock * gl, int flags);
112 void (*go_inval) (struct gfs2_glock * gl, int flags);
113 int (*go_demote_ok) (struct gfs2_glock * gl);
114 int (*go_lock) (struct gfs2_holder * gh);
115 void (*go_unlock) (struct gfs2_holder * gh);
116 void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
117 void (*go_greedy) (struct gfs2_glock * gl);
118 const int go_type;
119};
120
121enum {
122 /* Actions */
123 HIF_MUTEX = 0,
124 HIF_PROMOTE = 1,
125 HIF_DEMOTE = 2,
126 HIF_GREEDY = 3,
127
128 /* States */
129 HIF_ALLOCED = 4,
130 HIF_DEALLOC = 5,
131 HIF_HOLDER = 6,
132 HIF_FIRST = 7,
133 HIF_ABORTED = 9,
134};
135
136struct gfs2_holder {
137 struct list_head gh_list;
138
139 struct gfs2_glock *gh_gl;
140 struct task_struct *gh_owner;
141 unsigned int gh_state;
142 unsigned gh_flags;
143
144 int gh_error;
145 unsigned long gh_iflags;
146 struct completion gh_wait;
147 unsigned long gh_ip;
148};
149
150enum {
151 GLF_LOCK = 1,
152 GLF_STICKY = 2,
153 GLF_PREFETCH = 3,
154 GLF_DIRTY = 5,
155 GLF_SKIP_WAITERS2 = 6,
156 GLF_GREEDY = 7,
157};
158
159struct gfs2_glock {
160 struct hlist_node gl_list;
161 unsigned long gl_flags; /* GLF_... */
162 struct lm_lockname gl_name;
163 atomic_t gl_ref;
164
165 spinlock_t gl_spin;
166
167 unsigned int gl_state;
168 unsigned int gl_hash;
169 struct task_struct *gl_owner;
170 unsigned long gl_ip;
171 struct list_head gl_holders;
172 struct list_head gl_waiters1; /* HIF_MUTEX */
173 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
174 struct list_head gl_waiters3; /* HIF_PROMOTE */
175
176 const struct gfs2_glock_operations *gl_ops;
177
178 struct gfs2_holder *gl_req_gh;
179 gfs2_glop_bh_t gl_req_bh;
180
181 void *gl_lock;
182 char *gl_lvb;
183 atomic_t gl_lvb_count;
184
185 u64 gl_vn;
186 unsigned long gl_stamp;
187 void *gl_object;
188
189 struct list_head gl_reclaim;
190
191 struct gfs2_sbd *gl_sbd;
192
193 struct inode *gl_aspace;
194 struct gfs2_log_element gl_le;
195 struct list_head gl_ail_list;
196 atomic_t gl_ail_count;
197};
198
199struct gfs2_alloc {
200 /* Quota stuff */
201
202 struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
203 struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
204 unsigned int al_qd_num;
205
206 u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
207 u32 al_alloced; /* Filled in by gfs2_alloc_*() */
208
209 /* Filled in by gfs2_inplace_reserve() */
210
211 unsigned int al_line;
212 char *al_file;
213 struct gfs2_holder al_ri_gh;
214 struct gfs2_holder al_rgd_gh;
215 struct gfs2_rgrpd *al_rgd;
216
217};
218
219enum {
220 GIF_QD_LOCKED = 1,
221 GIF_PAGED = 2,
222 GIF_SW_PAGED = 3,
223};
224
225struct gfs2_inode {
226 struct inode i_inode;
227 struct gfs2_inum i_num;
228
229 unsigned long i_flags; /* GIF_... */
230
231 u64 i_vn;
232 struct gfs2_dinode i_di; /* To be replaced by ref to block */
233
234 struct gfs2_glock *i_gl; /* Move into i_gh? */
235 struct gfs2_holder i_iopen_gh;
236 struct gfs2_holder i_gh; /* for prepare/commit_write only */
237 struct gfs2_alloc i_alloc;
238 u64 i_last_rg_alloc;
239
240 spinlock_t i_spin;
241 struct rw_semaphore i_rw_mutex;
242 unsigned int i_greedy;
243 unsigned long i_last_pfault;
244
245 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
246};
247
248/*
249 * Since i_inode is the first element of struct gfs2_inode,
250 * this is effectively a cast.
251 */
252static inline struct gfs2_inode *GFS2_I(struct inode *inode)
253{
254 return container_of(inode, struct gfs2_inode, i_inode);
255}
256
257/* To be removed? */
258static inline struct gfs2_sbd *GFS2_SB(struct inode *inode)
259{
260 return inode->i_sb->s_fs_info;
261}
262
263enum {
264 GFF_DID_DIRECT_ALLOC = 0,
265 GFF_EXLOCK = 1,
266};
267
268struct gfs2_file {
269 unsigned long f_flags; /* GFF_... */
270 struct mutex f_fl_mutex;
271 struct gfs2_holder f_fl_gh;
272};
273
274struct gfs2_revoke {
275 struct gfs2_log_element rv_le;
276 u64 rv_blkno;
277};
278
279struct gfs2_revoke_replay {
280 struct list_head rr_list;
281 u64 rr_blkno;
282 unsigned int rr_where;
283};
284
285enum {
286 QDF_USER = 0,
287 QDF_CHANGE = 1,
288 QDF_LOCKED = 2,
289};
290
291struct gfs2_quota_lvb {
292 __be32 qb_magic;
293 u32 __pad;
294 __be64 qb_limit; /* Hard limit of # blocks to alloc */
295 __be64 qb_warn; /* Warn user when alloc is above this # */
296 __be64 qb_value; /* Current # blocks allocated */
297};
298
299struct gfs2_quota_data {
300 struct list_head qd_list;
301 unsigned int qd_count;
302
303 u32 qd_id;
304 unsigned long qd_flags; /* QDF_... */
305
306 s64 qd_change;
307 s64 qd_change_sync;
308
309 unsigned int qd_slot;
310 unsigned int qd_slot_count;
311
312 struct buffer_head *qd_bh;
313 struct gfs2_quota_change *qd_bh_qc;
314 unsigned int qd_bh_count;
315
316 struct gfs2_glock *qd_gl;
317 struct gfs2_quota_lvb qd_qb;
318
319 u64 qd_sync_gen;
320 unsigned long qd_last_warn;
321 unsigned long qd_last_touched;
322};
323
324struct gfs2_log_buf {
325 struct list_head lb_list;
326 struct buffer_head *lb_bh;
327 struct buffer_head *lb_real;
328};
329
330struct gfs2_trans {
331 unsigned long tr_ip;
332
333 unsigned int tr_blocks;
334 unsigned int tr_revokes;
335 unsigned int tr_reserved;
336
337 struct gfs2_holder tr_t_gh;
338
339 int tr_touched;
340
341 unsigned int tr_num_buf;
342 unsigned int tr_num_buf_new;
343 unsigned int tr_num_buf_rm;
344 struct list_head tr_list_buf;
345
346 unsigned int tr_num_revoke;
347 unsigned int tr_num_revoke_rm;
348};
349
350struct gfs2_ail {
351 struct list_head ai_list;
352
353 unsigned int ai_first;
354 struct list_head ai_ail1_list;
355 struct list_head ai_ail2_list;
356
357 u64 ai_sync_gen;
358};
359
360struct gfs2_jdesc {
361 struct list_head jd_list;
362
363 struct inode *jd_inode;
364 unsigned int jd_jid;
365 int jd_dirty;
366
367 unsigned int jd_blocks;
368};
369
370#define GFS2_GLOCKD_DEFAULT 1
371#define GFS2_GLOCKD_MAX 16
372
373#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF
374#define GFS2_QUOTA_OFF 0
375#define GFS2_QUOTA_ACCOUNT 1
376#define GFS2_QUOTA_ON 2
377
378#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
379#define GFS2_DATA_WRITEBACK 1
380#define GFS2_DATA_ORDERED 2
381
382struct gfs2_args {
383 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
384 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
385 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
386 int ar_spectator; /* Don't get a journal because we're always RO */
387 int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
388 int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
389 int ar_localcaching; /* Local-style caching (dangerous on multihost) */
390 int ar_debug; /* Oops on errors instead of trying to be graceful */
391 int ar_upgrade; /* Upgrade ondisk/multihost format */
392 unsigned int ar_num_glockd; /* Number of glockd threads */
393 int ar_posix_acl; /* Enable posix acls */
394 int ar_quota; /* off/account/on */
395 int ar_suiddir; /* suiddir support */
396 int ar_data; /* ordered/writeback */
397};
398
399struct gfs2_tune {
400 spinlock_t gt_spin;
401
402 unsigned int gt_ilimit;
403 unsigned int gt_ilimit_tries;
404 unsigned int gt_ilimit_min;
405 unsigned int gt_demote_secs; /* Cache retention for unheld glock */
406 unsigned int gt_incore_log_blocks;
407 unsigned int gt_log_flush_secs;
408 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
409
410 unsigned int gt_scand_secs;
411 unsigned int gt_recoverd_secs;
412 unsigned int gt_logd_secs;
413 unsigned int gt_quotad_secs;
414
415 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
416 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
417 unsigned int gt_quota_scale_num; /* Numerator */
418 unsigned int gt_quota_scale_den; /* Denominator */
419 unsigned int gt_quota_cache_secs;
420 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
421 unsigned int gt_atime_quantum; /* Min secs between atime updates */
422 unsigned int gt_new_files_jdata;
423 unsigned int gt_new_files_directio;
424 unsigned int gt_max_atomic_write; /* Split big writes into this size */
425 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
426 unsigned int gt_lockdump_size;
427 unsigned int gt_stall_secs; /* Detects trouble! */
428 unsigned int gt_complain_secs;
429 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
430 unsigned int gt_entries_per_readdir;
431 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
432 unsigned int gt_greedy_default;
433 unsigned int gt_greedy_quantum;
434 unsigned int gt_greedy_max;
435 unsigned int gt_statfs_quantum;
436 unsigned int gt_statfs_slow;
437};
438
439enum {
440 SDF_JOURNAL_CHECKED = 0,
441 SDF_JOURNAL_LIVE = 1,
442 SDF_SHUTDOWN = 2,
443 SDF_NOATIME = 3,
444};
445
446#define GFS2_FSNAME_LEN 256
447
448struct gfs2_sbd {
449 struct super_block *sd_vfs;
450 struct super_block *sd_vfs_meta;
451 struct kobject sd_kobj;
452 unsigned long sd_flags; /* SDF_... */
453 struct gfs2_sb sd_sb;
454
455 /* Constants computed on mount */
456
457 u32 sd_fsb2bb;
458 u32 sd_fsb2bb_shift;
459 u32 sd_diptrs; /* Number of pointers in a dinode */
460 u32 sd_inptrs; /* Number of pointers in a indirect block */
461 u32 sd_jbsize; /* Size of a journaled data block */
462 u32 sd_hash_bsize; /* sizeof(exhash block) */
463 u32 sd_hash_bsize_shift;
464 u32 sd_hash_ptrs; /* Number of pointers in a hash block */
465 u32 sd_qc_per_block;
466 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
467 u32 sd_max_height; /* Max height of a file's metadata tree */
468 u64 sd_heightsize[GFS2_MAX_META_HEIGHT];
469 u32 sd_max_jheight; /* Max height of journaled file's meta tree */
470 u64 sd_jheightsize[GFS2_MAX_META_HEIGHT];
471
472 struct gfs2_args sd_args; /* Mount arguments */
473 struct gfs2_tune sd_tune; /* Filesystem tuning structure */
474
475 /* Lock Stuff */
476
477 struct lm_lockstruct sd_lockstruct;
478 struct list_head sd_reclaim_list;
479 spinlock_t sd_reclaim_lock;
480 wait_queue_head_t sd_reclaim_wq;
481 atomic_t sd_reclaim_count;
482 struct gfs2_holder sd_live_gh;
483 struct gfs2_glock *sd_rename_gl;
484 struct gfs2_glock *sd_trans_gl;
485
486 /* Inode Stuff */
487
488 struct inode *sd_master_dir;
489 struct inode *sd_jindex;
490 struct inode *sd_inum_inode;
491 struct inode *sd_statfs_inode;
492 struct inode *sd_ir_inode;
493 struct inode *sd_sc_inode;
494 struct inode *sd_qc_inode;
495 struct inode *sd_rindex;
496 struct inode *sd_quota_inode;
497
498 /* Inum stuff */
499
500 struct mutex sd_inum_mutex;
501
502 /* StatFS stuff */
503
504 spinlock_t sd_statfs_spin;
505 struct mutex sd_statfs_mutex;
506 struct gfs2_statfs_change sd_statfs_master;
507 struct gfs2_statfs_change sd_statfs_local;
508 unsigned long sd_statfs_sync_time;
509
510 /* Resource group stuff */
511
512 u64 sd_rindex_vn;
513 spinlock_t sd_rindex_spin;
514 struct mutex sd_rindex_mutex;
515 struct list_head sd_rindex_list;
516 struct list_head sd_rindex_mru_list;
517 struct list_head sd_rindex_recent_list;
518 struct gfs2_rgrpd *sd_rindex_forward;
519 unsigned int sd_rgrps;
520
521 /* Journal index stuff */
522
523 struct list_head sd_jindex_list;
524 spinlock_t sd_jindex_spin;
525 struct mutex sd_jindex_mutex;
526 unsigned int sd_journals;
527 unsigned long sd_jindex_refresh_time;
528
529 struct gfs2_jdesc *sd_jdesc;
530 struct gfs2_holder sd_journal_gh;
531 struct gfs2_holder sd_jinode_gh;
532
533 struct gfs2_holder sd_ir_gh;
534 struct gfs2_holder sd_sc_gh;
535 struct gfs2_holder sd_qc_gh;
536
537 /* Daemon stuff */
538
539 struct task_struct *sd_scand_process;
540 struct task_struct *sd_recoverd_process;
541 struct task_struct *sd_logd_process;
542 struct task_struct *sd_quotad_process;
543 struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
544 unsigned int sd_glockd_num;
545
546 /* Quota stuff */
547
548 struct list_head sd_quota_list;
549 atomic_t sd_quota_count;
550 spinlock_t sd_quota_spin;
551 struct mutex sd_quota_mutex;
552
553 unsigned int sd_quota_slots;
554 unsigned int sd_quota_chunks;
555 unsigned char **sd_quota_bitmap;
556
557 u64 sd_quota_sync_gen;
558 unsigned long sd_quota_sync_time;
559
560 /* Log stuff */
561
562 spinlock_t sd_log_lock;
563
564 unsigned int sd_log_blks_reserved;
565 unsigned int sd_log_commited_buf;
566 unsigned int sd_log_commited_revoke;
567
568 unsigned int sd_log_num_gl;
569 unsigned int sd_log_num_buf;
570 unsigned int sd_log_num_revoke;
571 unsigned int sd_log_num_rg;
572 unsigned int sd_log_num_databuf;
573 unsigned int sd_log_num_jdata;
574 unsigned int sd_log_num_hdrs;
575
576 struct list_head sd_log_le_gl;
577 struct list_head sd_log_le_buf;
578 struct list_head sd_log_le_revoke;
579 struct list_head sd_log_le_rg;
580 struct list_head sd_log_le_databuf;
581
582 unsigned int sd_log_blks_free;
583 struct mutex sd_log_reserve_mutex;
584
585 u64 sd_log_sequence;
586 unsigned int sd_log_head;
587 unsigned int sd_log_tail;
588 int sd_log_idle;
589
590 unsigned long sd_log_flush_time;
591 struct rw_semaphore sd_log_flush_lock;
592 struct list_head sd_log_flush_list;
593
594 unsigned int sd_log_flush_head;
595 u64 sd_log_flush_wrapped;
596
597 struct list_head sd_ail1_list;
598 struct list_head sd_ail2_list;
599 u64 sd_ail_sync_gen;
600
601 /* Replay stuff */
602
603 struct list_head sd_revoke_list;
604 unsigned int sd_replay_tail;
605
606 unsigned int sd_found_blocks;
607 unsigned int sd_found_revokes;
608 unsigned int sd_replayed_blocks;
609
610 /* For quiescing the filesystem */
611
612 struct gfs2_holder sd_freeze_gh;
613 struct mutex sd_freeze_lock;
614 unsigned int sd_freeze_count;
615
616 /* Counters */
617
618 atomic_t sd_glock_count;
619 atomic_t sd_glock_held_count;
620 atomic_t sd_inode_count;
621 atomic_t sd_reclaimed;
622
623 char sd_fsname[GFS2_FSNAME_LEN];
624 char sd_table_name[GFS2_FSNAME_LEN];
625 char sd_proto_name[GFS2_FSNAME_LEN];
626
627 /* Debugging crud */
628
629 unsigned long sd_last_warning;
630 struct vfsmount *sd_gfs2mnt;
631};
632
633#endif /* __INCORE_DOT_H__ */
634
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
new file mode 100644
index 000000000000..57a4c8b68587
--- /dev/null
+++ b/fs/gfs2/inode.c
@@ -0,0 +1,1338 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h>
19#include <linux/lm_interface.h>
20
21#include "gfs2.h"
22#include "incore.h"
23#include "acl.h"
24#include "bmap.h"
25#include "dir.h"
26#include "eattr.h"
27#include "glock.h"
28#include "glops.h"
29#include "inode.h"
30#include "log.h"
31#include "meta_io.h"
32#include "ops_address.h"
33#include "ops_file.h"
34#include "ops_inode.h"
35#include "quota.h"
36#include "rgrp.h"
37#include "trans.h"
38#include "util.h"
39
40/**
41 * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
42 * @ip: The GFS2 inode (with embedded disk inode data)
43 * @inode: The Linux VFS inode
44 *
45 */
46
47void gfs2_inode_attr_in(struct gfs2_inode *ip)
48{
49 struct inode *inode = &ip->i_inode;
50 struct gfs2_dinode *di = &ip->i_di;
51
52 inode->i_ino = ip->i_num.no_addr;
53
54 switch (di->di_mode & S_IFMT) {
55 case S_IFBLK:
56 case S_IFCHR:
57 inode->i_rdev = MKDEV(di->di_major, di->di_minor);
58 break;
59 default:
60 inode->i_rdev = 0;
61 break;
62 };
63
64 inode->i_mode = di->di_mode;
65 inode->i_nlink = di->di_nlink;
66 inode->i_uid = di->di_uid;
67 inode->i_gid = di->di_gid;
68 i_size_write(inode, di->di_size);
69 inode->i_atime.tv_sec = di->di_atime;
70 inode->i_mtime.tv_sec = di->di_mtime;
71 inode->i_ctime.tv_sec = di->di_ctime;
72 inode->i_atime.tv_nsec = 0;
73 inode->i_mtime.tv_nsec = 0;
74 inode->i_ctime.tv_nsec = 0;
75 inode->i_blocks = di->di_blocks <<
76 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
77
78 if (di->di_flags & GFS2_DIF_IMMUTABLE)
79 inode->i_flags |= S_IMMUTABLE;
80 else
81 inode->i_flags &= ~S_IMMUTABLE;
82
83 if (di->di_flags & GFS2_DIF_APPENDONLY)
84 inode->i_flags |= S_APPEND;
85 else
86 inode->i_flags &= ~S_APPEND;
87}
88
89/**
90 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
91 * @ip: The GFS2 inode
92 *
93 * Only copy out the attributes that we want the VFS layer
94 * to be able to modify.
95 */
96
97void gfs2_inode_attr_out(struct gfs2_inode *ip)
98{
99 struct inode *inode = &ip->i_inode;
100 struct gfs2_dinode *di = &ip->i_di;
101 gfs2_assert_withdraw(GFS2_SB(inode),
102 (di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
103 di->di_mode = inode->i_mode;
104 di->di_uid = inode->i_uid;
105 di->di_gid = inode->i_gid;
106 di->di_atime = inode->i_atime.tv_sec;
107 di->di_mtime = inode->i_mtime.tv_sec;
108 di->di_ctime = inode->i_ctime.tv_sec;
109}
110
111static int iget_test(struct inode *inode, void *opaque)
112{
113 struct gfs2_inode *ip = GFS2_I(inode);
114 struct gfs2_inum *inum = opaque;
115
116 if (ip && ip->i_num.no_addr == inum->no_addr)
117 return 1;
118
119 return 0;
120}
121
122static int iget_set(struct inode *inode, void *opaque)
123{
124 struct gfs2_inode *ip = GFS2_I(inode);
125 struct gfs2_inum *inum = opaque;
126
127 ip->i_num = *inum;
128 return 0;
129}
130
131struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum)
132{
133 return ilookup5(sb, (unsigned long)inum->no_formal_ino,
134 iget_test, inum);
135}
136
137static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
138{
139 return iget5_locked(sb, (unsigned long)inum->no_formal_ino,
140 iget_test, iget_set, inum);
141}
142
143/**
144 * gfs2_inode_lookup - Lookup an inode
145 * @sb: The super block
146 * @inum: The inode number
147 * @type: The type of the inode
148 *
149 * Returns: A VFS inode, or an error
150 */
151
152struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type)
153{
154 struct inode *inode = gfs2_iget(sb, inum);
155 struct gfs2_inode *ip = GFS2_I(inode);
156 struct gfs2_glock *io_gl;
157 int error;
158
159 if (inode->i_state & I_NEW) {
160 struct gfs2_sbd *sdp = GFS2_SB(inode);
161 umode_t mode = DT2IF(type);
162 inode->i_private = ip;
163 inode->i_mode = mode;
164
165 if (S_ISREG(mode)) {
166 inode->i_op = &gfs2_file_iops;
167 inode->i_fop = &gfs2_file_fops;
168 inode->i_mapping->a_ops = &gfs2_file_aops;
169 } else if (S_ISDIR(mode)) {
170 inode->i_op = &gfs2_dir_iops;
171 inode->i_fop = &gfs2_dir_fops;
172 } else if (S_ISLNK(mode)) {
173 inode->i_op = &gfs2_symlink_iops;
174 } else {
175 inode->i_op = &gfs2_dev_iops;
176 }
177
178 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
179 if (unlikely(error))
180 goto fail;
181 ip->i_gl->gl_object = ip;
182
183 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
184 if (unlikely(error))
185 goto fail_put;
186
187 ip->i_vn = ip->i_gl->gl_vn - 1;
188 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
189 if (unlikely(error))
190 goto fail_iopen;
191
192 gfs2_glock_put(io_gl);
193 unlock_new_inode(inode);
194 }
195
196 return inode;
197fail_iopen:
198 gfs2_glock_put(io_gl);
199fail_put:
200 ip->i_gl->gl_object = NULL;
201 gfs2_glock_put(ip->i_gl);
202fail:
203 iput(inode);
204 return ERR_PTR(error);
205}
206
207/**
208 * gfs2_inode_refresh - Refresh the incore copy of the dinode
209 * @ip: The GFS2 inode
210 *
211 * Returns: errno
212 */
213
214int gfs2_inode_refresh(struct gfs2_inode *ip)
215{
216 struct buffer_head *dibh;
217 int error;
218
219 error = gfs2_meta_inode_buffer(ip, &dibh);
220 if (error)
221 return error;
222
223 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
224 brelse(dibh);
225 return -EIO;
226 }
227
228 gfs2_dinode_in(&ip->i_di, dibh->b_data);
229
230 brelse(dibh);
231
232 if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
233 if (gfs2_consist_inode(ip))
234 gfs2_dinode_print(&ip->i_di);
235 return -EIO;
236 }
237 if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
238 return -ESTALE;
239
240 ip->i_vn = ip->i_gl->gl_vn;
241
242 return 0;
243}
244
245int gfs2_dinode_dealloc(struct gfs2_inode *ip)
246{
247 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
248 struct gfs2_alloc *al;
249 struct gfs2_rgrpd *rgd;
250 int error;
251
252 if (ip->i_di.di_blocks != 1) {
253 if (gfs2_consist_inode(ip))
254 gfs2_dinode_print(&ip->i_di);
255 return -EIO;
256 }
257
258 al = gfs2_alloc_get(ip);
259
260 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
261 if (error)
262 goto out;
263
264 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
265 if (error)
266 goto out_qs;
267
268 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
269 if (!rgd) {
270 gfs2_consist_inode(ip);
271 error = -EIO;
272 goto out_rindex_relse;
273 }
274
275 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
276 &al->al_rgd_gh);
277 if (error)
278 goto out_rindex_relse;
279
280 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
281 if (error)
282 goto out_rg_gunlock;
283
284 gfs2_trans_add_gl(ip->i_gl);
285
286 gfs2_free_di(rgd, ip);
287
288 gfs2_trans_end(sdp);
289 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
290
291out_rg_gunlock:
292 gfs2_glock_dq_uninit(&al->al_rgd_gh);
293out_rindex_relse:
294 gfs2_glock_dq_uninit(&al->al_ri_gh);
295out_qs:
296 gfs2_quota_unhold(ip);
297out:
298 gfs2_alloc_put(ip);
299 return error;
300}
301
302/**
303 * gfs2_change_nlink - Change nlink count on inode
304 * @ip: The GFS2 inode
305 * @diff: The change in the nlink count required
306 *
307 * Returns: errno
308 */
309
310int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
311{
312 struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
313 struct buffer_head *dibh;
314 u32 nlink;
315 int error;
316
317 BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink);
318 nlink = ip->i_di.di_nlink + diff;
319
320 /* If we are reducing the nlink count, but the new value ends up being
321 bigger than the old one, we must have underflowed. */
322 if (diff < 0 && nlink > ip->i_di.di_nlink) {
323 if (gfs2_consist_inode(ip))
324 gfs2_dinode_print(&ip->i_di);
325 return -EIO;
326 }
327
328 error = gfs2_meta_inode_buffer(ip, &dibh);
329 if (error)
330 return error;
331
332 ip->i_di.di_nlink = nlink;
333 ip->i_di.di_ctime = get_seconds();
334 ip->i_inode.i_nlink = nlink;
335
336 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
337 gfs2_dinode_out(&ip->i_di, dibh->b_data);
338 brelse(dibh);
339 mark_inode_dirty(&ip->i_inode);
340
341 if (ip->i_di.di_nlink == 0) {
342 struct gfs2_rgrpd *rgd;
343 struct gfs2_holder ri_gh, rg_gh;
344
345 error = gfs2_rindex_hold(sdp, &ri_gh);
346 if (error)
347 goto out;
348 error = -EIO;
349 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
350 if (!rgd)
351 goto out_norgrp;
352 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
353 if (error)
354 goto out_norgrp;
355
356 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
357 gfs2_glock_dq_uninit(&rg_gh);
358out_norgrp:
359 gfs2_glock_dq_uninit(&ri_gh);
360 }
361out:
362 return error;
363}
364
365struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
366{
367 struct qstr qstr;
368 gfs2_str2qstr(&qstr, name);
369 return gfs2_lookupi(dip, &qstr, 1, NULL);
370}
371
372
373/**
374 * gfs2_lookupi - Look up a filename in a directory and return its inode
375 * @d_gh: An initialized holder for the directory glock
376 * @name: The name of the inode to look for
377 * @is_root: If 1, ignore the caller's permissions
378 * @i_gh: An uninitialized holder for the new inode glock
379 *
380 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
381 * @is_root is true.
382 *
383 * Returns: errno
384 */
385
386struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
387 int is_root, struct nameidata *nd)
388{
389 struct super_block *sb = dir->i_sb;
390 struct gfs2_inode *dip = GFS2_I(dir);
391 struct gfs2_holder d_gh;
392 struct gfs2_inum inum;
393 unsigned int type;
394 int error = 0;
395 struct inode *inode = NULL;
396
397 if (!name->len || name->len > GFS2_FNAMESIZE)
398 return ERR_PTR(-ENAMETOOLONG);
399
400 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
401 (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
402 dir == sb->s_root->d_inode)) {
403 igrab(dir);
404 return dir;
405 }
406
407 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
408 if (error)
409 return ERR_PTR(error);
410
411 if (!is_root) {
412 error = permission(dir, MAY_EXEC, NULL);
413 if (error)
414 goto out;
415 }
416
417 error = gfs2_dir_search(dir, name, &inum, &type);
418 if (error)
419 goto out;
420
421 inode = gfs2_inode_lookup(sb, &inum, type);
422
423out:
424 gfs2_glock_dq_uninit(&d_gh);
425 if (error == -ENOENT)
426 return NULL;
427 return inode;
428}
429
430static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
431{
432 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
433 struct buffer_head *bh;
434 struct gfs2_inum_range ir;
435 int error;
436
437 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
438 if (error)
439 return error;
440 mutex_lock(&sdp->sd_inum_mutex);
441
442 error = gfs2_meta_inode_buffer(ip, &bh);
443 if (error) {
444 mutex_unlock(&sdp->sd_inum_mutex);
445 gfs2_trans_end(sdp);
446 return error;
447 }
448
449 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
450
451 if (ir.ir_length) {
452 *formal_ino = ir.ir_start++;
453 ir.ir_length--;
454 gfs2_trans_add_bh(ip->i_gl, bh, 1);
455 gfs2_inum_range_out(&ir,
456 bh->b_data + sizeof(struct gfs2_dinode));
457 brelse(bh);
458 mutex_unlock(&sdp->sd_inum_mutex);
459 gfs2_trans_end(sdp);
460 return 0;
461 }
462
463 brelse(bh);
464
465 mutex_unlock(&sdp->sd_inum_mutex);
466 gfs2_trans_end(sdp);
467
468 return 1;
469}
470
471static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
472{
473 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
474 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
475 struct gfs2_holder gh;
476 struct buffer_head *bh;
477 struct gfs2_inum_range ir;
478 int error;
479
480 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
481 if (error)
482 return error;
483
484 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
485 if (error)
486 goto out;
487 mutex_lock(&sdp->sd_inum_mutex);
488
489 error = gfs2_meta_inode_buffer(ip, &bh);
490 if (error)
491 goto out_end_trans;
492
493 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
494
495 if (!ir.ir_length) {
496 struct buffer_head *m_bh;
497 u64 x, y;
498
499 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
500 if (error)
501 goto out_brelse;
502
503 x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
504 x = y = be64_to_cpu(x);
505 ir.ir_start = x;
506 ir.ir_length = GFS2_INUM_QUANTUM;
507 x += GFS2_INUM_QUANTUM;
508 if (x < y)
509 gfs2_consist_inode(m_ip);
510 x = cpu_to_be64(x);
511 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
512 *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
513
514 brelse(m_bh);
515 }
516
517 *formal_ino = ir.ir_start++;
518 ir.ir_length--;
519
520 gfs2_trans_add_bh(ip->i_gl, bh, 1);
521 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
522
523out_brelse:
524 brelse(bh);
525out_end_trans:
526 mutex_unlock(&sdp->sd_inum_mutex);
527 gfs2_trans_end(sdp);
528out:
529 gfs2_glock_dq_uninit(&gh);
530 return error;
531}
532
533static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum)
534{
535 int error;
536
537 error = pick_formal_ino_1(sdp, inum);
538 if (error <= 0)
539 return error;
540
541 error = pick_formal_ino_2(sdp, inum);
542
543 return error;
544}
545
546/**
547 * create_ok - OK to create a new on-disk inode here?
548 * @dip: Directory in which dinode is to be created
549 * @name: Name of new dinode
550 * @mode:
551 *
552 * Returns: errno
553 */
554
555static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
556 unsigned int mode)
557{
558 int error;
559
560 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
561 if (error)
562 return error;
563
564 /* Don't create entries in an unlinked directory */
565 if (!dip->i_di.di_nlink)
566 return -EPERM;
567
568 error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
569 switch (error) {
570 case -ENOENT:
571 error = 0;
572 break;
573 case 0:
574 return -EEXIST;
575 default:
576 return error;
577 }
578
579 if (dip->i_di.di_entries == (u32)-1)
580 return -EFBIG;
581 if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1)
582 return -EMLINK;
583
584 return 0;
585}
586
587static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
588 unsigned int *uid, unsigned int *gid)
589{
590 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
591 (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) {
592 if (S_ISDIR(*mode))
593 *mode |= S_ISUID;
594 else if (dip->i_di.di_uid != current->fsuid)
595 *mode &= ~07111;
596 *uid = dip->i_di.di_uid;
597 } else
598 *uid = current->fsuid;
599
600 if (dip->i_di.di_mode & S_ISGID) {
601 if (S_ISDIR(*mode))
602 *mode |= S_ISGID;
603 *gid = dip->i_di.di_gid;
604 } else
605 *gid = current->fsgid;
606}
607
608static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum,
609 u64 *generation)
610{
611 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
612 int error;
613
614 gfs2_alloc_get(dip);
615
616 dip->i_alloc.al_requested = RES_DINODE;
617 error = gfs2_inplace_reserve(dip);
618 if (error)
619 goto out;
620
621 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0);
622 if (error)
623 goto out_ipreserv;
624
625 inum->no_addr = gfs2_alloc_di(dip, generation);
626
627 gfs2_trans_end(sdp);
628
629out_ipreserv:
630 gfs2_inplace_release(dip);
631out:
632 gfs2_alloc_put(dip);
633 return error;
634}
635
636/**
637 * init_dinode - Fill in a new dinode structure
638 * @dip: the directory this inode is being created in
639 * @gl: The glock covering the new inode
640 * @inum: the inode number
641 * @mode: the file permissions
642 * @uid:
643 * @gid:
644 *
645 */
646
647static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
648 const struct gfs2_inum *inum, unsigned int mode,
649 unsigned int uid, unsigned int gid,
650 const u64 *generation)
651{
652 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
653 struct gfs2_dinode *di;
654 struct buffer_head *dibh;
655
656 dibh = gfs2_meta_new(gl, inum->no_addr);
657 gfs2_trans_add_bh(gl, dibh, 1);
658 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
659 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
660 di = (struct gfs2_dinode *)dibh->b_data;
661
662 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
663 di->di_num.no_addr = cpu_to_be64(inum->no_addr);
664 di->di_mode = cpu_to_be32(mode);
665 di->di_uid = cpu_to_be32(uid);
666 di->di_gid = cpu_to_be32(gid);
667 di->di_nlink = cpu_to_be32(0);
668 di->di_size = cpu_to_be64(0);
669 di->di_blocks = cpu_to_be64(1);
670 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
671 di->di_major = di->di_minor = cpu_to_be32(0);
672 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
673 di->di_generation = cpu_to_be64(*generation);
674 di->di_flags = cpu_to_be32(0);
675
676 if (S_ISREG(mode)) {
677 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
678 gfs2_tune_get(sdp, gt_new_files_jdata))
679 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
680 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
681 gfs2_tune_get(sdp, gt_new_files_directio))
682 di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
683 } else if (S_ISDIR(mode)) {
684 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
685 GFS2_DIF_INHERIT_DIRECTIO);
686 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
687 GFS2_DIF_INHERIT_JDATA);
688 }
689
690 di->__pad1 = 0;
691 di->di_payload_format = cpu_to_be32(0);
692 di->di_height = cpu_to_be32(0);
693 di->__pad2 = 0;
694 di->__pad3 = 0;
695 di->di_depth = cpu_to_be16(0);
696 di->di_entries = cpu_to_be32(0);
697 memset(&di->__pad4, 0, sizeof(di->__pad4));
698 di->di_eattr = cpu_to_be64(0);
699 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
700
701 brelse(dibh);
702}
703
704static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
705 unsigned int mode, const struct gfs2_inum *inum,
706 const u64 *generation)
707{
708 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
709 unsigned int uid, gid;
710 int error;
711
712 munge_mode_uid_gid(dip, &mode, &uid, &gid);
713 gfs2_alloc_get(dip);
714
715 error = gfs2_quota_lock(dip, uid, gid);
716 if (error)
717 goto out;
718
719 error = gfs2_quota_check(dip, uid, gid);
720 if (error)
721 goto out_quota;
722
723 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0);
724 if (error)
725 goto out_quota;
726
727 init_dinode(dip, gl, inum, mode, uid, gid, generation);
728 gfs2_quota_change(dip, +1, uid, gid);
729 gfs2_trans_end(sdp);
730
731out_quota:
732 gfs2_quota_unlock(dip);
733out:
734 gfs2_alloc_put(dip);
735 return error;
736}
737
738static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
739 struct gfs2_inode *ip)
740{
741 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
742 struct gfs2_alloc *al;
743 int alloc_required;
744 struct buffer_head *dibh;
745 int error;
746
747 al = gfs2_alloc_get(dip);
748
749 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
750 if (error)
751 goto fail;
752
753 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name);
754 if (alloc_required < 0)
755 goto fail;
756 if (alloc_required) {
757 error = gfs2_quota_check(dip, dip->i_di.di_uid,
758 dip->i_di.di_gid);
759 if (error)
760 goto fail_quota_locks;
761
762 al->al_requested = sdp->sd_max_dirres;
763
764 error = gfs2_inplace_reserve(dip);
765 if (error)
766 goto fail_quota_locks;
767
768 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
769 al->al_rgd->rd_ri.ri_length +
770 2 * RES_DINODE +
771 RES_STATFS + RES_QUOTA, 0);
772 if (error)
773 goto fail_ipreserv;
774 } else {
775 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0);
776 if (error)
777 goto fail_quota_locks;
778 }
779
780 error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
781 if (error)
782 goto fail_end_trans;
783
784 error = gfs2_meta_inode_buffer(ip, &dibh);
785 if (error)
786 goto fail_end_trans;
787 ip->i_di.di_nlink = 1;
788 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
789 gfs2_dinode_out(&ip->i_di, dibh->b_data);
790 brelse(dibh);
791 return 0;
792
793fail_end_trans:
794 gfs2_trans_end(sdp);
795
796fail_ipreserv:
797 if (dip->i_alloc.al_rgd)
798 gfs2_inplace_release(dip);
799
800fail_quota_locks:
801 gfs2_quota_unlock(dip);
802
803fail:
804 gfs2_alloc_put(dip);
805 return error;
806}
807
808/**
809 * gfs2_createi - Create a new inode
810 * @ghs: An array of two holders
811 * @name: The name of the new file
812 * @mode: the permissions on the new inode
813 *
814 * @ghs[0] is an initialized holder for the directory
815 * @ghs[1] is the holder for the inode lock
816 *
817 * If the return value is not NULL, the glocks on both the directory and the new
818 * file are held. A transaction has been started and an inplace reservation
819 * is held, as well.
820 *
821 * Returns: An inode
822 */
823
824struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
825 unsigned int mode)
826{
827 struct inode *inode;
828 struct gfs2_inode *dip = ghs->gh_gl->gl_object;
829 struct inode *dir = &dip->i_inode;
830 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
831 struct gfs2_inum inum;
832 int error;
833 u64 generation;
834
835 if (!name->len || name->len > GFS2_FNAMESIZE)
836 return ERR_PTR(-ENAMETOOLONG);
837
838 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
839 error = gfs2_glock_nq(ghs);
840 if (error)
841 goto fail;
842
843 error = create_ok(dip, name, mode);
844 if (error)
845 goto fail_gunlock;
846
847 error = pick_formal_ino(sdp, &inum.no_formal_ino);
848 if (error)
849 goto fail_gunlock;
850
851 error = alloc_dinode(dip, &inum, &generation);
852 if (error)
853 goto fail_gunlock;
854
855 if (inum.no_addr < dip->i_num.no_addr) {
856 gfs2_glock_dq(ghs);
857
858 error = gfs2_glock_nq_num(sdp, inum.no_addr,
859 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
860 GL_SKIP, ghs + 1);
861 if (error) {
862 return ERR_PTR(error);
863 }
864
865 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
866 error = gfs2_glock_nq(ghs);
867 if (error) {
868 gfs2_glock_dq_uninit(ghs + 1);
869 return ERR_PTR(error);
870 }
871
872 error = create_ok(dip, name, mode);
873 if (error)
874 goto fail_gunlock2;
875 } else {
876 error = gfs2_glock_nq_num(sdp, inum.no_addr,
877 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
878 GL_SKIP, ghs + 1);
879 if (error)
880 goto fail_gunlock;
881 }
882
883 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation);
884 if (error)
885 goto fail_gunlock2;
886
887 inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
888 if (IS_ERR(inode))
889 goto fail_gunlock2;
890
891 error = gfs2_inode_refresh(GFS2_I(inode));
892 if (error)
893 goto fail_iput;
894
895 error = gfs2_acl_create(dip, GFS2_I(inode));
896 if (error)
897 goto fail_iput;
898
899 error = link_dinode(dip, name, GFS2_I(inode));
900 if (error)
901 goto fail_iput;
902
903 if (!inode)
904 return ERR_PTR(-ENOMEM);
905 return inode;
906
907fail_iput:
908 iput(inode);
909fail_gunlock2:
910 gfs2_glock_dq_uninit(ghs + 1);
911fail_gunlock:
912 gfs2_glock_dq(ghs);
913fail:
914 return ERR_PTR(error);
915}
916
917/**
918 * gfs2_rmdiri - Remove a directory
919 * @dip: The parent directory of the directory to be removed
920 * @name: The name of the directory to be removed
921 * @ip: The GFS2 inode of the directory to be removed
922 *
923 * Assumes Glocks on dip and ip are held
924 *
925 * Returns: errno
926 */
927
928int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
929 struct gfs2_inode *ip)
930{
931 struct qstr dotname;
932 int error;
933
934 if (ip->i_di.di_entries != 2) {
935 if (gfs2_consist_inode(ip))
936 gfs2_dinode_print(&ip->i_di);
937 return -EIO;
938 }
939
940 error = gfs2_dir_del(dip, name);
941 if (error)
942 return error;
943
944 error = gfs2_change_nlink(dip, -1);
945 if (error)
946 return error;
947
948 gfs2_str2qstr(&dotname, ".");
949 error = gfs2_dir_del(ip, &dotname);
950 if (error)
951 return error;
952
953 gfs2_str2qstr(&dotname, "..");
954 error = gfs2_dir_del(ip, &dotname);
955 if (error)
956 return error;
957
958 error = gfs2_change_nlink(ip, -2);
959 if (error)
960 return error;
961
962 return error;
963}
964
965/*
966 * gfs2_unlink_ok - check to see that a inode is still in a directory
967 * @dip: the directory
968 * @name: the name of the file
969 * @ip: the inode
970 *
971 * Assumes that the lock on (at least) @dip is held.
972 *
973 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
974 */
975
976int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
977 struct gfs2_inode *ip)
978{
979 struct gfs2_inum inum;
980 unsigned int type;
981 int error;
982
983 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
984 return -EPERM;
985
986 if ((dip->i_di.di_mode & S_ISVTX) &&
987 dip->i_di.di_uid != current->fsuid &&
988 ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
989 return -EPERM;
990
991 if (IS_APPEND(&dip->i_inode))
992 return -EPERM;
993
994 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
995 if (error)
996 return error;
997
998 error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
999 if (error)
1000 return error;
1001
1002 if (!gfs2_inum_equal(&inum, &ip->i_num))
1003 return -ENOENT;
1004
1005 if (IF2DT(ip->i_di.di_mode) != type) {
1006 gfs2_consist_inode(dip);
1007 return -EIO;
1008 }
1009
1010 return 0;
1011}
1012
1013/*
1014 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1015 * @this: move this
1016 * @to: to here
1017 *
1018 * Follow @to back to the root and make sure we don't encounter @this
1019 * Assumes we already hold the rename lock.
1020 *
1021 * Returns: errno
1022 */
1023
1024int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1025{
1026 struct inode *dir = &to->i_inode;
1027 struct super_block *sb = dir->i_sb;
1028 struct inode *tmp;
1029 struct qstr dotdot;
1030 int error = 0;
1031
1032 gfs2_str2qstr(&dotdot, "..");
1033
1034 igrab(dir);
1035
1036 for (;;) {
1037 if (dir == &this->i_inode) {
1038 error = -EINVAL;
1039 break;
1040 }
1041 if (dir == sb->s_root->d_inode) {
1042 error = 0;
1043 break;
1044 }
1045
1046 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
1047 if (IS_ERR(tmp)) {
1048 error = PTR_ERR(tmp);
1049 break;
1050 }
1051
1052 iput(dir);
1053 dir = tmp;
1054 }
1055
1056 iput(dir);
1057
1058 return error;
1059}
1060
1061/**
1062 * gfs2_readlinki - return the contents of a symlink
1063 * @ip: the symlink's inode
1064 * @buf: a pointer to the buffer to be filled
1065 * @len: a pointer to the length of @buf
1066 *
1067 * If @buf is too small, a piece of memory is kmalloc()ed and needs
1068 * to be freed by the caller.
1069 *
1070 * Returns: errno
1071 */
1072
1073int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1074{
1075 struct gfs2_holder i_gh;
1076 struct buffer_head *dibh;
1077 unsigned int x;
1078 int error;
1079
1080 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1081 error = gfs2_glock_nq_atime(&i_gh);
1082 if (error) {
1083 gfs2_holder_uninit(&i_gh);
1084 return error;
1085 }
1086
1087 if (!ip->i_di.di_size) {
1088 gfs2_consist_inode(ip);
1089 error = -EIO;
1090 goto out;
1091 }
1092
1093 error = gfs2_meta_inode_buffer(ip, &dibh);
1094 if (error)
1095 goto out;
1096
1097 x = ip->i_di.di_size + 1;
1098 if (x > *len) {
1099 *buf = kmalloc(x, GFP_KERNEL);
1100 if (!*buf) {
1101 error = -ENOMEM;
1102 goto out_brelse;
1103 }
1104 }
1105
1106 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1107 *len = x;
1108
1109out_brelse:
1110 brelse(dibh);
1111out:
1112 gfs2_glock_dq_uninit(&i_gh);
1113 return error;
1114}
1115
1116/**
1117 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1118 * conditionally update the inode's atime
1119 * @gh: the holder to acquire
1120 *
1121 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1122 * Update if the difference between the current time and the inode's current
1123 * atime is greater than an interval specified at mount.
1124 *
1125 * Returns: errno
1126 */
1127
1128int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1129{
1130 struct gfs2_glock *gl = gh->gh_gl;
1131 struct gfs2_sbd *sdp = gl->gl_sbd;
1132 struct gfs2_inode *ip = gl->gl_object;
1133 s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1134 unsigned int state;
1135 int flags;
1136 int error;
1137
1138 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1139 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1140 gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1141 return -EINVAL;
1142
1143 state = gh->gh_state;
1144 flags = gh->gh_flags;
1145
1146 error = gfs2_glock_nq(gh);
1147 if (error)
1148 return error;
1149
1150 if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1151 (sdp->sd_vfs->s_flags & MS_RDONLY))
1152 return 0;
1153
1154 curtime = get_seconds();
1155 if (curtime - ip->i_di.di_atime >= quantum) {
1156 gfs2_glock_dq(gh);
1157 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
1158 gh);
1159 error = gfs2_glock_nq(gh);
1160 if (error)
1161 return error;
1162
1163 /* Verify that atime hasn't been updated while we were
1164 trying to get exclusive lock. */
1165
1166 curtime = get_seconds();
1167 if (curtime - ip->i_di.di_atime >= quantum) {
1168 struct buffer_head *dibh;
1169
1170 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1171 if (error == -EROFS)
1172 return 0;
1173 if (error)
1174 goto fail;
1175
1176 error = gfs2_meta_inode_buffer(ip, &dibh);
1177 if (error)
1178 goto fail_end_trans;
1179
1180 ip->i_di.di_atime = curtime;
1181
1182 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1183 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1184 brelse(dibh);
1185
1186 gfs2_trans_end(sdp);
1187 }
1188
1189 /* If someone else has asked for the glock,
1190 unlock and let them have it. Then reacquire
1191 in the original state. */
1192 if (gfs2_glock_is_blocking(gl)) {
1193 gfs2_glock_dq(gh);
1194 gfs2_holder_reinit(state, flags, gh);
1195 return gfs2_glock_nq(gh);
1196 }
1197 }
1198
1199 return 0;
1200
1201fail_end_trans:
1202 gfs2_trans_end(sdp);
1203fail:
1204 gfs2_glock_dq(gh);
1205 return error;
1206}
1207
1208/**
1209 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1210 * @arg_a: the first structure
1211 * @arg_b: the second structure
1212 *
1213 * Returns: 1 if A > B
1214 * -1 if A < B
1215 * 0 if A == B
1216 */
1217
1218static int glock_compare_atime(const void *arg_a, const void *arg_b)
1219{
1220 const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1221 const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1222 const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1223 const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1224
1225 if (a->ln_number > b->ln_number)
1226 return 1;
1227 if (a->ln_number < b->ln_number)
1228 return -1;
1229 if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
1230 return 1;
1231 if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME))
1232 return 1;
1233
1234 return 0;
1235}
1236
1237/**
1238 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
1239 * atime update
1240 * @num_gh: the number of structures
1241 * @ghs: an array of struct gfs2_holder structures
1242 *
1243 * Returns: 0 on success (all glocks acquired),
1244 * errno on failure (no glocks acquired)
1245 */
1246
1247int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
1248{
1249 struct gfs2_holder **p;
1250 unsigned int x;
1251 int error = 0;
1252
1253 if (!num_gh)
1254 return 0;
1255
1256 if (num_gh == 1) {
1257 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1258 if (ghs->gh_flags & GL_ATIME)
1259 error = gfs2_glock_nq_atime(ghs);
1260 else
1261 error = gfs2_glock_nq(ghs);
1262 return error;
1263 }
1264
1265 p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1266 if (!p)
1267 return -ENOMEM;
1268
1269 for (x = 0; x < num_gh; x++)
1270 p[x] = &ghs[x];
1271
1272 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
1273
1274 for (x = 0; x < num_gh; x++) {
1275 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1276
1277 if (p[x]->gh_flags & GL_ATIME)
1278 error = gfs2_glock_nq_atime(p[x]);
1279 else
1280 error = gfs2_glock_nq(p[x]);
1281
1282 if (error) {
1283 while (x--)
1284 gfs2_glock_dq(p[x]);
1285 break;
1286 }
1287 }
1288
1289 kfree(p);
1290 return error;
1291}
1292
1293
1294static int
1295__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1296{
1297 struct buffer_head *dibh;
1298 int error;
1299
1300 error = gfs2_meta_inode_buffer(ip, &dibh);
1301 if (!error) {
1302 error = inode_setattr(&ip->i_inode, attr);
1303 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
1304 gfs2_inode_attr_out(ip);
1305
1306 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1307 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1308 brelse(dibh);
1309 }
1310 return error;
1311}
1312
1313/**
1314 * gfs2_setattr_simple -
1315 * @ip:
1316 * @attr:
1317 *
1318 * Called with a reference on the vnode.
1319 *
1320 * Returns: errno
1321 */
1322
1323int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1324{
1325 int error;
1326
1327 if (current->journal_info)
1328 return __gfs2_setattr_simple(ip, attr);
1329
1330 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0);
1331 if (error)
1332 return error;
1333
1334 error = __gfs2_setattr_simple(ip, attr);
1335 gfs2_trans_end(GFS2_SB(&ip->i_inode));
1336 return error;
1337}
1338
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
new file mode 100644
index 000000000000..f5d861760579
--- /dev/null
+++ b/fs/gfs2/inode.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__
12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
14{
15 return !ip->i_di.di_height;
16}
17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip)
19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21}
22
23static inline int gfs2_is_dir(struct gfs2_inode *ip)
24{
25 return S_ISDIR(ip->i_di.di_mode);
26}
27
28void gfs2_inode_attr_in(struct gfs2_inode *ip);
29void gfs2_inode_attr_out(struct gfs2_inode *ip);
30struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type);
31struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum);
32
33int gfs2_inode_refresh(struct gfs2_inode *ip);
34
35int gfs2_dinode_dealloc(struct gfs2_inode *inode);
36int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
37struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
38 int is_root, struct nameidata *nd);
39struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
40 unsigned int mode);
41int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
42 struct gfs2_inode *ip);
43int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
44 struct gfs2_inode *ip);
45int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
46int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
47
48int gfs2_glock_nq_atime(struct gfs2_holder *gh);
49int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
50
51int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
52
53struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
54
55#endif /* __INODE_DOT_H__ */
56
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
new file mode 100644
index 000000000000..effe4a337c1d
--- /dev/null
+++ b/fs/gfs2/lm.c
@@ -0,0 +1,217 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "glock.h"
22#include "lm.h"
23#include "super.h"
24#include "util.h"
25
26/**
27 * gfs2_lm_mount - mount a locking protocol
28 * @sdp: the filesystem
29 * @args: mount arguements
30 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
31 *
32 * Returns: errno
33 */
34
35int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
36{
37 char *proto = sdp->sd_proto_name;
38 char *table = sdp->sd_table_name;
39 int flags = 0;
40 int error;
41
42 if (sdp->sd_args.ar_spectator)
43 flags |= LM_MFLAG_SPECTATOR;
44
45 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
46
47 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
48 gfs2_glock_cb, sdp,
49 GFS2_MIN_LVB_SIZE, flags,
50 &sdp->sd_lockstruct, &sdp->sd_kobj);
51 if (error) {
52 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
53 proto, table, sdp->sd_args.ar_hostdata);
54 goto out;
55 }
56
57 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
58 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
59 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
60 GFS2_MIN_LVB_SIZE)) {
61 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
62 goto out;
63 }
64
65 if (sdp->sd_args.ar_spectator)
66 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
67 else
68 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
69 sdp->sd_lockstruct.ls_jid);
70
71 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
72
73 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
74 !sdp->sd_args.ar_ignore_local_fs) {
75 sdp->sd_args.ar_localflocks = 1;
76 sdp->sd_args.ar_localcaching = 1;
77 }
78
79out:
80 return error;
81}
82
83void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
84{
85 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
86 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
87 sdp->sd_lockstruct.ls_lockspace);
88}
89
90void gfs2_lm_unmount(struct gfs2_sbd *sdp)
91{
92 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
93 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
94}
95
96int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
97{
98 va_list args;
99
100 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
101 return 0;
102
103 va_start(args, fmt);
104 vprintk(fmt, args);
105 va_end(args);
106
107 fs_err(sdp, "about to withdraw from the cluster\n");
108 BUG_ON(sdp->sd_args.ar_debug);
109
110
111 fs_err(sdp, "waiting for outstanding I/O\n");
112
113 /* FIXME: suspend dm device so oustanding bio's complete
114 and all further io requests fail */
115
116 fs_err(sdp, "telling LM to withdraw\n");
117 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
118 fs_err(sdp, "withdrawn\n");
119 dump_stack();
120
121 return -1;
122}
123
124int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
125 void **lockp)
126{
127 int error = -EIO;
128 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
129 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
130 sdp->sd_lockstruct.ls_lockspace, name, lockp);
131 return error;
132}
133
134void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock)
135{
136 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
137 sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
138}
139
140unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
141 unsigned int cur_state, unsigned int req_state,
142 unsigned int flags)
143{
144 int ret = 0;
145 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
146 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
147 req_state, flags);
148 return ret;
149}
150
151unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
152 unsigned int cur_state)
153{
154 int ret = 0;
155 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
156 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
157 return ret;
158}
159
160void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock)
161{
162 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
163 sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
164}
165
166int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
167{
168 int error = -EIO;
169 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
170 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
171 return error;
172}
173
174void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb)
175{
176 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
177 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
178}
179
180int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
181 struct file *file, struct file_lock *fl)
182{
183 int error = -EIO;
184 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
185 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
186 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
187 return error;
188}
189
190int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
191 struct file *file, int cmd, struct file_lock *fl)
192{
193 int error = -EIO;
194 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
195 error = sdp->sd_lockstruct.ls_ops->lm_plock(
196 sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
197 return error;
198}
199
200int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
201 struct file *file, struct file_lock *fl)
202{
203 int error = -EIO;
204 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
205 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
206 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
207 return error;
208}
209
210void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
211 unsigned int message)
212{
213 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
214 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
215 sdp->sd_lockstruct.ls_lockspace, jid, message);
216}
217
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
new file mode 100644
index 000000000000..21cdc30ee08c
--- /dev/null
+++ b/fs/gfs2/lm.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13struct gfs2_sbd;
14
15#define GFS2_MIN_LVB_SIZE 32
16
17int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
18void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
19void gfs2_lm_unmount(struct gfs2_sbd *sdp);
20int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
21 __attribute__ ((format(printf, 2, 3)));
22int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
23 void **lockp);
24void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock);
25unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
26 unsigned int cur_state, unsigned int req_state,
27 unsigned int flags);
28unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
29 unsigned int cur_state);
30void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock);
31int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp);
32void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb);
33int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
34 struct file *file, struct file_lock *fl);
35int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
36 struct file *file, int cmd, struct file_lock *fl);
37int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
38 struct file *file, struct file_lock *fl);
39void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
40 unsigned int message);
41
42#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
new file mode 100644
index 000000000000..663fee728783
--- /dev/null
+++ b/fs/gfs2/locking.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19#include <linux/lm_interface.h>
20
21struct lmh_wrapper {
22 struct list_head lw_list;
23 const struct lm_lockops *lw_ops;
24};
25
26/* List of registered low-level locking protocols. A file system selects one
27 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
28
29static LIST_HEAD(lmh_list);
30static DEFINE_MUTEX(lmh_lock);
31
32/**
33 * gfs2_register_lockproto - Register a low-level locking protocol
34 * @proto: the protocol definition
35 *
36 * Returns: 0 on success, -EXXX on failure
37 */
38
39int gfs2_register_lockproto(const struct lm_lockops *proto)
40{
41 struct lmh_wrapper *lw;
42
43 mutex_lock(&lmh_lock);
44
45 list_for_each_entry(lw, &lmh_list, lw_list) {
46 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
47 mutex_unlock(&lmh_lock);
48 printk(KERN_INFO "GFS2: protocol %s already exists\n",
49 proto->lm_proto_name);
50 return -EEXIST;
51 }
52 }
53
54 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
55 if (!lw) {
56 mutex_unlock(&lmh_lock);
57 return -ENOMEM;
58 }
59
60 lw->lw_ops = proto;
61 list_add(&lw->lw_list, &lmh_list);
62
63 mutex_unlock(&lmh_lock);
64
65 return 0;
66}
67
68/**
69 * gfs2_unregister_lockproto - Unregister a low-level locking protocol
70 * @proto: the protocol definition
71 *
72 */
73
74void gfs2_unregister_lockproto(const struct lm_lockops *proto)
75{
76 struct lmh_wrapper *lw;
77
78 mutex_lock(&lmh_lock);
79
80 list_for_each_entry(lw, &lmh_list, lw_list) {
81 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
82 list_del(&lw->lw_list);
83 mutex_unlock(&lmh_lock);
84 kfree(lw);
85 return;
86 }
87 }
88
89 mutex_unlock(&lmh_lock);
90
91 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
92 proto->lm_proto_name);
93}
94
95/**
96 * gfs2_mount_lockproto - Mount a lock protocol
97 * @proto_name - the name of the protocol
98 * @table_name - the name of the lock space
99 * @host_data - data specific to this host
100 * @cb - the callback to the code using the lock module
101 * @sdp - The GFS2 superblock
102 * @min_lvb_size - the mininum LVB size that the caller can deal with
103 * @flags - LM_MFLAG_*
104 * @lockstruct - a structure returned describing the mount
105 *
106 * Returns: 0 on success, -EXXX on failure
107 */
108
109int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
110 lm_callback_t cb, void *cb_data,
111 unsigned int min_lvb_size, int flags,
112 struct lm_lockstruct *lockstruct,
113 struct kobject *fskobj)
114{
115 struct lmh_wrapper *lw = NULL;
116 int try = 0;
117 int error, found;
118
119retry:
120 mutex_lock(&lmh_lock);
121
122 found = 0;
123 list_for_each_entry(lw, &lmh_list, lw_list) {
124 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
125 found = 1;
126 break;
127 }
128 }
129
130 if (!found) {
131 if (!try && capable(CAP_SYS_MODULE)) {
132 try = 1;
133 mutex_unlock(&lmh_lock);
134 request_module(proto_name);
135 goto retry;
136 }
137 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
138 error = -ENOENT;
139 goto out;
140 }
141
142 if (!try_module_get(lw->lw_ops->lm_owner)) {
143 try = 0;
144 mutex_unlock(&lmh_lock);
145 msleep(1000);
146 goto retry;
147 }
148
149 error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
150 min_lvb_size, flags, lockstruct, fskobj);
151 if (error)
152 module_put(lw->lw_ops->lm_owner);
153out:
154 mutex_unlock(&lmh_lock);
155 return error;
156}
157
158void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
159{
160 mutex_lock(&lmh_lock);
161 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
162 if (lockstruct->ls_ops->lm_owner)
163 module_put(lockstruct->ls_ops->lm_owner);
164 mutex_unlock(&lmh_lock);
165}
166
167/**
168 * gfs2_withdraw_lockproto - abnormally unmount a lock module
169 * @lockstruct: the lockstruct passed into mount
170 *
171 */
172
173void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
174{
175 mutex_lock(&lmh_lock);
176 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
177 if (lockstruct->ls_ops->lm_owner)
178 module_put(lockstruct->ls_ops->lm_owner);
179 mutex_unlock(&lmh_lock);
180}
181
182EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
183EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
184
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
new file mode 100644
index 000000000000..89b93b6b45cf
--- /dev/null
+++ b/fs/gfs2/locking/dlm/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
2lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
new file mode 100644
index 000000000000..b167addf9fd1
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -0,0 +1,524 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
14static void queue_complete(struct gdlm_lock *lp)
15{
16 struct gdlm_ls *ls = lp->ls;
17
18 clear_bit(LFL_ACTIVE, &lp->flags);
19
20 spin_lock(&ls->async_lock);
21 list_add_tail(&lp->clist, &ls->complete);
22 spin_unlock(&ls->async_lock);
23 wake_up(&ls->thread_wait);
24}
25
26static inline void gdlm_ast(void *astarg)
27{
28 queue_complete(astarg);
29}
30
31static inline void gdlm_bast(void *astarg, int mode)
32{
33 struct gdlm_lock *lp = astarg;
34 struct gdlm_ls *ls = lp->ls;
35
36 if (!mode) {
37 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
38 lp->lockname.ln_type,
39 (unsigned long long)lp->lockname.ln_number);
40 return;
41 }
42
43 spin_lock(&ls->async_lock);
44 if (!lp->bast_mode) {
45 list_add_tail(&lp->blist, &ls->blocking);
46 lp->bast_mode = mode;
47 } else if (lp->bast_mode < mode)
48 lp->bast_mode = mode;
49 spin_unlock(&ls->async_lock);
50 wake_up(&ls->thread_wait);
51}
52
53void gdlm_queue_delayed(struct gdlm_lock *lp)
54{
55 struct gdlm_ls *ls = lp->ls;
56
57 spin_lock(&ls->async_lock);
58 list_add_tail(&lp->delay_list, &ls->delayed);
59 spin_unlock(&ls->async_lock);
60}
61
62/* convert gfs lock-state to dlm lock-mode */
63
64static s16 make_mode(s16 lmstate)
65{
66 switch (lmstate) {
67 case LM_ST_UNLOCKED:
68 return DLM_LOCK_NL;
69 case LM_ST_EXCLUSIVE:
70 return DLM_LOCK_EX;
71 case LM_ST_DEFERRED:
72 return DLM_LOCK_CW;
73 case LM_ST_SHARED:
74 return DLM_LOCK_PR;
75 }
76 gdlm_assert(0, "unknown LM state %d", lmstate);
77 return -1;
78}
79
80/* convert dlm lock-mode to gfs lock-state */
81
82s16 gdlm_make_lmstate(s16 dlmmode)
83{
84 switch (dlmmode) {
85 case DLM_LOCK_IV:
86 case DLM_LOCK_NL:
87 return LM_ST_UNLOCKED;
88 case DLM_LOCK_EX:
89 return LM_ST_EXCLUSIVE;
90 case DLM_LOCK_CW:
91 return LM_ST_DEFERRED;
92 case DLM_LOCK_PR:
93 return LM_ST_SHARED;
94 }
95 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
96 return -1;
97}
98
99/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
100 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
101
102static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
103{
104 s16 cur = make_mode(cur_state);
105 if (lp->cur != DLM_LOCK_IV)
106 gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
107}
108
109static inline unsigned int make_flags(struct gdlm_lock *lp,
110 unsigned int gfs_flags,
111 s16 cur, s16 req)
112{
113 unsigned int lkf = 0;
114
115 if (gfs_flags & LM_FLAG_TRY)
116 lkf |= DLM_LKF_NOQUEUE;
117
118 if (gfs_flags & LM_FLAG_TRY_1CB) {
119 lkf |= DLM_LKF_NOQUEUE;
120 lkf |= DLM_LKF_NOQUEUEBAST;
121 }
122
123 if (gfs_flags & LM_FLAG_PRIORITY) {
124 lkf |= DLM_LKF_NOORDER;
125 lkf |= DLM_LKF_HEADQUE;
126 }
127
128 if (gfs_flags & LM_FLAG_ANY) {
129 if (req == DLM_LOCK_PR)
130 lkf |= DLM_LKF_ALTCW;
131 else if (req == DLM_LOCK_CW)
132 lkf |= DLM_LKF_ALTPR;
133 }
134
135 if (lp->lksb.sb_lkid != 0) {
136 lkf |= DLM_LKF_CONVERT;
137
138 /* Conversion deadlock avoidance by DLM */
139
140 if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
141 !(lkf & DLM_LKF_NOQUEUE) &&
142 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
143 lkf |= DLM_LKF_CONVDEADLK;
144 }
145
146 if (lp->lvb)
147 lkf |= DLM_LKF_VALBLK;
148
149 return lkf;
150}
151
152/* make_strname - convert GFS lock numbers to a string */
153
154static inline void make_strname(struct lm_lockname *lockname,
155 struct gdlm_strname *str)
156{
157 sprintf(str->name, "%8x%16llx", lockname->ln_type,
158 (unsigned long long)lockname->ln_number);
159 str->namelen = GDLM_STRNAME_BYTES;
160}
161
162static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
163 struct gdlm_lock **lpp)
164{
165 struct gdlm_lock *lp;
166
167 lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
168 if (!lp)
169 return -ENOMEM;
170
171 lp->lockname = *name;
172 lp->ls = ls;
173 lp->cur = DLM_LOCK_IV;
174 lp->lvb = NULL;
175 lp->hold_null = NULL;
176 init_completion(&lp->ast_wait);
177 INIT_LIST_HEAD(&lp->clist);
178 INIT_LIST_HEAD(&lp->blist);
179 INIT_LIST_HEAD(&lp->delay_list);
180
181 spin_lock(&ls->async_lock);
182 list_add(&lp->all_list, &ls->all_locks);
183 ls->all_locks_count++;
184 spin_unlock(&ls->async_lock);
185
186 *lpp = lp;
187 return 0;
188}
189
190void gdlm_delete_lp(struct gdlm_lock *lp)
191{
192 struct gdlm_ls *ls = lp->ls;
193
194 spin_lock(&ls->async_lock);
195 if (!list_empty(&lp->clist))
196 list_del_init(&lp->clist);
197 if (!list_empty(&lp->blist))
198 list_del_init(&lp->blist);
199 if (!list_empty(&lp->delay_list))
200 list_del_init(&lp->delay_list);
201 gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
202 (unsigned long long)lp->lockname.ln_number);
203 list_del_init(&lp->all_list);
204 ls->all_locks_count--;
205 spin_unlock(&ls->async_lock);
206
207 kfree(lp);
208}
209
210int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
211 void **lockp)
212{
213 struct gdlm_lock *lp;
214 int error;
215
216 error = gdlm_create_lp(lockspace, name, &lp);
217
218 *lockp = lp;
219 return error;
220}
221
222void gdlm_put_lock(void *lock)
223{
224 gdlm_delete_lp(lock);
225}
226
227unsigned int gdlm_do_lock(struct gdlm_lock *lp)
228{
229 struct gdlm_ls *ls = lp->ls;
230 struct gdlm_strname str;
231 int error, bast = 1;
232
233 /*
234 * When recovery is in progress, delay lock requests for submission
235 * once recovery is done. Requests for recovery (NOEXP) and unlocks
236 * can pass.
237 */
238
239 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
240 !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
241 gdlm_queue_delayed(lp);
242 return LM_OUT_ASYNC;
243 }
244
245 /*
246 * Submit the actual lock request.
247 */
248
249 if (test_bit(LFL_NOBAST, &lp->flags))
250 bast = 0;
251
252 make_strname(&lp->lockname, &str);
253
254 set_bit(LFL_ACTIVE, &lp->flags);
255
256 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
257 (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
258 lp->cur, lp->req, lp->lkf);
259
260 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
261 str.name, str.namelen, 0, gdlm_ast, lp,
262 bast ? gdlm_bast : NULL);
263
264 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
265 lp->lksb.sb_status = -EAGAIN;
266 queue_complete(lp);
267 error = 0;
268 }
269
270 if (error) {
271 log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
272 "flags=%lx", ls->fsname, lp->lockname.ln_type,
273 (unsigned long long)lp->lockname.ln_number, error,
274 lp->cur, lp->req, lp->lkf, lp->flags);
275 return LM_OUT_ERROR;
276 }
277 return LM_OUT_ASYNC;
278}
279
280static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
281{
282 struct gdlm_ls *ls = lp->ls;
283 unsigned int lkf = 0;
284 int error;
285
286 set_bit(LFL_DLM_UNLOCK, &lp->flags);
287 set_bit(LFL_ACTIVE, &lp->flags);
288
289 if (lp->lvb)
290 lkf = DLM_LKF_VALBLK;
291
292 log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
293 (unsigned long long)lp->lockname.ln_number,
294 lp->lksb.sb_lkid, lp->cur, lkf);
295
296 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
297
298 if (error) {
299 log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
300 "flags=%lx", ls->fsname, lp->lockname.ln_type,
301 (unsigned long long)lp->lockname.ln_number, error,
302 lp->cur, lp->req, lp->lkf, lp->flags);
303 return LM_OUT_ERROR;
304 }
305 return LM_OUT_ASYNC;
306}
307
308unsigned int gdlm_lock(void *lock, unsigned int cur_state,
309 unsigned int req_state, unsigned int flags)
310{
311 struct gdlm_lock *lp = lock;
312
313 clear_bit(LFL_DLM_CANCEL, &lp->flags);
314 if (flags & LM_FLAG_NOEXP)
315 set_bit(LFL_NOBLOCK, &lp->flags);
316
317 check_cur_state(lp, cur_state);
318 lp->req = make_mode(req_state);
319 lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
320
321 return gdlm_do_lock(lp);
322}
323
324unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
325{
326 struct gdlm_lock *lp = lock;
327
328 clear_bit(LFL_DLM_CANCEL, &lp->flags);
329 if (lp->cur == DLM_LOCK_IV)
330 return 0;
331 return gdlm_do_unlock(lp);
332}
333
334void gdlm_cancel(void *lock)
335{
336 struct gdlm_lock *lp = lock;
337 struct gdlm_ls *ls = lp->ls;
338 int error, delay_list = 0;
339
340 if (test_bit(LFL_DLM_CANCEL, &lp->flags))
341 return;
342
343 log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
344 (unsigned long long)lp->lockname.ln_number, lp->flags);
345
346 spin_lock(&ls->async_lock);
347 if (!list_empty(&lp->delay_list)) {
348 list_del_init(&lp->delay_list);
349 delay_list = 1;
350 }
351 spin_unlock(&ls->async_lock);
352
353 if (delay_list) {
354 set_bit(LFL_CANCEL, &lp->flags);
355 set_bit(LFL_ACTIVE, &lp->flags);
356 queue_complete(lp);
357 return;
358 }
359
360 if (!test_bit(LFL_ACTIVE, &lp->flags) ||
361 test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
362 log_info("gdlm_cancel skip %x,%llx flags %lx",
363 lp->lockname.ln_type,
364 (unsigned long long)lp->lockname.ln_number, lp->flags);
365 return;
366 }
367
368 /* the lock is blocked in the dlm */
369
370 set_bit(LFL_DLM_CANCEL, &lp->flags);
371 set_bit(LFL_ACTIVE, &lp->flags);
372
373 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
374 NULL, lp);
375
376 log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
377 lp->lockname.ln_type,
378 (unsigned long long)lp->lockname.ln_number, lp->flags);
379
380 if (error == -EBUSY)
381 clear_bit(LFL_DLM_CANCEL, &lp->flags);
382}
383
384static int gdlm_add_lvb(struct gdlm_lock *lp)
385{
386 char *lvb;
387
388 lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
389 if (!lvb)
390 return -ENOMEM;
391
392 lp->lksb.sb_lvbptr = lvb;
393 lp->lvb = lvb;
394 return 0;
395}
396
397static void gdlm_del_lvb(struct gdlm_lock *lp)
398{
399 kfree(lp->lvb);
400 lp->lvb = NULL;
401 lp->lksb.sb_lvbptr = NULL;
402}
403
404/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
405 the completion) because gfs won't call hold_lvb() during a callback (from
406 the context of a lock_dlm thread). */
407
408static int hold_null_lock(struct gdlm_lock *lp)
409{
410 struct gdlm_lock *lpn = NULL;
411 int error;
412
413 if (lp->hold_null) {
414 printk(KERN_INFO "lock_dlm: lvb already held\n");
415 return 0;
416 }
417
418 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
419 if (error)
420 goto out;
421
422 lpn->lksb.sb_lvbptr = junk_lvb;
423 lpn->lvb = junk_lvb;
424
425 lpn->req = DLM_LOCK_NL;
426 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
427 set_bit(LFL_NOBAST, &lpn->flags);
428 set_bit(LFL_INLOCK, &lpn->flags);
429
430 init_completion(&lpn->ast_wait);
431 gdlm_do_lock(lpn);
432 wait_for_completion(&lpn->ast_wait);
433 error = lpn->lksb.sb_status;
434 if (error) {
435 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
436 error);
437 gdlm_delete_lp(lpn);
438 lpn = NULL;
439 }
440out:
441 lp->hold_null = lpn;
442 return error;
443}
444
445/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
446 the completion) because gfs may call unhold_lvb() during a callback (from
447 the context of a lock_dlm thread) which could cause a deadlock since the
448 other lock_dlm thread could be engaged in recovery. */
449
450static void unhold_null_lock(struct gdlm_lock *lp)
451{
452 struct gdlm_lock *lpn = lp->hold_null;
453
454 gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
455 (unsigned long long)lp->lockname.ln_number);
456 lpn->lksb.sb_lvbptr = NULL;
457 lpn->lvb = NULL;
458 set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
459 gdlm_do_unlock(lpn);
460 lp->hold_null = NULL;
461}
462
463/* Acquire a NL lock because gfs requires the value block to remain
464 intact on the resource while the lvb is "held" even if it's holding no locks
465 on the resource. */
466
467int gdlm_hold_lvb(void *lock, char **lvbp)
468{
469 struct gdlm_lock *lp = lock;
470 int error;
471
472 error = gdlm_add_lvb(lp);
473 if (error)
474 return error;
475
476 *lvbp = lp->lvb;
477
478 error = hold_null_lock(lp);
479 if (error)
480 gdlm_del_lvb(lp);
481
482 return error;
483}
484
485void gdlm_unhold_lvb(void *lock, char *lvb)
486{
487 struct gdlm_lock *lp = lock;
488
489 unhold_null_lock(lp);
490 gdlm_del_lvb(lp);
491}
492
493void gdlm_submit_delayed(struct gdlm_ls *ls)
494{
495 struct gdlm_lock *lp, *safe;
496
497 spin_lock(&ls->async_lock);
498 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
499 list_del_init(&lp->delay_list);
500 list_add_tail(&lp->delay_list, &ls->submit);
501 }
502 spin_unlock(&ls->async_lock);
503 wake_up(&ls->thread_wait);
504}
505
506int gdlm_release_all_locks(struct gdlm_ls *ls)
507{
508 struct gdlm_lock *lp, *safe;
509 int count = 0;
510
511 spin_lock(&ls->async_lock);
512 list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
513 list_del_init(&lp->all_list);
514
515 if (lp->lvb && lp->lvb != junk_lvb)
516 kfree(lp->lvb);
517 kfree(lp);
518 count++;
519 }
520 spin_unlock(&ls->async_lock);
521
522 return count;
523}
524
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
new file mode 100644
index 000000000000..33af707a4d3f
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -0,0 +1,187 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/string.h>
19#include <linux/list.h>
20#include <linux/socket.h>
21#include <linux/delay.h>
22#include <linux/kthread.h>
23#include <linux/kobject.h>
24#include <linux/fcntl.h>
25#include <linux/wait.h>
26#include <net/sock.h>
27
28#include <linux/dlm.h>
29#include <linux/lm_interface.h>
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000
40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128
42
43/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
44 We sprintf these numbers into a 24 byte string of hex values to make them
45 human-readable (to make debugging simpler.) */
46
47struct gdlm_strname {
48 unsigned char name[GDLM_STRNAME_BYTES];
49 unsigned short namelen;
50};
51
52enum {
53 DFL_BLOCK_LOCKS = 0,
54 DFL_SPECTATOR = 1,
55 DFL_WITHDRAW = 2,
56};
57
58struct gdlm_ls {
59 u32 id;
60 int jid;
61 int first;
62 int first_done;
63 unsigned long flags;
64 struct kobject kobj;
65 char clustername[GDLM_NAME_LEN];
66 char fsname[GDLM_NAME_LEN];
67 int fsflags;
68 dlm_lockspace_t *dlm_lockspace;
69 lm_callback_t fscb;
70 struct gfs2_sbd *sdp;
71 int recover_jid;
72 int recover_jid_done;
73 int recover_jid_status;
74 spinlock_t async_lock;
75 struct list_head complete;
76 struct list_head blocking;
77 struct list_head delayed;
78 struct list_head submit;
79 struct list_head all_locks;
80 u32 all_locks_count;
81 wait_queue_head_t wait_control;
82 struct task_struct *thread1;
83 struct task_struct *thread2;
84 wait_queue_head_t thread_wait;
85 unsigned long drop_time;
86 int drop_locks_count;
87 int drop_locks_period;
88};
89
90enum {
91 LFL_NOBLOCK = 0,
92 LFL_NOCACHE = 1,
93 LFL_DLM_UNLOCK = 2,
94 LFL_DLM_CANCEL = 3,
95 LFL_SYNC_LVB = 4,
96 LFL_FORCE_PROMOTE = 5,
97 LFL_REREQUEST = 6,
98 LFL_ACTIVE = 7,
99 LFL_INLOCK = 8,
100 LFL_CANCEL = 9,
101 LFL_NOBAST = 10,
102 LFL_HEADQUE = 11,
103 LFL_UNLOCK_DELETE = 12,
104};
105
106struct gdlm_lock {
107 struct gdlm_ls *ls;
108 struct lm_lockname lockname;
109 char *lvb;
110 struct dlm_lksb lksb;
111
112 s16 cur;
113 s16 req;
114 s16 prev_req;
115 u32 lkf; /* dlm flags DLM_LKF_ */
116 unsigned long flags; /* lock_dlm flags LFL_ */
117
118 int bast_mode; /* protected by async_lock */
119 struct completion ast_wait;
120
121 struct list_head clist; /* complete */
122 struct list_head blist; /* blocking */
123 struct list_head delay_list; /* delayed */
124 struct list_head all_list; /* all locks for the fs */
125 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
126};
127
128#define gdlm_assert(assertion, fmt, args...) \
129do { \
130 if (unlikely(!(assertion))) { \
131 printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
132 "lock_dlm: " fmt "\n", \
133 #assertion, ##args); \
134 BUG(); \
135 } \
136} while (0)
137
138#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
139#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
140#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
141#ifdef LOCK_DLM_LOG_DEBUG
142#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
143#else
144#define log_debug(fmt, arg...)
145#endif
146
147/* sysfs.c */
148
149int gdlm_sysfs_init(void);
150void gdlm_sysfs_exit(void);
151int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
152void gdlm_kobject_release(struct gdlm_ls *);
153
154/* thread.c */
155
156int gdlm_init_threads(struct gdlm_ls *);
157void gdlm_release_threads(struct gdlm_ls *);
158
159/* lock.c */
160
161s16 gdlm_make_lmstate(s16);
162void gdlm_queue_delayed(struct gdlm_lock *);
163void gdlm_submit_delayed(struct gdlm_ls *);
164int gdlm_release_all_locks(struct gdlm_ls *);
165void gdlm_delete_lp(struct gdlm_lock *);
166unsigned int gdlm_do_lock(struct gdlm_lock *);
167
168int gdlm_get_lock(void *, struct lm_lockname *, void **);
169void gdlm_put_lock(void *);
170unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
171unsigned int gdlm_unlock(void *, unsigned int);
172void gdlm_cancel(void *);
173int gdlm_hold_lvb(void *, char **);
174void gdlm_unhold_lvb(void *, char *);
175
176/* plock.c */
177
178int gdlm_plock_init(void);
179void gdlm_plock_exit(void);
180int gdlm_plock(void *, struct lm_lockname *, struct file *, int,
181 struct file_lock *);
182int gdlm_plock_get(void *, struct lm_lockname *, struct file *,
183 struct file_lock *);
184int gdlm_punlock(void *, struct lm_lockname *, struct file *,
185 struct file_lock *);
186#endif
187
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
new file mode 100644
index 000000000000..2194b1d5b5ec
--- /dev/null
+++ b/fs/gfs2/locking/dlm/main.c
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops;
18
19static int __init init_lock_dlm(void)
20{
21 int error;
22
23 error = gfs2_register_lockproto(&gdlm_ops);
24 if (error) {
25 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
26 error);
27 return error;
28 }
29
30 error = gdlm_sysfs_init();
31 if (error) {
32 gfs2_unregister_lockproto(&gdlm_ops);
33 return error;
34 }
35
36 error = gdlm_plock_init();
37 if (error) {
38 gdlm_sysfs_exit();
39 gfs2_unregister_lockproto(&gdlm_ops);
40 return error;
41 }
42
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0;
49}
50
51static void __exit exit_lock_dlm(void)
52{
53 gdlm_plock_exit();
54 gdlm_sysfs_exit();
55 gfs2_unregister_lockproto(&gdlm_ops);
56}
57
58module_init(init_lock_dlm);
59module_exit(exit_lock_dlm);
60
61MODULE_DESCRIPTION("GFS DLM Locking Module");
62MODULE_AUTHOR("Red Hat, Inc.");
63MODULE_LICENSE("GPL");
64
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
new file mode 100644
index 000000000000..1f94dd35a943
--- /dev/null
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -0,0 +1,255 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12int gdlm_drop_count;
13int gdlm_drop_period;
14const struct lm_lockops gdlm_ops;
15
16
17static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
18 int flags, char *table_name)
19{
20 struct gdlm_ls *ls;
21 char buf[256], *p;
22
23 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
24 if (!ls)
25 return NULL;
26
27 ls->drop_locks_count = gdlm_drop_count;
28 ls->drop_locks_period = gdlm_drop_period;
29 ls->fscb = cb;
30 ls->sdp = sdp;
31 ls->fsflags = flags;
32 spin_lock_init(&ls->async_lock);
33 INIT_LIST_HEAD(&ls->complete);
34 INIT_LIST_HEAD(&ls->blocking);
35 INIT_LIST_HEAD(&ls->delayed);
36 INIT_LIST_HEAD(&ls->submit);
37 INIT_LIST_HEAD(&ls->all_locks);
38 init_waitqueue_head(&ls->thread_wait);
39 init_waitqueue_head(&ls->wait_control);
40 ls->thread1 = NULL;
41 ls->thread2 = NULL;
42 ls->drop_time = jiffies;
43 ls->jid = -1;
44
45 strncpy(buf, table_name, 256);
46 buf[255] = '\0';
47
48 p = strstr(buf, ":");
49 if (!p) {
50 log_info("invalid table_name \"%s\"", table_name);
51 kfree(ls);
52 return NULL;
53 }
54 *p = '\0';
55 p++;
56
57 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
58 strncpy(ls->fsname, p, GDLM_NAME_LEN);
59
60 return ls;
61}
62
63static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
64{
65 char data[256];
66 char *options, *x, *y;
67 int error = 0;
68
69 memset(data, 0, 256);
70 strncpy(data, data_arg, 255);
71
72 for (options = data; (x = strsep(&options, ":")); ) {
73 if (!*x)
74 continue;
75
76 y = strchr(x, '=');
77 if (y)
78 *y++ = 0;
79
80 if (!strcmp(x, "jid")) {
81 if (!y) {
82 log_error("need argument to jid");
83 error = -EINVAL;
84 break;
85 }
86 sscanf(y, "%u", &ls->jid);
87
88 } else if (!strcmp(x, "first")) {
89 if (!y) {
90 log_error("need argument to first");
91 error = -EINVAL;
92 break;
93 }
94 sscanf(y, "%u", &ls->first);
95
96 } else if (!strcmp(x, "id")) {
97 if (!y) {
98 log_error("need argument to id");
99 error = -EINVAL;
100 break;
101 }
102 sscanf(y, "%u", &ls->id);
103
104 } else if (!strcmp(x, "nodir")) {
105 if (!y) {
106 log_error("need argument to nodir");
107 error = -EINVAL;
108 break;
109 }
110 sscanf(y, "%u", nodir);
111
112 } else {
113 log_error("unkonwn option: %s", x);
114 error = -EINVAL;
115 break;
116 }
117 }
118
119 return error;
120}
121
122static int gdlm_mount(char *table_name, char *host_data,
123 lm_callback_t cb, void *cb_data,
124 unsigned int min_lvb_size, int flags,
125 struct lm_lockstruct *lockstruct,
126 struct kobject *fskobj)
127{
128 struct gdlm_ls *ls;
129 int error = -ENOMEM, nodir = 0;
130
131 if (min_lvb_size > GDLM_LVB_SIZE)
132 goto out;
133
134 ls = init_gdlm(cb, cb_data, flags, table_name);
135 if (!ls)
136 goto out;
137
138 error = make_args(ls, host_data, &nodir);
139 if (error)
140 goto out;
141
142 error = gdlm_init_threads(ls);
143 if (error)
144 goto out_free;
145
146 error = gdlm_kobject_setup(ls, fskobj);
147 if (error)
148 goto out_thread;
149
150 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
151 &ls->dlm_lockspace,
152 nodir ? DLM_LSFL_NODIR : 0,
153 GDLM_LVB_SIZE);
154 if (error) {
155 log_error("dlm_new_lockspace error %d", error);
156 goto out_kobj;
157 }
158
159 lockstruct->ls_jid = ls->jid;
160 lockstruct->ls_first = ls->first;
161 lockstruct->ls_lockspace = ls;
162 lockstruct->ls_ops = &gdlm_ops;
163 lockstruct->ls_flags = 0;
164 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
165 return 0;
166
167out_kobj:
168 gdlm_kobject_release(ls);
169out_thread:
170 gdlm_release_threads(ls);
171out_free:
172 kfree(ls);
173out:
174 return error;
175}
176
177static void gdlm_unmount(void *lockspace)
178{
179 struct gdlm_ls *ls = lockspace;
180 int rv;
181
182 log_debug("unmount flags %lx", ls->flags);
183
184 /* FIXME: serialize unmount and withdraw in case they
185 happen at once. Also, if unmount follows withdraw,
186 wait for withdraw to finish. */
187
188 if (test_bit(DFL_WITHDRAW, &ls->flags))
189 goto out;
190
191 gdlm_kobject_release(ls);
192 dlm_release_lockspace(ls->dlm_lockspace, 2);
193 gdlm_release_threads(ls);
194 rv = gdlm_release_all_locks(ls);
195 if (rv)
196 log_info("gdlm_unmount: %d stray locks freed", rv);
197out:
198 kfree(ls);
199}
200
201static void gdlm_recovery_done(void *lockspace, unsigned int jid,
202 unsigned int message)
203{
204 struct gdlm_ls *ls = lockspace;
205 ls->recover_jid_done = jid;
206 ls->recover_jid_status = message;
207 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
208}
209
210static void gdlm_others_may_mount(void *lockspace)
211{
212 struct gdlm_ls *ls = lockspace;
213 ls->first_done = 1;
214 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
215}
216
217/* Userspace gets the offline uevent, blocks new gfs locks on
218 other mounters, and lets us know (sets WITHDRAW flag). Then,
219 userspace leaves the mount group while we leave the lockspace. */
220
221static void gdlm_withdraw(void *lockspace)
222{
223 struct gdlm_ls *ls = lockspace;
224
225 kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
226
227 wait_event_interruptible(ls->wait_control,
228 test_bit(DFL_WITHDRAW, &ls->flags));
229
230 dlm_release_lockspace(ls->dlm_lockspace, 2);
231 gdlm_release_threads(ls);
232 gdlm_release_all_locks(ls);
233 gdlm_kobject_release(ls);
234}
235
236const struct lm_lockops gdlm_ops = {
237 .lm_proto_name = "lock_dlm",
238 .lm_mount = gdlm_mount,
239 .lm_others_may_mount = gdlm_others_may_mount,
240 .lm_unmount = gdlm_unmount,
241 .lm_withdraw = gdlm_withdraw,
242 .lm_get_lock = gdlm_get_lock,
243 .lm_put_lock = gdlm_put_lock,
244 .lm_lock = gdlm_lock,
245 .lm_unlock = gdlm_unlock,
246 .lm_plock = gdlm_plock,
247 .lm_punlock = gdlm_punlock,
248 .lm_plock_get = gdlm_plock_get,
249 .lm_cancel = gdlm_cancel,
250 .lm_hold_lvb = gdlm_hold_lvb,
251 .lm_unhold_lvb = gdlm_unhold_lvb,
252 .lm_recovery_done = gdlm_recovery_done,
253 .lm_owner = THIS_MODULE,
254};
255
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
new file mode 100644
index 000000000000..7365aec9511b
--- /dev/null
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -0,0 +1,301 @@
1/*
2 * Copyright (C) 2005 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License version 2.
7 */
8
9#include <linux/miscdevice.h>
10#include <linux/lock_dlm_plock.h>
11
12#include "lock_dlm.h"
13
14
15static spinlock_t ops_lock;
16static struct list_head send_list;
17static struct list_head recv_list;
18static wait_queue_head_t send_wq;
19static wait_queue_head_t recv_wq;
20
21struct plock_op {
22 struct list_head list;
23 int done;
24 struct gdlm_plock_info info;
25};
26
27static inline void set_version(struct gdlm_plock_info *info)
28{
29 info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
30 info->version[1] = GDLM_PLOCK_VERSION_MINOR;
31 info->version[2] = GDLM_PLOCK_VERSION_PATCH;
32}
33
34static int check_version(struct gdlm_plock_info *info)
35{
36 if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
37 (GDLM_PLOCK_VERSION_MINOR < info->version[1])) {
38 log_error("plock device version mismatch: "
39 "kernel (%u.%u.%u), user (%u.%u.%u)",
40 GDLM_PLOCK_VERSION_MAJOR,
41 GDLM_PLOCK_VERSION_MINOR,
42 GDLM_PLOCK_VERSION_PATCH,
43 info->version[0],
44 info->version[1],
45 info->version[2]);
46 return -EINVAL;
47 }
48 return 0;
49}
50
51static void send_op(struct plock_op *op)
52{
53 set_version(&op->info);
54 INIT_LIST_HEAD(&op->list);
55 spin_lock(&ops_lock);
56 list_add_tail(&op->list, &send_list);
57 spin_unlock(&ops_lock);
58 wake_up(&send_wq);
59}
60
61int gdlm_plock(void *lockspace, struct lm_lockname *name,
62 struct file *file, int cmd, struct file_lock *fl)
63{
64 struct gdlm_ls *ls = lockspace;
65 struct plock_op *op;
66 int rv;
67
68 op = kzalloc(sizeof(*op), GFP_KERNEL);
69 if (!op)
70 return -ENOMEM;
71
72 op->info.optype = GDLM_PLOCK_OP_LOCK;
73 op->info.pid = fl->fl_pid;
74 op->info.ex = (fl->fl_type == F_WRLCK);
75 op->info.wait = IS_SETLKW(cmd);
76 op->info.fsid = ls->id;
77 op->info.number = name->ln_number;
78 op->info.start = fl->fl_start;
79 op->info.end = fl->fl_end;
80 op->info.owner = (__u64)(long) fl->fl_owner;
81
82 send_op(op);
83 wait_event(recv_wq, (op->done != 0));
84
85 spin_lock(&ops_lock);
86 if (!list_empty(&op->list)) {
87 printk(KERN_INFO "plock op on list\n");
88 list_del(&op->list);
89 }
90 spin_unlock(&ops_lock);
91
92 rv = op->info.rv;
93
94 if (!rv) {
95 if (posix_lock_file_wait(file, fl) < 0)
96 log_error("gdlm_plock: vfs lock error %x,%llx",
97 name->ln_type,
98 (unsigned long long)name->ln_number);
99 }
100
101 kfree(op);
102 return rv;
103}
104
105int gdlm_punlock(void *lockspace, struct lm_lockname *name,
106 struct file *file, struct file_lock *fl)
107{
108 struct gdlm_ls *ls = lockspace;
109 struct plock_op *op;
110 int rv;
111
112 op = kzalloc(sizeof(*op), GFP_KERNEL);
113 if (!op)
114 return -ENOMEM;
115
116 if (posix_lock_file_wait(file, fl) < 0)
117 log_error("gdlm_punlock: vfs unlock error %x,%llx",
118 name->ln_type, (unsigned long long)name->ln_number);
119
120 op->info.optype = GDLM_PLOCK_OP_UNLOCK;
121 op->info.pid = fl->fl_pid;
122 op->info.fsid = ls->id;
123 op->info.number = name->ln_number;
124 op->info.start = fl->fl_start;
125 op->info.end = fl->fl_end;
126 op->info.owner = (__u64)(long) fl->fl_owner;
127
128 send_op(op);
129 wait_event(recv_wq, (op->done != 0));
130
131 spin_lock(&ops_lock);
132 if (!list_empty(&op->list)) {
133 printk(KERN_INFO "punlock op on list\n");
134 list_del(&op->list);
135 }
136 spin_unlock(&ops_lock);
137
138 rv = op->info.rv;
139
140 kfree(op);
141 return rv;
142}
143
144int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
145 struct file *file, struct file_lock *fl)
146{
147 struct gdlm_ls *ls = lockspace;
148 struct plock_op *op;
149 int rv;
150
151 op = kzalloc(sizeof(*op), GFP_KERNEL);
152 if (!op)
153 return -ENOMEM;
154
155 op->info.optype = GDLM_PLOCK_OP_GET;
156 op->info.pid = fl->fl_pid;
157 op->info.ex = (fl->fl_type == F_WRLCK);
158 op->info.fsid = ls->id;
159 op->info.number = name->ln_number;
160 op->info.start = fl->fl_start;
161 op->info.end = fl->fl_end;
162
163 send_op(op);
164 wait_event(recv_wq, (op->done != 0));
165
166 spin_lock(&ops_lock);
167 if (!list_empty(&op->list)) {
168 printk(KERN_INFO "plock_get op on list\n");
169 list_del(&op->list);
170 }
171 spin_unlock(&ops_lock);
172
173 rv = op->info.rv;
174
175 if (rv == 0)
176 fl->fl_type = F_UNLCK;
177 else if (rv > 0) {
178 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
179 fl->fl_pid = op->info.pid;
180 fl->fl_start = op->info.start;
181 fl->fl_end = op->info.end;
182 }
183
184 kfree(op);
185 return rv;
186}
187
188/* a read copies out one plock request from the send list */
189static ssize_t dev_read(struct file *file, char __user *u, size_t count,
190 loff_t *ppos)
191{
192 struct gdlm_plock_info info;
193 struct plock_op *op = NULL;
194
195 if (count < sizeof(info))
196 return -EINVAL;
197
198 spin_lock(&ops_lock);
199 if (!list_empty(&send_list)) {
200 op = list_entry(send_list.next, struct plock_op, list);
201 list_move(&op->list, &recv_list);
202 memcpy(&info, &op->info, sizeof(info));
203 }
204 spin_unlock(&ops_lock);
205
206 if (!op)
207 return -EAGAIN;
208
209 if (copy_to_user(u, &info, sizeof(info)))
210 return -EFAULT;
211 return sizeof(info);
212}
213
214/* a write copies in one plock result that should match a plock_op
215 on the recv list */
216static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
217 loff_t *ppos)
218{
219 struct gdlm_plock_info info;
220 struct plock_op *op;
221 int found = 0;
222
223 if (count != sizeof(info))
224 return -EINVAL;
225
226 if (copy_from_user(&info, u, sizeof(info)))
227 return -EFAULT;
228
229 if (check_version(&info))
230 return -EINVAL;
231
232 spin_lock(&ops_lock);
233 list_for_each_entry(op, &recv_list, list) {
234 if (op->info.fsid == info.fsid && op->info.number == info.number &&
235 op->info.owner == info.owner) {
236 list_del_init(&op->list);
237 found = 1;
238 op->done = 1;
239 memcpy(&op->info, &info, sizeof(info));
240 break;
241 }
242 }
243 spin_unlock(&ops_lock);
244
245 if (found)
246 wake_up(&recv_wq);
247 else
248 printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
249 (unsigned long long)info.number);
250 return count;
251}
252
253static unsigned int dev_poll(struct file *file, poll_table *wait)
254{
255 poll_wait(file, &send_wq, wait);
256
257 spin_lock(&ops_lock);
258 if (!list_empty(&send_list)) {
259 spin_unlock(&ops_lock);
260 return POLLIN | POLLRDNORM;
261 }
262 spin_unlock(&ops_lock);
263 return 0;
264}
265
266static struct file_operations dev_fops = {
267 .read = dev_read,
268 .write = dev_write,
269 .poll = dev_poll,
270 .owner = THIS_MODULE
271};
272
273static struct miscdevice plock_dev_misc = {
274 .minor = MISC_DYNAMIC_MINOR,
275 .name = GDLM_PLOCK_MISC_NAME,
276 .fops = &dev_fops
277};
278
279int gdlm_plock_init(void)
280{
281 int rv;
282
283 spin_lock_init(&ops_lock);
284 INIT_LIST_HEAD(&send_list);
285 INIT_LIST_HEAD(&recv_list);
286 init_waitqueue_head(&send_wq);
287 init_waitqueue_head(&recv_wq);
288
289 rv = misc_register(&plock_dev_misc);
290 if (rv)
291 printk(KERN_INFO "gdlm_plock_init: misc_register failed %d",
292 rv);
293 return rv;
294}
295
296void gdlm_plock_exit(void)
297{
298 if (misc_deregister(&plock_dev_misc) < 0)
299 printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed");
300}
301
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
new file mode 100644
index 000000000000..29ae06f94944
--- /dev/null
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -0,0 +1,226 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15extern struct lm_lockops gdlm_ops;
16
17static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
18{
19 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
20}
21
22static ssize_t block_show(struct gdlm_ls *ls, char *buf)
23{
24 ssize_t ret;
25 int val = 0;
26
27 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
28 val = 1;
29 ret = sprintf(buf, "%d\n", val);
30 return ret;
31}
32
33static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
34{
35 ssize_t ret = len;
36 int val;
37
38 val = simple_strtol(buf, NULL, 0);
39
40 if (val == 1)
41 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 else if (val == 0) {
43 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
44 gdlm_submit_delayed(ls);
45 } else {
46 ret = -EINVAL;
47 }
48 return ret;
49}
50
51static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
52{
53 ssize_t ret;
54 int val = 0;
55
56 if (test_bit(DFL_WITHDRAW, &ls->flags))
57 val = 1;
58 ret = sprintf(buf, "%d\n", val);
59 return ret;
60}
61
62static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
63{
64 ssize_t ret = len;
65 int val;
66
67 val = simple_strtol(buf, NULL, 0);
68
69 if (val == 1)
70 set_bit(DFL_WITHDRAW, &ls->flags);
71 else
72 ret = -EINVAL;
73 wake_up(&ls->wait_control);
74 return ret;
75}
76
77static ssize_t id_show(struct gdlm_ls *ls, char *buf)
78{
79 return sprintf(buf, "%u\n", ls->id);
80}
81
82static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
83{
84 return sprintf(buf, "%d\n", ls->jid);
85}
86
87static ssize_t first_show(struct gdlm_ls *ls, char *buf)
88{
89 return sprintf(buf, "%d\n", ls->first);
90}
91
92static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
93{
94 return sprintf(buf, "%d\n", ls->first_done);
95}
96
97static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
98{
99 return sprintf(buf, "%d\n", ls->recover_jid);
100}
101
102static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
103{
104 ls->recover_jid = simple_strtol(buf, NULL, 0);
105 ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
106 return len;
107}
108
109static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
110{
111 return sprintf(buf, "%d\n", ls->recover_jid_done);
112}
113
114static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
115{
116 return sprintf(buf, "%d\n", ls->recover_jid_status);
117}
118
119struct gdlm_attr {
120 struct attribute attr;
121 ssize_t (*show)(struct gdlm_ls *, char *);
122 ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
123};
124
125#define GDLM_ATTR(_name,_mode,_show,_store) \
126static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
127
128GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
129GDLM_ATTR(block, 0644, block_show, block_store);
130GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
131GDLM_ATTR(id, 0444, id_show, NULL);
132GDLM_ATTR(jid, 0444, jid_show, NULL);
133GDLM_ATTR(first, 0444, first_show, NULL);
134GDLM_ATTR(first_done, 0444, first_done_show, NULL);
135GDLM_ATTR(recover, 0644, recover_show, recover_store);
136GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
137GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
138
139static struct attribute *gdlm_attrs[] = {
140 &gdlm_attr_proto_name.attr,
141 &gdlm_attr_block.attr,
142 &gdlm_attr_withdraw.attr,
143 &gdlm_attr_id.attr,
144 &gdlm_attr_jid.attr,
145 &gdlm_attr_first.attr,
146 &gdlm_attr_first_done.attr,
147 &gdlm_attr_recover.attr,
148 &gdlm_attr_recover_done.attr,
149 &gdlm_attr_recover_status.attr,
150 NULL,
151};
152
153static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
154 char *buf)
155{
156 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
157 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
158 return a->show ? a->show(ls, buf) : 0;
159}
160
161static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
162 const char *buf, size_t len)
163{
164 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
165 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
166 return a->store ? a->store(ls, buf, len) : len;
167}
168
169static struct sysfs_ops gdlm_attr_ops = {
170 .show = gdlm_attr_show,
171 .store = gdlm_attr_store,
172};
173
174static struct kobj_type gdlm_ktype = {
175 .default_attrs = gdlm_attrs,
176 .sysfs_ops = &gdlm_attr_ops,
177};
178
179static struct kset gdlm_kset = {
180 .subsys = &kernel_subsys,
181 .kobj = {.name = "lock_dlm",},
182 .ktype = &gdlm_ktype,
183};
184
185int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
186{
187 int error;
188
189 error = kobject_set_name(&ls->kobj, "%s", "lock_module");
190 if (error) {
191 log_error("can't set kobj name %d", error);
192 return error;
193 }
194
195 ls->kobj.kset = &gdlm_kset;
196 ls->kobj.ktype = &gdlm_ktype;
197 ls->kobj.parent = fskobj;
198
199 error = kobject_register(&ls->kobj);
200 if (error)
201 log_error("can't register kobj %d", error);
202
203 return error;
204}
205
206void gdlm_kobject_release(struct gdlm_ls *ls)
207{
208 kobject_unregister(&ls->kobj);
209}
210
211int gdlm_sysfs_init(void)
212{
213 int error;
214
215 error = kset_register(&gdlm_kset);
216 if (error)
217 printk("lock_dlm: cannot register kset %d\n", error);
218
219 return error;
220}
221
222void gdlm_sysfs_exit(void)
223{
224 kset_unregister(&gdlm_kset);
225}
226
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
new file mode 100644
index 000000000000..9cf1f168eaf8
--- /dev/null
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -0,0 +1,359 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
13 thread gets to it. */
14
15static void queue_submit(struct gdlm_lock *lp)
16{
17 struct gdlm_ls *ls = lp->ls;
18
19 spin_lock(&ls->async_lock);
20 list_add_tail(&lp->delay_list, &ls->submit);
21 spin_unlock(&ls->async_lock);
22 wake_up(&ls->thread_wait);
23}
24
25static void process_blocking(struct gdlm_lock *lp, int bast_mode)
26{
27 struct gdlm_ls *ls = lp->ls;
28 unsigned int cb = 0;
29
30 switch (gdlm_make_lmstate(bast_mode)) {
31 case LM_ST_EXCLUSIVE:
32 cb = LM_CB_NEED_E;
33 break;
34 case LM_ST_DEFERRED:
35 cb = LM_CB_NEED_D;
36 break;
37 case LM_ST_SHARED:
38 cb = LM_CB_NEED_S;
39 break;
40 default:
41 gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
42 }
43
44 ls->fscb(ls->sdp, cb, &lp->lockname);
45}
46
47static void process_complete(struct gdlm_lock *lp)
48{
49 struct gdlm_ls *ls = lp->ls;
50 struct lm_async_cb acb;
51 s16 prev_mode = lp->cur;
52
53 memset(&acb, 0, sizeof(acb));
54
55 if (lp->lksb.sb_status == -DLM_ECANCEL) {
56 log_info("complete dlm cancel %x,%llx flags %lx",
57 lp->lockname.ln_type,
58 (unsigned long long)lp->lockname.ln_number,
59 lp->flags);
60
61 lp->req = lp->cur;
62 acb.lc_ret |= LM_OUT_CANCELED;
63 if (lp->cur == DLM_LOCK_IV)
64 lp->lksb.sb_lkid = 0;
65 goto out;
66 }
67
68 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
69 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
70 log_info("unlock sb_status %d %x,%llx flags %lx",
71 lp->lksb.sb_status, lp->lockname.ln_type,
72 (unsigned long long)lp->lockname.ln_number,
73 lp->flags);
74 return;
75 }
76
77 lp->cur = DLM_LOCK_IV;
78 lp->req = DLM_LOCK_IV;
79 lp->lksb.sb_lkid = 0;
80
81 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
82 gdlm_delete_lp(lp);
83 return;
84 }
85 goto out;
86 }
87
88 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
89 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
90
91 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
92 if (lp->req == DLM_LOCK_PR)
93 lp->req = DLM_LOCK_CW;
94 else if (lp->req == DLM_LOCK_CW)
95 lp->req = DLM_LOCK_PR;
96 }
97
98 /*
99 * A canceled lock request. The lock was just taken off the delayed
100 * list and was never even submitted to dlm.
101 */
102
103 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
104 log_info("complete internal cancel %x,%llx",
105 lp->lockname.ln_type,
106 (unsigned long long)lp->lockname.ln_number);
107 lp->req = lp->cur;
108 acb.lc_ret |= LM_OUT_CANCELED;
109 goto out;
110 }
111
112 /*
113 * An error occured.
114 */
115
116 if (lp->lksb.sb_status) {
117 /* a "normal" error */
118 if ((lp->lksb.sb_status == -EAGAIN) &&
119 (lp->lkf & DLM_LKF_NOQUEUE)) {
120 lp->req = lp->cur;
121 if (lp->cur == DLM_LOCK_IV)
122 lp->lksb.sb_lkid = 0;
123 goto out;
124 }
125
126 /* this could only happen with cancels I think */
127 log_info("ast sb_status %d %x,%llx flags %lx",
128 lp->lksb.sb_status, lp->lockname.ln_type,
129 (unsigned long long)lp->lockname.ln_number,
130 lp->flags);
131 return;
132 }
133
134 /*
135 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
136 */
137
138 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
139 complete(&lp->ast_wait);
140 return;
141 }
142
143 /*
144 * A lock has been demoted to NL because it initially completed during
145 * BLOCK_LOCKS. Now it must be requested in the originally requested
146 * mode.
147 */
148
149 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
150 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
151 lp->lockname.ln_type,
152 (unsigned long long)lp->lockname.ln_number);
153 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
154 lp->lockname.ln_type,
155 (unsigned long long)lp->lockname.ln_number);
156
157 lp->cur = DLM_LOCK_NL;
158 lp->req = lp->prev_req;
159 lp->prev_req = DLM_LOCK_IV;
160 lp->lkf &= ~DLM_LKF_CONVDEADLK;
161
162 set_bit(LFL_NOCACHE, &lp->flags);
163
164 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
165 !test_bit(LFL_NOBLOCK, &lp->flags))
166 gdlm_queue_delayed(lp);
167 else
168 queue_submit(lp);
169 return;
170 }
171
172 /*
173 * A request is granted during dlm recovery. It may be granted
174 * because the locks of a failed node were cleared. In that case,
175 * there may be inconsistent data beneath this lock and we must wait
176 * for recovery to complete to use it. When gfs recovery is done this
177 * granted lock will be converted to NL and then reacquired in this
178 * granted state.
179 */
180
181 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
182 !test_bit(LFL_NOBLOCK, &lp->flags) &&
183 lp->req != DLM_LOCK_NL) {
184
185 lp->cur = lp->req;
186 lp->prev_req = lp->req;
187 lp->req = DLM_LOCK_NL;
188 lp->lkf |= DLM_LKF_CONVERT;
189 lp->lkf &= ~DLM_LKF_CONVDEADLK;
190
191 log_debug("rereq %x,%llx id %x %d,%d",
192 lp->lockname.ln_type,
193 (unsigned long long)lp->lockname.ln_number,
194 lp->lksb.sb_lkid, lp->cur, lp->req);
195
196 set_bit(LFL_REREQUEST, &lp->flags);
197 queue_submit(lp);
198 return;
199 }
200
201 /*
202 * DLM demoted the lock to NL before it was granted so GFS must be
203 * told it cannot cache data for this lock.
204 */
205
206 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
207 set_bit(LFL_NOCACHE, &lp->flags);
208
209out:
210 /*
211 * This is an internal lock_dlm lock
212 */
213
214 if (test_bit(LFL_INLOCK, &lp->flags)) {
215 clear_bit(LFL_NOBLOCK, &lp->flags);
216 lp->cur = lp->req;
217 complete(&lp->ast_wait);
218 return;
219 }
220
221 /*
222 * Normal completion of a lock request. Tell GFS it now has the lock.
223 */
224
225 clear_bit(LFL_NOBLOCK, &lp->flags);
226 lp->cur = lp->req;
227
228 acb.lc_name = lp->lockname;
229 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
230
231 if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
232 (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
233 acb.lc_ret |= LM_OUT_CACHEABLE;
234
235 ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
236}
237
238static inline int no_work(struct gdlm_ls *ls, int blocking)
239{
240 int ret;
241
242 spin_lock(&ls->async_lock);
243 ret = list_empty(&ls->complete) && list_empty(&ls->submit);
244 if (ret && blocking)
245 ret = list_empty(&ls->blocking);
246 spin_unlock(&ls->async_lock);
247
248 return ret;
249}
250
251static inline int check_drop(struct gdlm_ls *ls)
252{
253 if (!ls->drop_locks_count)
254 return 0;
255
256 if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
257 ls->drop_time = jiffies;
258 if (ls->all_locks_count >= ls->drop_locks_count)
259 return 1;
260 }
261 return 0;
262}
263
264static int gdlm_thread(void *data)
265{
266 struct gdlm_ls *ls = (struct gdlm_ls *) data;
267 struct gdlm_lock *lp = NULL;
268 int blist = 0;
269 uint8_t complete, blocking, submit, drop;
270 DECLARE_WAITQUEUE(wait, current);
271
272 /* Only thread1 is allowed to do blocking callbacks since gfs
273 may wait for a completion callback within a blocking cb. */
274
275 if (current == ls->thread1)
276 blist = 1;
277
278 while (!kthread_should_stop()) {
279 set_current_state(TASK_INTERRUPTIBLE);
280 add_wait_queue(&ls->thread_wait, &wait);
281 if (no_work(ls, blist))
282 schedule();
283 remove_wait_queue(&ls->thread_wait, &wait);
284 set_current_state(TASK_RUNNING);
285
286 complete = blocking = submit = drop = 0;
287
288 spin_lock(&ls->async_lock);
289
290 if (blist && !list_empty(&ls->blocking)) {
291 lp = list_entry(ls->blocking.next, struct gdlm_lock,
292 blist);
293 list_del_init(&lp->blist);
294 blocking = lp->bast_mode;
295 lp->bast_mode = 0;
296 } else if (!list_empty(&ls->complete)) {
297 lp = list_entry(ls->complete.next, struct gdlm_lock,
298 clist);
299 list_del_init(&lp->clist);
300 complete = 1;
301 } else if (!list_empty(&ls->submit)) {
302 lp = list_entry(ls->submit.next, struct gdlm_lock,
303 delay_list);
304 list_del_init(&lp->delay_list);
305 submit = 1;
306 }
307
308 drop = check_drop(ls);
309 spin_unlock(&ls->async_lock);
310
311 if (complete)
312 process_complete(lp);
313
314 else if (blocking)
315 process_blocking(lp, blocking);
316
317 else if (submit)
318 gdlm_do_lock(lp);
319
320 if (drop)
321 ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL);
322
323 schedule();
324 }
325
326 return 0;
327}
328
329int gdlm_init_threads(struct gdlm_ls *ls)
330{
331 struct task_struct *p;
332 int error;
333
334 p = kthread_run(gdlm_thread, ls, "lock_dlm1");
335 error = IS_ERR(p);
336 if (error) {
337 log_error("can't start lock_dlm1 thread %d", error);
338 return error;
339 }
340 ls->thread1 = p;
341
342 p = kthread_run(gdlm_thread, ls, "lock_dlm2");
343 error = IS_ERR(p);
344 if (error) {
345 log_error("can't start lock_dlm2 thread %d", error);
346 kthread_stop(ls->thread1);
347 return error;
348 }
349 ls->thread2 = p;
350
351 return 0;
352}
353
354void gdlm_release_threads(struct gdlm_ls *ls)
355{
356 kthread_stop(ls->thread1);
357 kthread_stop(ls->thread2);
358}
359
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
new file mode 100644
index 000000000000..35e9730bc3a8
--- /dev/null
+++ b/fs/gfs2/locking/nolock/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o
2lock_nolock-y := main.o
3
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
new file mode 100644
index 000000000000..acfbc941f319
--- /dev/null
+++ b/fs/gfs2/locking/nolock/main.c
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/types.h>
15#include <linux/fs.h>
16#include <linux/smp_lock.h>
17#include <linux/lm_interface.h>
18
19struct nolock_lockspace {
20 unsigned int nl_lvb_size;
21};
22
23static const struct lm_lockops nolock_ops;
24
25static int nolock_mount(char *table_name, char *host_data,
26 lm_callback_t cb, void *cb_data,
27 unsigned int min_lvb_size, int flags,
28 struct lm_lockstruct *lockstruct,
29 struct kobject *fskobj)
30{
31 char *c;
32 unsigned int jid;
33 struct nolock_lockspace *nl;
34
35 c = strstr(host_data, "jid=");
36 if (!c)
37 jid = 0;
38 else {
39 c += 4;
40 sscanf(c, "%u", &jid);
41 }
42
43 nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
44 if (!nl)
45 return -ENOMEM;
46
47 nl->nl_lvb_size = min_lvb_size;
48
49 lockstruct->ls_jid = jid;
50 lockstruct->ls_first = 1;
51 lockstruct->ls_lvb_size = min_lvb_size;
52 lockstruct->ls_lockspace = nl;
53 lockstruct->ls_ops = &nolock_ops;
54 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
55
56 return 0;
57}
58
59static void nolock_others_may_mount(void *lockspace)
60{
61}
62
63static void nolock_unmount(void *lockspace)
64{
65 struct nolock_lockspace *nl = lockspace;
66 kfree(nl);
67}
68
69static void nolock_withdraw(void *lockspace)
70{
71}
72
73/**
74 * nolock_get_lock - get a lm_lock_t given a descripton of the lock
75 * @lockspace: the lockspace the lock lives in
76 * @name: the name of the lock
77 * @lockp: return the lm_lock_t here
78 *
79 * Returns: 0 on success, -EXXX on failure
80 */
81
82static int nolock_get_lock(void *lockspace, struct lm_lockname *name,
83 void **lockp)
84{
85 *lockp = lockspace;
86 return 0;
87}
88
89/**
90 * nolock_put_lock - get rid of a lock structure
91 * @lock: the lock to throw away
92 *
93 */
94
95static void nolock_put_lock(void *lock)
96{
97}
98
99/**
100 * nolock_lock - acquire a lock
101 * @lock: the lock to manipulate
102 * @cur_state: the current state
103 * @req_state: the requested state
104 * @flags: modifier flags
105 *
106 * Returns: A bitmap of LM_OUT_*
107 */
108
109static unsigned int nolock_lock(void *lock, unsigned int cur_state,
110 unsigned int req_state, unsigned int flags)
111{
112 return req_state | LM_OUT_CACHEABLE;
113}
114
115/**
116 * nolock_unlock - unlock a lock
117 * @lock: the lock to manipulate
118 * @cur_state: the current state
119 *
120 * Returns: 0
121 */
122
123static unsigned int nolock_unlock(void *lock, unsigned int cur_state)
124{
125 return 0;
126}
127
128static void nolock_cancel(void *lock)
129{
130}
131
132/**
133 * nolock_hold_lvb - hold on to a lock value block
134 * @lock: the lock the LVB is associated with
135 * @lvbp: return the lm_lvb_t here
136 *
137 * Returns: 0 on success, -EXXX on failure
138 */
139
140static int nolock_hold_lvb(void *lock, char **lvbp)
141{
142 struct nolock_lockspace *nl = lock;
143 int error = 0;
144
145 *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL);
146 if (!*lvbp)
147 error = -ENOMEM;
148
149 return error;
150}
151
152/**
153 * nolock_unhold_lvb - release a LVB
154 * @lock: the lock the LVB is associated with
155 * @lvb: the lock value block
156 *
157 */
158
159static void nolock_unhold_lvb(void *lock, char *lvb)
160{
161 kfree(lvb);
162}
163
164static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
165 struct file *file, struct file_lock *fl)
166{
167 struct file_lock tmp;
168 int ret;
169
170 ret = posix_test_lock(file, fl, &tmp);
171 fl->fl_type = F_UNLCK;
172 if (ret)
173 memcpy(fl, &tmp, sizeof(struct file_lock));
174
175 return 0;
176}
177
178static int nolock_plock(void *lockspace, struct lm_lockname *name,
179 struct file *file, int cmd, struct file_lock *fl)
180{
181 int error;
182 error = posix_lock_file_wait(file, fl);
183 return error;
184}
185
186static int nolock_punlock(void *lockspace, struct lm_lockname *name,
187 struct file *file, struct file_lock *fl)
188{
189 int error;
190 error = posix_lock_file_wait(file, fl);
191 return error;
192}
193
194static void nolock_recovery_done(void *lockspace, unsigned int jid,
195 unsigned int message)
196{
197}
198
199static const struct lm_lockops nolock_ops = {
200 .lm_proto_name = "lock_nolock",
201 .lm_mount = nolock_mount,
202 .lm_others_may_mount = nolock_others_may_mount,
203 .lm_unmount = nolock_unmount,
204 .lm_withdraw = nolock_withdraw,
205 .lm_get_lock = nolock_get_lock,
206 .lm_put_lock = nolock_put_lock,
207 .lm_lock = nolock_lock,
208 .lm_unlock = nolock_unlock,
209 .lm_cancel = nolock_cancel,
210 .lm_hold_lvb = nolock_hold_lvb,
211 .lm_unhold_lvb = nolock_unhold_lvb,
212 .lm_plock_get = nolock_plock_get,
213 .lm_plock = nolock_plock,
214 .lm_punlock = nolock_punlock,
215 .lm_recovery_done = nolock_recovery_done,
216 .lm_owner = THIS_MODULE,
217};
218
219static int __init init_nolock(void)
220{
221 int error;
222
223 error = gfs2_register_lockproto(&nolock_ops);
224 if (error) {
225 printk(KERN_WARNING
226 "lock_nolock: can't register protocol: %d\n", error);
227 return error;
228 }
229
230 printk(KERN_INFO
231 "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
232 return 0;
233}
234
235static void __exit exit_nolock(void)
236{
237 gfs2_unregister_lockproto(&nolock_ops);
238}
239
240module_init(init_nolock);
241module_exit(exit_nolock);
242
243MODULE_DESCRIPTION("GFS Nolock Locking Module");
244MODULE_AUTHOR("Red Hat, Inc.");
245MODULE_LICENSE("GPL");
246
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
new file mode 100644
index 000000000000..6112d648b7ee
--- /dev/null
+++ b/fs/gfs2/log.c
@@ -0,0 +1,578 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "log.h"
24#include "lops.h"
25#include "meta_io.h"
26#include "util.h"
27#include "dir.h"
28
29#define PULL 1
30
31/**
32 * gfs2_struct2blk - compute stuff
33 * @sdp: the filesystem
34 * @nstruct: the number of structures
35 * @ssize: the size of the structures
36 *
37 * Compute the number of log descriptor blocks needed to hold a certain number
38 * of structures of a certain size.
39 *
40 * Returns: the number of blocks needed (minimum is always 1)
41 */
42
43unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
44 unsigned int ssize)
45{
46 unsigned int blks;
47 unsigned int first, second;
48
49 blks = 1;
50 first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
51
52 if (nstruct > first) {
53 second = (sdp->sd_sb.sb_bsize -
54 sizeof(struct gfs2_meta_header)) / ssize;
55 blks += DIV_ROUND_UP(nstruct - first, second);
56 }
57
58 return blks;
59}
60
61void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
62{
63 struct list_head *head = &sdp->sd_ail1_list;
64 u64 sync_gen;
65 struct list_head *first;
66 struct gfs2_ail *first_ai, *ai, *tmp;
67 int done = 0;
68
69 gfs2_log_lock(sdp);
70 if (list_empty(head)) {
71 gfs2_log_unlock(sdp);
72 return;
73 }
74 sync_gen = sdp->sd_ail_sync_gen++;
75
76 first = head->prev;
77 first_ai = list_entry(first, struct gfs2_ail, ai_list);
78 first_ai->ai_sync_gen = sync_gen;
79 gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
80
81 if (flags & DIO_ALL)
82 first = NULL;
83
84 while(!done) {
85 if (first && (head->prev != first ||
86 gfs2_ail1_empty_one(sdp, first_ai, 0)))
87 break;
88
89 done = 1;
90 list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) {
91 if (ai->ai_sync_gen >= sync_gen)
92 continue;
93 ai->ai_sync_gen = sync_gen;
94 gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
95 done = 0;
96 break;
97 }
98 }
99
100 gfs2_log_unlock(sdp);
101}
102
103int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
104{
105 struct gfs2_ail *ai, *s;
106 int ret;
107
108 gfs2_log_lock(sdp);
109
110 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
111 if (gfs2_ail1_empty_one(sdp, ai, flags))
112 list_move(&ai->ai_list, &sdp->sd_ail2_list);
113 else if (!(flags & DIO_ALL))
114 break;
115 }
116
117 ret = list_empty(&sdp->sd_ail1_list);
118
119 gfs2_log_unlock(sdp);
120
121 return ret;
122}
123
124static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
125{
126 struct gfs2_ail *ai, *safe;
127 unsigned int old_tail = sdp->sd_log_tail;
128 int wrap = (new_tail < old_tail);
129 int a, b, rm;
130
131 gfs2_log_lock(sdp);
132
133 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
134 a = (old_tail <= ai->ai_first);
135 b = (ai->ai_first < new_tail);
136 rm = (wrap) ? (a || b) : (a && b);
137 if (!rm)
138 continue;
139
140 gfs2_ail2_empty_one(sdp, ai);
141 list_del(&ai->ai_list);
142 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
143 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
144 kfree(ai);
145 }
146
147 gfs2_log_unlock(sdp);
148}
149
150/**
151 * gfs2_log_reserve - Make a log reservation
152 * @sdp: The GFS2 superblock
153 * @blks: The number of blocks to reserve
154 *
155 * Returns: errno
156 */
157
158int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
159{
160 unsigned int try = 0;
161
162 if (gfs2_assert_warn(sdp, blks) ||
163 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
164 return -EINVAL;
165
166 mutex_lock(&sdp->sd_log_reserve_mutex);
167 gfs2_log_lock(sdp);
168 while(sdp->sd_log_blks_free <= blks) {
169 gfs2_log_unlock(sdp);
170 gfs2_ail1_empty(sdp, 0);
171 gfs2_log_flush(sdp, NULL);
172
173 if (try++)
174 gfs2_ail1_start(sdp, 0);
175 gfs2_log_lock(sdp);
176 }
177 sdp->sd_log_blks_free -= blks;
178 gfs2_log_unlock(sdp);
179 mutex_unlock(&sdp->sd_log_reserve_mutex);
180
181 down_read(&sdp->sd_log_flush_lock);
182
183 return 0;
184}
185
186/**
187 * gfs2_log_release - Release a given number of log blocks
188 * @sdp: The GFS2 superblock
189 * @blks: The number of blocks
190 *
191 */
192
193void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
194{
195
196 gfs2_log_lock(sdp);
197 sdp->sd_log_blks_free += blks;
198 gfs2_assert_withdraw(sdp,
199 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
200 gfs2_log_unlock(sdp);
201 up_read(&sdp->sd_log_flush_lock);
202}
203
204static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
205{
206 int error;
207 struct buffer_head bh_map;
208
209 error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, 0, &bh_map, 1);
210 if (error || !bh_map.b_blocknr)
211 printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn);
212 gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
213
214 return bh_map.b_blocknr;
215}
216
217/**
218 * log_distance - Compute distance between two journal blocks
219 * @sdp: The GFS2 superblock
220 * @newer: The most recent journal block of the pair
221 * @older: The older journal block of the pair
222 *
223 * Compute the distance (in the journal direction) between two
224 * blocks in the journal
225 *
226 * Returns: the distance in blocks
227 */
228
229static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
230 unsigned int older)
231{
232 int dist;
233
234 dist = newer - older;
235 if (dist < 0)
236 dist += sdp->sd_jdesc->jd_blocks;
237
238 return dist;
239}
240
241static unsigned int current_tail(struct gfs2_sbd *sdp)
242{
243 struct gfs2_ail *ai;
244 unsigned int tail;
245
246 gfs2_log_lock(sdp);
247
248 if (list_empty(&sdp->sd_ail1_list)) {
249 tail = sdp->sd_log_head;
250 } else {
251 ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list);
252 tail = ai->ai_first;
253 }
254
255 gfs2_log_unlock(sdp);
256
257 return tail;
258}
259
260static inline void log_incr_head(struct gfs2_sbd *sdp)
261{
262 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
263 gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head);
264
265 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
266 sdp->sd_log_flush_head = 0;
267 sdp->sd_log_flush_wrapped = 1;
268 }
269}
270
271/**
272 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
273 * @sdp: The GFS2 superblock
274 *
275 * Returns: the buffer_head
276 */
277
278struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
279{
280 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
281 struct gfs2_log_buf *lb;
282 struct buffer_head *bh;
283
284 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
285 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
286
287 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
288 lock_buffer(bh);
289 memset(bh->b_data, 0, bh->b_size);
290 set_buffer_uptodate(bh);
291 clear_buffer_dirty(bh);
292 unlock_buffer(bh);
293
294 log_incr_head(sdp);
295
296 return bh;
297}
298
299/**
300 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
301 * @sdp: the filesystem
302 * @data: the data the buffer_head should point to
303 *
304 * Returns: the log buffer descriptor
305 */
306
307struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
308 struct buffer_head *real)
309{
310 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
311 struct gfs2_log_buf *lb;
312 struct buffer_head *bh;
313
314 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
315 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
316 lb->lb_real = real;
317
318 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
319 atomic_set(&bh->b_count, 1);
320 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
321 set_bh_page(bh, real->b_page, bh_offset(real));
322 bh->b_blocknr = blkno;
323 bh->b_size = sdp->sd_sb.sb_bsize;
324 bh->b_bdev = sdp->sd_vfs->s_bdev;
325
326 log_incr_head(sdp);
327
328 return bh;
329}
330
331static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
332{
333 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
334
335 ail2_empty(sdp, new_tail);
336
337 gfs2_log_lock(sdp);
338 sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
339 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
340 gfs2_log_unlock(sdp);
341
342 sdp->sd_log_tail = new_tail;
343}
344
345/**
346 * log_write_header - Get and initialize a journal header buffer
347 * @sdp: The GFS2 superblock
348 *
349 * Returns: the initialized log buffer descriptor
350 */
351
352static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
353{
354 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
355 struct buffer_head *bh;
356 struct gfs2_log_header *lh;
357 unsigned int tail;
358 u32 hash;
359
360 bh = sb_getblk(sdp->sd_vfs, blkno);
361 lock_buffer(bh);
362 memset(bh->b_data, 0, bh->b_size);
363 set_buffer_uptodate(bh);
364 clear_buffer_dirty(bh);
365 unlock_buffer(bh);
366
367 gfs2_ail1_empty(sdp, 0);
368 tail = current_tail(sdp);
369
370 lh = (struct gfs2_log_header *)bh->b_data;
371 memset(lh, 0, sizeof(struct gfs2_log_header));
372 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
373 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
374 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
375 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
376 lh->lh_flags = cpu_to_be32(flags);
377 lh->lh_tail = cpu_to_be32(tail);
378 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
379 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
380 lh->lh_hash = cpu_to_be32(hash);
381
382 set_buffer_dirty(bh);
383 if (sync_dirty_buffer(bh))
384 gfs2_io_error_bh(sdp, bh);
385 brelse(bh);
386
387 if (sdp->sd_log_tail != tail)
388 log_pull_tail(sdp, tail, pull);
389 else
390 gfs2_assert_withdraw(sdp, !pull);
391
392 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
393 log_incr_head(sdp);
394}
395
396static void log_flush_commit(struct gfs2_sbd *sdp)
397{
398 struct list_head *head = &sdp->sd_log_flush_list;
399 struct gfs2_log_buf *lb;
400 struct buffer_head *bh;
401
402 while (!list_empty(head)) {
403 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
404 list_del(&lb->lb_list);
405 bh = lb->lb_bh;
406
407 wait_on_buffer(bh);
408 if (!buffer_uptodate(bh))
409 gfs2_io_error_bh(sdp, bh);
410 if (lb->lb_real) {
411 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
412 schedule();
413 free_buffer_head(bh);
414 } else
415 brelse(bh);
416 kfree(lb);
417 }
418
419 log_write_header(sdp, 0, 0);
420}
421
422/**
423 * gfs2_log_flush - flush incore transaction(s)
424 * @sdp: the filesystem
425 * @gl: The glock structure to flush. If NULL, flush the whole incore log
426 *
427 */
428
429void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
430{
431 struct gfs2_ail *ai;
432
433 down_write(&sdp->sd_log_flush_lock);
434
435 if (gl) {
436 gfs2_log_lock(sdp);
437 if (list_empty(&gl->gl_le.le_list)) {
438 gfs2_log_unlock(sdp);
439 up_write(&sdp->sd_log_flush_lock);
440 return;
441 }
442 gfs2_log_unlock(sdp);
443 }
444
445 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
446 INIT_LIST_HEAD(&ai->ai_ail1_list);
447 INIT_LIST_HEAD(&ai->ai_ail2_list);
448
449 gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
450 gfs2_assert_withdraw(sdp,
451 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
452
453 sdp->sd_log_flush_head = sdp->sd_log_head;
454 sdp->sd_log_flush_wrapped = 0;
455 ai->ai_first = sdp->sd_log_flush_head;
456
457 lops_before_commit(sdp);
458 if (!list_empty(&sdp->sd_log_flush_list))
459 log_flush_commit(sdp);
460 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
461 log_write_header(sdp, 0, PULL);
462 lops_after_commit(sdp, ai);
463 sdp->sd_log_head = sdp->sd_log_flush_head;
464
465 sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
466
467 sdp->sd_log_blks_reserved = 0;
468 sdp->sd_log_commited_buf = 0;
469 sdp->sd_log_num_hdrs = 0;
470 sdp->sd_log_commited_revoke = 0;
471
472 gfs2_log_lock(sdp);
473 if (!list_empty(&ai->ai_ail1_list)) {
474 list_add(&ai->ai_list, &sdp->sd_ail1_list);
475 ai = NULL;
476 }
477 gfs2_log_unlock(sdp);
478
479 sdp->sd_vfs->s_dirt = 0;
480 up_write(&sdp->sd_log_flush_lock);
481
482 kfree(ai);
483}
484
485static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
486{
487 unsigned int reserved = 0;
488 unsigned int old;
489
490 gfs2_log_lock(sdp);
491
492 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
493 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
494 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
495 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
496
497 if (sdp->sd_log_commited_buf)
498 reserved += sdp->sd_log_commited_buf;
499 if (sdp->sd_log_commited_revoke)
500 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
501 sizeof(u64));
502 if (reserved)
503 reserved++;
504
505 old = sdp->sd_log_blks_free;
506 sdp->sd_log_blks_free += tr->tr_reserved -
507 (reserved - sdp->sd_log_blks_reserved);
508
509 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
510 gfs2_assert_withdraw(sdp,
511 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
512 sdp->sd_log_num_hdrs);
513
514 sdp->sd_log_blks_reserved = reserved;
515
516 gfs2_log_unlock(sdp);
517}
518
519/**
520 * gfs2_log_commit - Commit a transaction to the log
521 * @sdp: the filesystem
522 * @tr: the transaction
523 *
524 * Returns: errno
525 */
526
527void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
528{
529 log_refund(sdp, tr);
530 lops_incore_commit(sdp, tr);
531
532 sdp->sd_vfs->s_dirt = 1;
533 up_read(&sdp->sd_log_flush_lock);
534
535 gfs2_log_lock(sdp);
536 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
537 gfs2_log_unlock(sdp);
538 gfs2_log_flush(sdp, NULL);
539 } else {
540 gfs2_log_unlock(sdp);
541 }
542}
543
544/**
545 * gfs2_log_shutdown - write a shutdown header into a journal
546 * @sdp: the filesystem
547 *
548 */
549
550void gfs2_log_shutdown(struct gfs2_sbd *sdp)
551{
552 down_write(&sdp->sd_log_flush_lock);
553
554 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
555 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
556 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
557 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
558 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
559 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
560 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
561 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
562 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
563
564 sdp->sd_log_flush_head = sdp->sd_log_head;
565 sdp->sd_log_flush_wrapped = 0;
566
567 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
568
569 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
570 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
571 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
572
573 sdp->sd_log_head = sdp->sd_log_flush_head;
574 sdp->sd_log_tail = sdp->sd_log_head;
575
576 up_write(&sdp->sd_log_flush_lock);
577}
578
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
new file mode 100644
index 000000000000..7f5737d55612
--- /dev/null
+++ b/fs/gfs2/log.h
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LOG_DOT_H__
11#define __LOG_DOT_H__
12
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include "incore.h"
16
17/**
18 * gfs2_log_lock - acquire the right to mess with the log manager
19 * @sdp: the filesystem
20 *
21 */
22
23static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
24{
25 spin_lock(&sdp->sd_log_lock);
26}
27
28/**
29 * gfs2_log_unlock - release the right to mess with the log manager
30 * @sdp: the filesystem
31 *
32 */
33
34static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
35{
36 spin_unlock(&sdp->sd_log_lock);
37}
38
39static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
40 unsigned int value)
41{
42 if (++value == sdp->sd_jdesc->jd_blocks) {
43 value = 0;
44 }
45 sdp->sd_log_head = sdp->sd_log_tail = value;
46}
47
48unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
49 unsigned int ssize);
50
51void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
52int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
53
54int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
56
57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real);
60void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62
63void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64
65#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
new file mode 100644
index 000000000000..881e337b6a70
--- /dev/null
+++ b/fs/gfs2/lops.c
@@ -0,0 +1,809 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "glock.h"
21#include "log.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "recovery.h"
25#include "rgrp.h"
26#include "trans.h"
27#include "util.h"
28
29static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
30{
31 struct gfs2_glock *gl;
32 struct gfs2_trans *tr = current->journal_info;
33
34 tr->tr_touched = 1;
35
36 if (!list_empty(&le->le_list))
37 return;
38
39 gl = container_of(le, struct gfs2_glock, gl_le);
40 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
41 return;
42 gfs2_glock_hold(gl);
43 set_bit(GLF_DIRTY, &gl->gl_flags);
44
45 gfs2_log_lock(sdp);
46 sdp->sd_log_num_gl++;
47 list_add(&le->le_list, &sdp->sd_log_le_gl);
48 gfs2_log_unlock(sdp);
49}
50
51static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 struct list_head *head = &sdp->sd_log_le_gl;
54 struct gfs2_glock *gl;
55
56 while (!list_empty(head)) {
57 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
58 list_del_init(&gl->gl_le.le_list);
59 sdp->sd_log_num_gl--;
60
61 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
62 gfs2_glock_put(gl);
63 }
64 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
65}
66
67static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
68{
69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
70 struct gfs2_trans *tr;
71
72 if (!list_empty(&bd->bd_list_tr))
73 return;
74
75 tr = current->journal_info;
76 tr->tr_touched = 1;
77 tr->tr_num_buf++;
78 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
79
80 if (!list_empty(&le->le_list))
81 return;
82
83 gfs2_trans_add_gl(bd->bd_gl);
84
85 gfs2_meta_check(sdp, bd->bd_bh);
86 gfs2_pin(sdp, bd->bd_bh);
87
88 gfs2_log_lock(sdp);
89 sdp->sd_log_num_buf++;
90 list_add(&le->le_list, &sdp->sd_log_le_buf);
91 gfs2_log_unlock(sdp);
92
93 tr->tr_num_buf_new++;
94}
95
96static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
97{
98 struct list_head *head = &tr->tr_list_buf;
99 struct gfs2_bufdata *bd;
100
101 while (!list_empty(head)) {
102 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
103 list_del_init(&bd->bd_list_tr);
104 tr->tr_num_buf--;
105 }
106 gfs2_assert_warn(sdp, !tr->tr_num_buf);
107}
108
109static void buf_lo_before_commit(struct gfs2_sbd *sdp)
110{
111 struct buffer_head *bh;
112 struct gfs2_log_descriptor *ld;
113 struct gfs2_bufdata *bd1 = NULL, *bd2;
114 unsigned int total = sdp->sd_log_num_buf;
115 unsigned int offset = sizeof(struct gfs2_log_descriptor);
116 unsigned int limit;
117 unsigned int num;
118 unsigned n;
119 __be64 *ptr;
120
121 offset += sizeof(__be64) - 1;
122 offset &= ~(sizeof(__be64) - 1);
123 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
124 /* for 4k blocks, limit = 503 */
125
126 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
127 while(total) {
128 num = total;
129 if (total > limit)
130 num = limit;
131 bh = gfs2_log_get_buf(sdp);
132 sdp->sd_log_num_hdrs++;
133 ld = (struct gfs2_log_descriptor *)bh->b_data;
134 ptr = (__be64 *)(bh->b_data + offset);
135 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
136 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
137 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
138 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
139 ld->ld_length = cpu_to_be32(num + 1);
140 ld->ld_data1 = cpu_to_be32(num);
141 ld->ld_data2 = cpu_to_be32(0);
142 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
143
144 n = 0;
145 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
146 bd_le.le_list) {
147 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
148 if (++n >= num)
149 break;
150 }
151
152 set_buffer_dirty(bh);
153 ll_rw_block(WRITE, 1, &bh);
154
155 n = 0;
156 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
157 bd_le.le_list) {
158 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
159 set_buffer_dirty(bh);
160 ll_rw_block(WRITE, 1, &bh);
161 if (++n >= num)
162 break;
163 }
164
165 total -= num;
166 }
167}
168
169static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
170{
171 struct list_head *head = &sdp->sd_log_le_buf;
172 struct gfs2_bufdata *bd;
173
174 while (!list_empty(head)) {
175 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
176 list_del_init(&bd->bd_le.le_list);
177 sdp->sd_log_num_buf--;
178
179 gfs2_unpin(sdp, bd->bd_bh, ai);
180 }
181 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
182}
183
184static void buf_lo_before_scan(struct gfs2_jdesc *jd,
185 struct gfs2_log_header *head, int pass)
186{
187 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
188
189 if (pass != 0)
190 return;
191
192 sdp->sd_found_blocks = 0;
193 sdp->sd_replayed_blocks = 0;
194}
195
196static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
197 struct gfs2_log_descriptor *ld, __be64 *ptr,
198 int pass)
199{
200 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
201 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
202 struct gfs2_glock *gl = ip->i_gl;
203 unsigned int blks = be32_to_cpu(ld->ld_data1);
204 struct buffer_head *bh_log, *bh_ip;
205 u64 blkno;
206 int error = 0;
207
208 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
209 return 0;
210
211 gfs2_replay_incr_blk(sdp, &start);
212
213 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
214 blkno = be64_to_cpu(*ptr++);
215
216 sdp->sd_found_blocks++;
217
218 if (gfs2_revoke_check(sdp, blkno, start))
219 continue;
220
221 error = gfs2_replay_read_block(jd, start, &bh_log);
222 if (error)
223 return error;
224
225 bh_ip = gfs2_meta_new(gl, blkno);
226 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
227
228 if (gfs2_meta_check(sdp, bh_ip))
229 error = -EIO;
230 else
231 mark_buffer_dirty(bh_ip);
232
233 brelse(bh_log);
234 brelse(bh_ip);
235
236 if (error)
237 break;
238
239 sdp->sd_replayed_blocks++;
240 }
241
242 return error;
243}
244
245static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
246{
247 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
248 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
249
250 if (error) {
251 gfs2_meta_sync(ip->i_gl);
252 return;
253 }
254 if (pass != 1)
255 return;
256
257 gfs2_meta_sync(ip->i_gl);
258
259 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
260 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
261}
262
263static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
264{
265 struct gfs2_trans *tr;
266
267 tr = current->journal_info;
268 tr->tr_touched = 1;
269 tr->tr_num_revoke++;
270
271 gfs2_log_lock(sdp);
272 sdp->sd_log_num_revoke++;
273 list_add(&le->le_list, &sdp->sd_log_le_revoke);
274 gfs2_log_unlock(sdp);
275}
276
277static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
278{
279 struct gfs2_log_descriptor *ld;
280 struct gfs2_meta_header *mh;
281 struct buffer_head *bh;
282 unsigned int offset;
283 struct list_head *head = &sdp->sd_log_le_revoke;
284 struct gfs2_revoke *rv;
285
286 if (!sdp->sd_log_num_revoke)
287 return;
288
289 bh = gfs2_log_get_buf(sdp);
290 ld = (struct gfs2_log_descriptor *)bh->b_data;
291 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
292 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
293 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
294 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
295 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
296 sizeof(u64)));
297 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
298 ld->ld_data2 = cpu_to_be32(0);
299 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
300 offset = sizeof(struct gfs2_log_descriptor);
301
302 while (!list_empty(head)) {
303 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
304 list_del_init(&rv->rv_le.le_list);
305 sdp->sd_log_num_revoke--;
306
307 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
308 set_buffer_dirty(bh);
309 ll_rw_block(WRITE, 1, &bh);
310
311 bh = gfs2_log_get_buf(sdp);
312 mh = (struct gfs2_meta_header *)bh->b_data;
313 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
314 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
315 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
316 offset = sizeof(struct gfs2_meta_header);
317 }
318
319 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
320 kfree(rv);
321
322 offset += sizeof(u64);
323 }
324 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
325
326 set_buffer_dirty(bh);
327 ll_rw_block(WRITE, 1, &bh);
328}
329
330static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
331 struct gfs2_log_header *head, int pass)
332{
333 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
334
335 if (pass != 0)
336 return;
337
338 sdp->sd_found_revokes = 0;
339 sdp->sd_replay_tail = head->lh_tail;
340}
341
342static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
343 struct gfs2_log_descriptor *ld, __be64 *ptr,
344 int pass)
345{
346 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
347 unsigned int blks = be32_to_cpu(ld->ld_length);
348 unsigned int revokes = be32_to_cpu(ld->ld_data1);
349 struct buffer_head *bh;
350 unsigned int offset;
351 u64 blkno;
352 int first = 1;
353 int error;
354
355 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
356 return 0;
357
358 offset = sizeof(struct gfs2_log_descriptor);
359
360 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
361 error = gfs2_replay_read_block(jd, start, &bh);
362 if (error)
363 return error;
364
365 if (!first)
366 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
367
368 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
369 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
370
371 error = gfs2_revoke_add(sdp, blkno, start);
372 if (error < 0)
373 return error;
374 else if (error)
375 sdp->sd_found_revokes++;
376
377 if (!--revokes)
378 break;
379 offset += sizeof(u64);
380 }
381
382 brelse(bh);
383 offset = sizeof(struct gfs2_meta_header);
384 first = 0;
385 }
386
387 return 0;
388}
389
390static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
391{
392 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
393
394 if (error) {
395 gfs2_revoke_clean(sdp);
396 return;
397 }
398 if (pass != 1)
399 return;
400
401 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
402 jd->jd_jid, sdp->sd_found_revokes);
403
404 gfs2_revoke_clean(sdp);
405}
406
407static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
408{
409 struct gfs2_rgrpd *rgd;
410 struct gfs2_trans *tr = current->journal_info;
411
412 tr->tr_touched = 1;
413
414 if (!list_empty(&le->le_list))
415 return;
416
417 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
418 gfs2_rgrp_bh_hold(rgd);
419
420 gfs2_log_lock(sdp);
421 sdp->sd_log_num_rg++;
422 list_add(&le->le_list, &sdp->sd_log_le_rg);
423 gfs2_log_unlock(sdp);
424}
425
426static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
427{
428 struct list_head *head = &sdp->sd_log_le_rg;
429 struct gfs2_rgrpd *rgd;
430
431 while (!list_empty(head)) {
432 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
433 list_del_init(&rgd->rd_le.le_list);
434 sdp->sd_log_num_rg--;
435
436 gfs2_rgrp_repolish_clones(rgd);
437 gfs2_rgrp_bh_put(rgd);
438 }
439 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
440}
441
442/**
443 * databuf_lo_add - Add a databuf to the transaction.
444 *
445 * This is used in two distinct cases:
446 * i) In ordered write mode
447 * We put the data buffer on a list so that we can ensure that its
448 * synced to disk at the right time
449 * ii) In journaled data mode
450 * We need to journal the data block in the same way as metadata in
451 * the functions above. The difference is that here we have a tag
452 * which is two __be64's being the block number (as per meta data)
453 * and a flag which says whether the data block needs escaping or
454 * not. This means we need a new log entry for each 251 or so data
455 * blocks, which isn't an enormous overhead but twice as much as
456 * for normal metadata blocks.
457 */
458static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
459{
460 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
461 struct gfs2_trans *tr = current->journal_info;
462 struct address_space *mapping = bd->bd_bh->b_page->mapping;
463 struct gfs2_inode *ip = GFS2_I(mapping->host);
464
465 tr->tr_touched = 1;
466 if (list_empty(&bd->bd_list_tr) &&
467 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
468 tr->tr_num_buf++;
469 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
470 gfs2_pin(sdp, bd->bd_bh);
471 tr->tr_num_buf_new++;
472 }
473 gfs2_trans_add_gl(bd->bd_gl);
474 gfs2_log_lock(sdp);
475 if (list_empty(&le->le_list)) {
476 if (ip->i_di.di_flags & GFS2_DIF_JDATA)
477 sdp->sd_log_num_jdata++;
478 sdp->sd_log_num_databuf++;
479 list_add(&le->le_list, &sdp->sd_log_le_databuf);
480 }
481 gfs2_log_unlock(sdp);
482}
483
484static int gfs2_check_magic(struct buffer_head *bh)
485{
486 struct page *page = bh->b_page;
487 void *kaddr;
488 __be32 *ptr;
489 int rv = 0;
490
491 kaddr = kmap_atomic(page, KM_USER0);
492 ptr = kaddr + bh_offset(bh);
493 if (*ptr == cpu_to_be32(GFS2_MAGIC))
494 rv = 1;
495 kunmap_atomic(page, KM_USER0);
496
497 return rv;
498}
499
500/**
501 * databuf_lo_before_commit - Scan the data buffers, writing as we go
502 *
503 * Here we scan through the lists of buffers and make the assumption
504 * that any buffer thats been pinned is being journaled, and that
505 * any unpinned buffer is an ordered write data buffer and therefore
506 * will be written back rather than journaled.
507 */
508static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
509{
510 LIST_HEAD(started);
511 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
512 struct buffer_head *bh = NULL;
513 unsigned int offset = sizeof(struct gfs2_log_descriptor);
514 struct gfs2_log_descriptor *ld;
515 unsigned int limit;
516 unsigned int total_dbuf = sdp->sd_log_num_databuf;
517 unsigned int total_jdata = sdp->sd_log_num_jdata;
518 unsigned int num, n;
519 __be64 *ptr = NULL;
520
521 offset += 2*sizeof(__be64) - 1;
522 offset &= ~(2*sizeof(__be64) - 1);
523 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
524
525 /*
526 * Start writing ordered buffers, write journaled buffers
527 * into the log along with a header
528 */
529 gfs2_log_lock(sdp);
530 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
531 bd_le.le_list);
532 while(total_dbuf) {
533 num = total_jdata;
534 if (num > limit)
535 num = limit;
536 n = 0;
537 list_for_each_entry_safe_continue(bd1, bdt,
538 &sdp->sd_log_le_databuf,
539 bd_le.le_list) {
540 /* An ordered write buffer */
541 if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
542 list_move(&bd1->bd_le.le_list, &started);
543 if (bd1 == bd2) {
544 bd2 = NULL;
545 bd2 = list_prepare_entry(bd2,
546 &sdp->sd_log_le_databuf,
547 bd_le.le_list);
548 }
549 total_dbuf--;
550 if (bd1->bd_bh) {
551 get_bh(bd1->bd_bh);
552 if (buffer_dirty(bd1->bd_bh)) {
553 gfs2_log_unlock(sdp);
554 wait_on_buffer(bd1->bd_bh);
555 ll_rw_block(WRITE, 1,
556 &bd1->bd_bh);
557 gfs2_log_lock(sdp);
558 }
559 brelse(bd1->bd_bh);
560 continue;
561 }
562 continue;
563 } else if (bd1->bd_bh) { /* A journaled buffer */
564 int magic;
565 gfs2_log_unlock(sdp);
566 if (!bh) {
567 bh = gfs2_log_get_buf(sdp);
568 sdp->sd_log_num_hdrs++;
569 ld = (struct gfs2_log_descriptor *)
570 bh->b_data;
571 ptr = (__be64 *)(bh->b_data + offset);
572 ld->ld_header.mh_magic =
573 cpu_to_be32(GFS2_MAGIC);
574 ld->ld_header.mh_type =
575 cpu_to_be32(GFS2_METATYPE_LD);
576 ld->ld_header.mh_format =
577 cpu_to_be32(GFS2_FORMAT_LD);
578 ld->ld_type =
579 cpu_to_be32(GFS2_LOG_DESC_JDATA);
580 ld->ld_length = cpu_to_be32(num + 1);
581 ld->ld_data1 = cpu_to_be32(num);
582 ld->ld_data2 = cpu_to_be32(0);
583 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
584 }
585 magic = gfs2_check_magic(bd1->bd_bh);
586 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
587 *ptr++ = cpu_to_be64((__u64)magic);
588 clear_buffer_escaped(bd1->bd_bh);
589 if (unlikely(magic != 0))
590 set_buffer_escaped(bd1->bd_bh);
591 gfs2_log_lock(sdp);
592 if (n++ > num)
593 break;
594 } else if (!bd1->bd_bh) {
595 total_dbuf--;
596 sdp->sd_log_num_databuf--;
597 list_del_init(&bd1->bd_le.le_list);
598 if (bd1 == bd2) {
599 bd2 = NULL;
600 bd2 = list_prepare_entry(bd2,
601 &sdp->sd_log_le_databuf,
602 bd_le.le_list);
603 }
604 kmem_cache_free(gfs2_bufdata_cachep, bd1);
605 }
606 }
607 gfs2_log_unlock(sdp);
608 if (bh) {
609 set_buffer_dirty(bh);
610 ll_rw_block(WRITE, 1, &bh);
611 bh = NULL;
612 }
613 n = 0;
614 gfs2_log_lock(sdp);
615 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
616 bd_le.le_list) {
617 if (!bd2->bd_bh)
618 continue;
619 /* copy buffer if it needs escaping */
620 gfs2_log_unlock(sdp);
621 if (unlikely(buffer_escaped(bd2->bd_bh))) {
622 void *kaddr;
623 struct page *page = bd2->bd_bh->b_page;
624 bh = gfs2_log_get_buf(sdp);
625 kaddr = kmap_atomic(page, KM_USER0);
626 memcpy(bh->b_data,
627 kaddr + bh_offset(bd2->bd_bh),
628 sdp->sd_sb.sb_bsize);
629 kunmap_atomic(page, KM_USER0);
630 *(__be32 *)bh->b_data = 0;
631 } else {
632 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
633 }
634 set_buffer_dirty(bh);
635 ll_rw_block(WRITE, 1, &bh);
636 gfs2_log_lock(sdp);
637 if (++n >= num)
638 break;
639 }
640 bh = NULL;
641 total_dbuf -= num;
642 total_jdata -= num;
643 }
644 gfs2_log_unlock(sdp);
645
646 /* Wait on all ordered buffers */
647 while (!list_empty(&started)) {
648 gfs2_log_lock(sdp);
649 bd1 = list_entry(started.next, struct gfs2_bufdata,
650 bd_le.le_list);
651 list_del_init(&bd1->bd_le.le_list);
652 sdp->sd_log_num_databuf--;
653 bh = bd1->bd_bh;
654 if (bh) {
655 bh->b_private = NULL;
656 get_bh(bh);
657 gfs2_log_unlock(sdp);
658 wait_on_buffer(bh);
659 brelse(bh);
660 } else
661 gfs2_log_unlock(sdp);
662
663 kmem_cache_free(gfs2_bufdata_cachep, bd1);
664 }
665
666 /* We've removed all the ordered write bufs here, so only jdata left */
667 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
668}
669
670static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
671 struct gfs2_log_descriptor *ld,
672 __be64 *ptr, int pass)
673{
674 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
675 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
676 struct gfs2_glock *gl = ip->i_gl;
677 unsigned int blks = be32_to_cpu(ld->ld_data1);
678 struct buffer_head *bh_log, *bh_ip;
679 u64 blkno;
680 u64 esc;
681 int error = 0;
682
683 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
684 return 0;
685
686 gfs2_replay_incr_blk(sdp, &start);
687 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
688 blkno = be64_to_cpu(*ptr++);
689 esc = be64_to_cpu(*ptr++);
690
691 sdp->sd_found_blocks++;
692
693 if (gfs2_revoke_check(sdp, blkno, start))
694 continue;
695
696 error = gfs2_replay_read_block(jd, start, &bh_log);
697 if (error)
698 return error;
699
700 bh_ip = gfs2_meta_new(gl, blkno);
701 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
702
703 /* Unescape */
704 if (esc) {
705 __be32 *eptr = (__be32 *)bh_ip->b_data;
706 *eptr = cpu_to_be32(GFS2_MAGIC);
707 }
708 mark_buffer_dirty(bh_ip);
709
710 brelse(bh_log);
711 brelse(bh_ip);
712 if (error)
713 break;
714
715 sdp->sd_replayed_blocks++;
716 }
717
718 return error;
719}
720
721/* FIXME: sort out accounting for log blocks etc. */
722
723static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
724{
725 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
726 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
727
728 if (error) {
729 gfs2_meta_sync(ip->i_gl);
730 return;
731 }
732 if (pass != 1)
733 return;
734
735 /* data sync? */
736 gfs2_meta_sync(ip->i_gl);
737
738 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
739 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
740}
741
742static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
743{
744 struct list_head *head = &sdp->sd_log_le_databuf;
745 struct gfs2_bufdata *bd;
746
747 while (!list_empty(head)) {
748 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
749 list_del_init(&bd->bd_le.le_list);
750 sdp->sd_log_num_databuf--;
751 sdp->sd_log_num_jdata--;
752 gfs2_unpin(sdp, bd->bd_bh, ai);
753 }
754 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
755 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
756}
757
758
759const struct gfs2_log_operations gfs2_glock_lops = {
760 .lo_add = glock_lo_add,
761 .lo_after_commit = glock_lo_after_commit,
762 .lo_name = "glock",
763};
764
765const struct gfs2_log_operations gfs2_buf_lops = {
766 .lo_add = buf_lo_add,
767 .lo_incore_commit = buf_lo_incore_commit,
768 .lo_before_commit = buf_lo_before_commit,
769 .lo_after_commit = buf_lo_after_commit,
770 .lo_before_scan = buf_lo_before_scan,
771 .lo_scan_elements = buf_lo_scan_elements,
772 .lo_after_scan = buf_lo_after_scan,
773 .lo_name = "buf",
774};
775
776const struct gfs2_log_operations gfs2_revoke_lops = {
777 .lo_add = revoke_lo_add,
778 .lo_before_commit = revoke_lo_before_commit,
779 .lo_before_scan = revoke_lo_before_scan,
780 .lo_scan_elements = revoke_lo_scan_elements,
781 .lo_after_scan = revoke_lo_after_scan,
782 .lo_name = "revoke",
783};
784
785const struct gfs2_log_operations gfs2_rg_lops = {
786 .lo_add = rg_lo_add,
787 .lo_after_commit = rg_lo_after_commit,
788 .lo_name = "rg",
789};
790
791const struct gfs2_log_operations gfs2_databuf_lops = {
792 .lo_add = databuf_lo_add,
793 .lo_incore_commit = buf_lo_incore_commit,
794 .lo_before_commit = databuf_lo_before_commit,
795 .lo_after_commit = databuf_lo_after_commit,
796 .lo_scan_elements = databuf_lo_scan_elements,
797 .lo_after_scan = databuf_lo_after_scan,
798 .lo_name = "databuf",
799};
800
801const struct gfs2_log_operations *gfs2_log_ops[] = {
802 &gfs2_glock_lops,
803 &gfs2_buf_lops,
804 &gfs2_revoke_lops,
805 &gfs2_rg_lops,
806 &gfs2_databuf_lops,
807 NULL,
808};
809
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
new file mode 100644
index 000000000000..5839c05ae6be
--- /dev/null
+++ b/fs/gfs2/lops.h
@@ -0,0 +1,99 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LOPS_DOT_H__
11#define __LOPS_DOT_H__
12
13#include <linux/list.h>
14#include "incore.h"
15
16extern const struct gfs2_log_operations gfs2_glock_lops;
17extern const struct gfs2_log_operations gfs2_buf_lops;
18extern const struct gfs2_log_operations gfs2_revoke_lops;
19extern const struct gfs2_log_operations gfs2_rg_lops;
20extern const struct gfs2_log_operations gfs2_databuf_lops;
21
22extern const struct gfs2_log_operations *gfs2_log_ops[];
23
24static inline void lops_init_le(struct gfs2_log_element *le,
25 const struct gfs2_log_operations *lops)
26{
27 INIT_LIST_HEAD(&le->le_list);
28 le->le_ops = lops;
29}
30
31static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
32{
33 if (le->le_ops->lo_add)
34 le->le_ops->lo_add(sdp, le);
35}
36
37static inline void lops_incore_commit(struct gfs2_sbd *sdp,
38 struct gfs2_trans *tr)
39{
40 int x;
41 for (x = 0; gfs2_log_ops[x]; x++)
42 if (gfs2_log_ops[x]->lo_incore_commit)
43 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
44}
45
46static inline void lops_before_commit(struct gfs2_sbd *sdp)
47{
48 int x;
49 for (x = 0; gfs2_log_ops[x]; x++)
50 if (gfs2_log_ops[x]->lo_before_commit)
51 gfs2_log_ops[x]->lo_before_commit(sdp);
52}
53
54static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
55{
56 int x;
57 for (x = 0; gfs2_log_ops[x]; x++)
58 if (gfs2_log_ops[x]->lo_after_commit)
59 gfs2_log_ops[x]->lo_after_commit(sdp, ai);
60}
61
62static inline void lops_before_scan(struct gfs2_jdesc *jd,
63 struct gfs2_log_header *head,
64 unsigned int pass)
65{
66 int x;
67 for (x = 0; gfs2_log_ops[x]; x++)
68 if (gfs2_log_ops[x]->lo_before_scan)
69 gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
70}
71
72static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
73 struct gfs2_log_descriptor *ld,
74 __be64 *ptr,
75 unsigned int pass)
76{
77 int x, error;
78 for (x = 0; gfs2_log_ops[x]; x++)
79 if (gfs2_log_ops[x]->lo_scan_elements) {
80 error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
81 ld, ptr, pass);
82 if (error)
83 return error;
84 }
85
86 return 0;
87}
88
89static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
90 unsigned int pass)
91{
92 int x;
93 for (x = 0; gfs2_log_ops[x]; x++)
94 if (gfs2_log_ops[x]->lo_before_scan)
95 gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
96}
97
98#endif /* __LOPS_DOT_H__ */
99
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
new file mode 100644
index 000000000000..21508a13bb78
--- /dev/null
+++ b/fs/gfs2/main.c
@@ -0,0 +1,150 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19#include <asm/atomic.h>
20
21#include "gfs2.h"
22#include "incore.h"
23#include "ops_fstype.h"
24#include "sys.h"
25#include "util.h"
26#include "glock.h"
27
28static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
29{
30 struct gfs2_inode *ip = foo;
31 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
32 SLAB_CTOR_CONSTRUCTOR) {
33 inode_init_once(&ip->i_inode);
34 spin_lock_init(&ip->i_spin);
35 init_rwsem(&ip->i_rw_mutex);
36 memset(ip->i_cache, 0, sizeof(ip->i_cache));
37 }
38}
39
40static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
41{
42 struct gfs2_glock *gl = foo;
43 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
44 SLAB_CTOR_CONSTRUCTOR) {
45 INIT_HLIST_NODE(&gl->gl_list);
46 spin_lock_init(&gl->gl_spin);
47 INIT_LIST_HEAD(&gl->gl_holders);
48 INIT_LIST_HEAD(&gl->gl_waiters1);
49 INIT_LIST_HEAD(&gl->gl_waiters2);
50 INIT_LIST_HEAD(&gl->gl_waiters3);
51 gl->gl_lvb = NULL;
52 atomic_set(&gl->gl_lvb_count, 0);
53 INIT_LIST_HEAD(&gl->gl_reclaim);
54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0);
56 }
57}
58
59/**
60 * init_gfs2_fs - Register GFS2 as a filesystem
61 *
62 * Returns: 0 on success, error code on failure
63 */
64
65static int __init init_gfs2_fs(void)
66{
67 int error;
68
69 error = gfs2_sys_init();
70 if (error)
71 return error;
72
73 error = gfs2_glock_init();
74 if (error)
75 goto fail;
76
77 error = -ENOMEM;
78 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
79 sizeof(struct gfs2_glock),
80 0, 0,
81 gfs2_init_glock_once, NULL);
82 if (!gfs2_glock_cachep)
83 goto fail;
84
85 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
86 sizeof(struct gfs2_inode),
87 0, (SLAB_RECLAIM_ACCOUNT|
88 SLAB_PANIC|SLAB_MEM_SPREAD),
89 gfs2_init_inode_once, NULL);
90 if (!gfs2_inode_cachep)
91 goto fail;
92
93 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
94 sizeof(struct gfs2_bufdata),
95 0, 0, NULL, NULL);
96 if (!gfs2_bufdata_cachep)
97 goto fail;
98
99 error = register_filesystem(&gfs2_fs_type);
100 if (error)
101 goto fail;
102
103 error = register_filesystem(&gfs2meta_fs_type);
104 if (error)
105 goto fail_unregister;
106
107 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
108
109 return 0;
110
111fail_unregister:
112 unregister_filesystem(&gfs2_fs_type);
113fail:
114 if (gfs2_bufdata_cachep)
115 kmem_cache_destroy(gfs2_bufdata_cachep);
116
117 if (gfs2_inode_cachep)
118 kmem_cache_destroy(gfs2_inode_cachep);
119
120 if (gfs2_glock_cachep)
121 kmem_cache_destroy(gfs2_glock_cachep);
122
123 gfs2_sys_uninit();
124 return error;
125}
126
127/**
128 * exit_gfs2_fs - Unregister the file system
129 *
130 */
131
132static void __exit exit_gfs2_fs(void)
133{
134 unregister_filesystem(&gfs2_fs_type);
135 unregister_filesystem(&gfs2meta_fs_type);
136
137 kmem_cache_destroy(gfs2_bufdata_cachep);
138 kmem_cache_destroy(gfs2_inode_cachep);
139 kmem_cache_destroy(gfs2_glock_cachep);
140
141 gfs2_sys_uninit();
142}
143
144MODULE_DESCRIPTION("Global File System");
145MODULE_AUTHOR("Red Hat, Inc.");
146MODULE_LICENSE("GPL");
147
148module_init(init_gfs2_fs);
149module_exit(exit_gfs2_fs);
150
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
new file mode 100644
index 000000000000..731799153187
--- /dev/null
+++ b/fs/gfs2/meta_io.c
@@ -0,0 +1,753 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/writeback.h>
18#include <linux/swap.h>
19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/lm_interface.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "glock.h"
26#include "glops.h"
27#include "inode.h"
28#include "log.h"
29#include "lops.h"
30#include "meta_io.h"
31#include "rgrp.h"
32#include "trans.h"
33#include "util.h"
34#include "ops_address.h"
35
36#define buffer_busy(bh) \
37((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
38#define buffer_in_io(bh) \
39((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
40
41static int aspace_get_block(struct inode *inode, sector_t lblock,
42 struct buffer_head *bh_result, int create)
43{
44 gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
45 return -EOPNOTSUPP;
46}
47
48static int gfs2_aspace_writepage(struct page *page,
49 struct writeback_control *wbc)
50{
51 return block_write_full_page(page, aspace_get_block, wbc);
52}
53
54static const struct address_space_operations aspace_aops = {
55 .writepage = gfs2_aspace_writepage,
56 .releasepage = gfs2_releasepage,
57};
58
59/**
60 * gfs2_aspace_get - Create and initialize a struct inode structure
61 * @sdp: the filesystem the aspace is in
62 *
63 * Right now a struct inode is just a struct inode. Maybe Linux
64 * will supply a more lightweight address space construct (that works)
65 * in the future.
66 *
67 * Make sure pages/buffers in this aspace aren't in high memory.
68 *
69 * Returns: the aspace
70 */
71
72struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
73{
74 struct inode *aspace;
75
76 aspace = new_inode(sdp->sd_vfs);
77 if (aspace) {
78 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
79 aspace->i_mapping->a_ops = &aspace_aops;
80 aspace->i_size = ~0ULL;
81 aspace->i_private = NULL;
82 insert_inode_hash(aspace);
83 }
84 return aspace;
85}
86
87void gfs2_aspace_put(struct inode *aspace)
88{
89 remove_inode_hash(aspace);
90 iput(aspace);
91}
92
93/**
94 * gfs2_ail1_start_one - Start I/O on a part of the AIL
95 * @sdp: the filesystem
96 * @tr: the part of the AIL
97 *
98 */
99
100void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
101{
102 struct gfs2_bufdata *bd, *s;
103 struct buffer_head *bh;
104 int retry;
105
106 BUG_ON(!spin_is_locked(&sdp->sd_log_lock));
107
108 do {
109 retry = 0;
110
111 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
112 bd_ail_st_list) {
113 bh = bd->bd_bh;
114
115 gfs2_assert(sdp, bd->bd_ail == ai);
116
117 if (!buffer_busy(bh)) {
118 if (!buffer_uptodate(bh)) {
119 gfs2_log_unlock(sdp);
120 gfs2_io_error_bh(sdp, bh);
121 gfs2_log_lock(sdp);
122 }
123 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
124 continue;
125 }
126
127 if (!buffer_dirty(bh))
128 continue;
129
130 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
131
132 gfs2_log_unlock(sdp);
133 wait_on_buffer(bh);
134 ll_rw_block(WRITE, 1, &bh);
135 gfs2_log_lock(sdp);
136
137 retry = 1;
138 break;
139 }
140 } while (retry);
141}
142
143/**
144 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
145 * @sdp: the filesystem
146 * @ai: the AIL entry
147 *
148 */
149
150int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
151{
152 struct gfs2_bufdata *bd, *s;
153 struct buffer_head *bh;
154
155 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
156 bd_ail_st_list) {
157 bh = bd->bd_bh;
158
159 gfs2_assert(sdp, bd->bd_ail == ai);
160
161 if (buffer_busy(bh)) {
162 if (flags & DIO_ALL)
163 continue;
164 else
165 break;
166 }
167
168 if (!buffer_uptodate(bh))
169 gfs2_io_error_bh(sdp, bh);
170
171 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
172 }
173
174 return list_empty(&ai->ai_ail1_list);
175}
176
177/**
178 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
179 * @sdp: the filesystem
180 * @ai: the AIL entry
181 *
182 */
183
184void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
185{
186 struct list_head *head = &ai->ai_ail2_list;
187 struct gfs2_bufdata *bd;
188
189 while (!list_empty(head)) {
190 bd = list_entry(head->prev, struct gfs2_bufdata,
191 bd_ail_st_list);
192 gfs2_assert(sdp, bd->bd_ail == ai);
193 bd->bd_ail = NULL;
194 list_del(&bd->bd_ail_st_list);
195 list_del(&bd->bd_ail_gl_list);
196 atomic_dec(&bd->bd_gl->gl_ail_count);
197 brelse(bd->bd_bh);
198 }
199}
200
201/**
202 * ail_empty_gl - remove all buffers for a given lock from the AIL
203 * @gl: the glock
204 *
205 * None of the buffers should be dirty, locked, or pinned.
206 */
207
208void gfs2_ail_empty_gl(struct gfs2_glock *gl)
209{
210 struct gfs2_sbd *sdp = gl->gl_sbd;
211 unsigned int blocks;
212 struct list_head *head = &gl->gl_ail_list;
213 struct gfs2_bufdata *bd;
214 struct buffer_head *bh;
215 u64 blkno;
216 int error;
217
218 blocks = atomic_read(&gl->gl_ail_count);
219 if (!blocks)
220 return;
221
222 error = gfs2_trans_begin(sdp, 0, blocks);
223 if (gfs2_assert_withdraw(sdp, !error))
224 return;
225
226 gfs2_log_lock(sdp);
227 while (!list_empty(head)) {
228 bd = list_entry(head->next, struct gfs2_bufdata,
229 bd_ail_gl_list);
230 bh = bd->bd_bh;
231 blkno = bh->b_blocknr;
232 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
233
234 bd->bd_ail = NULL;
235 list_del(&bd->bd_ail_st_list);
236 list_del(&bd->bd_ail_gl_list);
237 atomic_dec(&gl->gl_ail_count);
238 brelse(bh);
239 gfs2_log_unlock(sdp);
240
241 gfs2_trans_add_revoke(sdp, blkno);
242
243 gfs2_log_lock(sdp);
244 }
245 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
246 gfs2_log_unlock(sdp);
247
248 gfs2_trans_end(sdp);
249 gfs2_log_flush(sdp, NULL);
250}
251
252/**
253 * gfs2_meta_inval - Invalidate all buffers associated with a glock
254 * @gl: the glock
255 *
256 */
257
258void gfs2_meta_inval(struct gfs2_glock *gl)
259{
260 struct gfs2_sbd *sdp = gl->gl_sbd;
261 struct inode *aspace = gl->gl_aspace;
262 struct address_space *mapping = gl->gl_aspace->i_mapping;
263
264 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
265
266 atomic_inc(&aspace->i_writecount);
267 truncate_inode_pages(mapping, 0);
268 atomic_dec(&aspace->i_writecount);
269
270 gfs2_assert_withdraw(sdp, !mapping->nrpages);
271}
272
273/**
274 * gfs2_meta_sync - Sync all buffers associated with a glock
275 * @gl: The glock
276 *
277 */
278
279void gfs2_meta_sync(struct gfs2_glock *gl)
280{
281 struct address_space *mapping = gl->gl_aspace->i_mapping;
282 int error;
283
284 filemap_fdatawrite(mapping);
285 error = filemap_fdatawait(mapping);
286
287 if (error)
288 gfs2_io_error(gl->gl_sbd);
289}
290
291/**
292 * getbuf - Get a buffer with a given address space
293 * @sdp: the filesystem
294 * @aspace: the address space
295 * @blkno: the block number (filesystem scope)
296 * @create: 1 if the buffer should be created
297 *
298 * Returns: the buffer
299 */
300
301static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
302 u64 blkno, int create)
303{
304 struct page *page;
305 struct buffer_head *bh;
306 unsigned int shift;
307 unsigned long index;
308 unsigned int bufnum;
309
310 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
311 index = blkno >> shift; /* convert block to page */
312 bufnum = blkno - (index << shift); /* block buf index within page */
313
314 if (create) {
315 for (;;) {
316 page = grab_cache_page(aspace->i_mapping, index);
317 if (page)
318 break;
319 yield();
320 }
321 } else {
322 page = find_lock_page(aspace->i_mapping, index);
323 if (!page)
324 return NULL;
325 }
326
327 if (!page_has_buffers(page))
328 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
329
330 /* Locate header for our buffer within our page */
331 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
332 /* Do nothing */;
333 get_bh(bh);
334
335 if (!buffer_mapped(bh))
336 map_bh(bh, sdp->sd_vfs, blkno);
337
338 unlock_page(page);
339 mark_page_accessed(page);
340 page_cache_release(page);
341
342 return bh;
343}
344
345static void meta_prep_new(struct buffer_head *bh)
346{
347 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
348
349 lock_buffer(bh);
350 clear_buffer_dirty(bh);
351 set_buffer_uptodate(bh);
352 unlock_buffer(bh);
353
354 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
355}
356
357/**
358 * gfs2_meta_new - Get a block
359 * @gl: The glock associated with this block
360 * @blkno: The block number
361 *
362 * Returns: The buffer
363 */
364
365struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
366{
367 struct buffer_head *bh;
368 bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
369 meta_prep_new(bh);
370 return bh;
371}
372
373/**
374 * gfs2_meta_read - Read a block from disk
375 * @gl: The glock covering the block
376 * @blkno: The block number
377 * @flags: flags
378 * @bhp: the place where the buffer is returned (NULL on failure)
379 *
380 * Returns: errno
381 */
382
383int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
384 struct buffer_head **bhp)
385{
386 *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
387 if (!buffer_uptodate(*bhp))
388 ll_rw_block(READ, 1, bhp);
389 if (flags & DIO_WAIT) {
390 int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
391 if (error) {
392 brelse(*bhp);
393 return error;
394 }
395 }
396
397 return 0;
398}
399
400/**
401 * gfs2_meta_wait - Reread a block from disk
402 * @sdp: the filesystem
403 * @bh: The block to wait for
404 *
405 * Returns: errno
406 */
407
408int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
409{
410 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
411 return -EIO;
412
413 wait_on_buffer(bh);
414
415 if (!buffer_uptodate(bh)) {
416 struct gfs2_trans *tr = current->journal_info;
417 if (tr && tr->tr_touched)
418 gfs2_io_error_bh(sdp, bh);
419 return -EIO;
420 }
421 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
422 return -EIO;
423
424 return 0;
425}
426
427/**
428 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
429 * @gl: the glock the buffer belongs to
430 * @bh: The buffer to be attached to
431 * @meta: Flag to indicate whether its metadata or not
432 */
433
434void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
435 int meta)
436{
437 struct gfs2_bufdata *bd;
438
439 if (meta)
440 lock_page(bh->b_page);
441
442 if (bh->b_private) {
443 if (meta)
444 unlock_page(bh->b_page);
445 return;
446 }
447
448 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
449 memset(bd, 0, sizeof(struct gfs2_bufdata));
450 bd->bd_bh = bh;
451 bd->bd_gl = gl;
452
453 INIT_LIST_HEAD(&bd->bd_list_tr);
454 if (meta)
455 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
456 else
457 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
458 bh->b_private = bd;
459
460 if (meta)
461 unlock_page(bh->b_page);
462}
463
464/**
465 * gfs2_pin - Pin a buffer in memory
466 * @sdp: the filesystem the buffer belongs to
467 * @bh: The buffer to be pinned
468 *
469 */
470
471void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
472{
473 struct gfs2_bufdata *bd = bh->b_private;
474
475 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
476
477 if (test_set_buffer_pinned(bh))
478 gfs2_assert_withdraw(sdp, 0);
479
480 wait_on_buffer(bh);
481
482 /* If this buffer is in the AIL and it has already been written
483 to in-place disk block, remove it from the AIL. */
484
485 gfs2_log_lock(sdp);
486 if (bd->bd_ail && !buffer_in_io(bh))
487 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
488 gfs2_log_unlock(sdp);
489
490 clear_buffer_dirty(bh);
491 wait_on_buffer(bh);
492
493 if (!buffer_uptodate(bh))
494 gfs2_io_error_bh(sdp, bh);
495
496 get_bh(bh);
497}
498
499/**
500 * gfs2_unpin - Unpin a buffer
501 * @sdp: the filesystem the buffer belongs to
502 * @bh: The buffer to unpin
503 * @ai:
504 *
505 */
506
507void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
508 struct gfs2_ail *ai)
509{
510 struct gfs2_bufdata *bd = bh->b_private;
511
512 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
513
514 if (!buffer_pinned(bh))
515 gfs2_assert_withdraw(sdp, 0);
516
517 mark_buffer_dirty(bh);
518 clear_buffer_pinned(bh);
519
520 gfs2_log_lock(sdp);
521 if (bd->bd_ail) {
522 list_del(&bd->bd_ail_st_list);
523 brelse(bh);
524 } else {
525 struct gfs2_glock *gl = bd->bd_gl;
526 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
527 atomic_inc(&gl->gl_ail_count);
528 }
529 bd->bd_ail = ai;
530 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
531 gfs2_log_unlock(sdp);
532}
533
534/**
535 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
536 * @ip: the inode who owns the buffers
537 * @bstart: the first buffer in the run
538 * @blen: the number of buffers in the run
539 *
540 */
541
542void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
543{
544 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
545 struct inode *aspace = ip->i_gl->gl_aspace;
546 struct buffer_head *bh;
547
548 while (blen) {
549 bh = getbuf(sdp, aspace, bstart, NO_CREATE);
550 if (bh) {
551 struct gfs2_bufdata *bd = bh->b_private;
552
553 if (test_clear_buffer_pinned(bh)) {
554 struct gfs2_trans *tr = current->journal_info;
555 gfs2_log_lock(sdp);
556 list_del_init(&bd->bd_le.le_list);
557 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
558 sdp->sd_log_num_buf--;
559 gfs2_log_unlock(sdp);
560 tr->tr_num_buf_rm++;
561 brelse(bh);
562 }
563 if (bd) {
564 gfs2_log_lock(sdp);
565 if (bd->bd_ail) {
566 u64 blkno = bh->b_blocknr;
567 bd->bd_ail = NULL;
568 list_del(&bd->bd_ail_st_list);
569 list_del(&bd->bd_ail_gl_list);
570 atomic_dec(&bd->bd_gl->gl_ail_count);
571 brelse(bh);
572 gfs2_log_unlock(sdp);
573 gfs2_trans_add_revoke(sdp, blkno);
574 } else
575 gfs2_log_unlock(sdp);
576 }
577
578 lock_buffer(bh);
579 clear_buffer_dirty(bh);
580 clear_buffer_uptodate(bh);
581 unlock_buffer(bh);
582
583 brelse(bh);
584 }
585
586 bstart++;
587 blen--;
588 }
589}
590
591/**
592 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
593 * @ip: The GFS2 inode
594 *
595 * This releases buffers that are in the most-recently-used array of
596 * blocks used for indirect block addressing for this inode.
597 */
598
599void gfs2_meta_cache_flush(struct gfs2_inode *ip)
600{
601 struct buffer_head **bh_slot;
602 unsigned int x;
603
604 spin_lock(&ip->i_spin);
605
606 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
607 bh_slot = &ip->i_cache[x];
608 if (!*bh_slot)
609 break;
610 brelse(*bh_slot);
611 *bh_slot = NULL;
612 }
613
614 spin_unlock(&ip->i_spin);
615}
616
617/**
618 * gfs2_meta_indirect_buffer - Get a metadata buffer
619 * @ip: The GFS2 inode
620 * @height: The level of this buf in the metadata (indir addr) tree (if any)
621 * @num: The block number (device relative) of the buffer
622 * @new: Non-zero if we may create a new buffer
623 * @bhp: the buffer is returned here
624 *
625 * Try to use the gfs2_inode's MRU metadata tree cache.
626 *
627 * Returns: errno
628 */
629
630int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
631 int new, struct buffer_head **bhp)
632{
633 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
634 struct gfs2_glock *gl = ip->i_gl;
635 struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
636 int in_cache = 0;
637
638 spin_lock(&ip->i_spin);
639 if (*bh_slot && (*bh_slot)->b_blocknr == num) {
640 bh = *bh_slot;
641 get_bh(bh);
642 in_cache = 1;
643 }
644 spin_unlock(&ip->i_spin);
645
646 if (!bh)
647 bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);
648
649 if (!bh)
650 return -ENOBUFS;
651
652 if (new) {
653 if (gfs2_assert_warn(sdp, height))
654 goto err;
655 meta_prep_new(bh);
656 gfs2_trans_add_bh(ip->i_gl, bh, 1);
657 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
658 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
659 } else {
660 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
661 if (!buffer_uptodate(bh)) {
662 ll_rw_block(READ, 1, &bh);
663 if (gfs2_meta_wait(sdp, bh))
664 goto err;
665 }
666 if (gfs2_metatype_check(sdp, bh, mtype))
667 goto err;
668 }
669
670 if (!in_cache) {
671 spin_lock(&ip->i_spin);
672 if (*bh_slot)
673 brelse(*bh_slot);
674 *bh_slot = bh;
675 get_bh(bh);
676 spin_unlock(&ip->i_spin);
677 }
678
679 *bhp = bh;
680 return 0;
681err:
682 brelse(bh);
683 return -EIO;
684}
685
686/**
687 * gfs2_meta_ra - start readahead on an extent of a file
688 * @gl: the glock the blocks belong to
689 * @dblock: the starting disk block
690 * @extlen: the number of blocks in the extent
691 *
692 * returns: the first buffer in the extent
693 */
694
695struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
696{
697 struct gfs2_sbd *sdp = gl->gl_sbd;
698 struct inode *aspace = gl->gl_aspace;
699 struct buffer_head *first_bh, *bh;
700 u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
701 sdp->sd_sb.sb_bsize_shift;
702
703 BUG_ON(!extlen);
704
705 if (max_ra < 1)
706 max_ra = 1;
707 if (extlen > max_ra)
708 extlen = max_ra;
709
710 first_bh = getbuf(sdp, aspace, dblock, CREATE);
711
712 if (buffer_uptodate(first_bh))
713 goto out;
714 if (!buffer_locked(first_bh))
715 ll_rw_block(READ, 1, &first_bh);
716
717 dblock++;
718 extlen--;
719
720 while (extlen) {
721 bh = getbuf(sdp, aspace, dblock, CREATE);
722
723 if (!buffer_uptodate(bh) && !buffer_locked(bh))
724 ll_rw_block(READA, 1, &bh);
725 brelse(bh);
726 dblock++;
727 extlen--;
728 if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
729 goto out;
730 }
731
732 wait_on_buffer(first_bh);
733out:
734 return first_bh;
735}
736
737/**
738 * gfs2_meta_syncfs - sync all the buffers in a filesystem
739 * @sdp: the filesystem
740 *
741 */
742
743void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
744{
745 gfs2_log_flush(sdp, NULL);
746 for (;;) {
747 gfs2_ail1_start(sdp, DIO_ALL);
748 if (gfs2_ail1_empty(sdp, DIO_ALL))
749 break;
750 msleep(10);
751 }
752}
753
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
new file mode 100644
index 000000000000..3323e6d0ed8f
--- /dev/null
+++ b/fs/gfs2/meta_io.h
@@ -0,0 +1,78 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __DIO_DOT_H__
11#define __DIO_DOT_H__
12
13#include <linux/buffer_head.h>
14#include <linux/string.h>
15#include "incore.h"
16
17static inline void gfs2_buffer_clear(struct buffer_head *bh)
18{
19 memset(bh->b_data, 0, bh->b_size);
20}
21
22static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
23{
24 BUG_ON(head > bh->b_size);
25 memset(bh->b_data + head, 0, bh->b_size - head);
26}
27
28static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
29 int to_head,
30 struct buffer_head *from_bh,
31 int from_head)
32{
33 BUG_ON(from_head < to_head);
34 memcpy(to_bh->b_data + to_head, from_bh->b_data + from_head,
35 from_bh->b_size - from_head);
36 memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
37 0, from_head - to_head);
38}
39
40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
41void gfs2_aspace_put(struct inode *aspace);
42
43void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
44int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags);
45void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
46void gfs2_ail_empty_gl(struct gfs2_glock *gl);
47
48void gfs2_meta_inval(struct gfs2_glock *gl);
49void gfs2_meta_sync(struct gfs2_glock *gl);
50
51struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
52int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
53 int flags, struct buffer_head **bhp);
54int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
55
56void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
57 int meta);
58void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
59void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
60 struct gfs2_ail *ai);
61
62void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
63
64void gfs2_meta_cache_flush(struct gfs2_inode *ip);
65int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
66 int new, struct buffer_head **bhp);
67
68static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
69 struct buffer_head **bhp)
70{
71 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
72}
73
74struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
75void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
76
77#endif /* __DIO_DOT_H__ */
78
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
new file mode 100644
index 000000000000..ef3092e29607
--- /dev/null
+++ b/fs/gfs2/mount.c
@@ -0,0 +1,214 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "mount.h"
21#include "sys.h"
22#include "util.h"
23
24/**
25 * gfs2_mount_args - Parse mount options
26 * @sdp:
27 * @data:
28 *
29 * Return: errno
30 */
31
32int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
33{
34 struct gfs2_args *args = &sdp->sd_args;
35 char *data = data_arg;
36 char *options, *o, *v;
37 int error = 0;
38
39 if (!remount) {
40 /* If someone preloaded options, use those instead */
41 spin_lock(&gfs2_sys_margs_lock);
42 if (gfs2_sys_margs) {
43 data = gfs2_sys_margs;
44 gfs2_sys_margs = NULL;
45 }
46 spin_unlock(&gfs2_sys_margs_lock);
47
48 /* Set some defaults */
49 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
50 args->ar_quota = GFS2_QUOTA_DEFAULT;
51 args->ar_data = GFS2_DATA_DEFAULT;
52 }
53
54 /* Split the options into tokens with the "," character and
55 process them */
56
57 for (options = data; (o = strsep(&options, ",")); ) {
58 if (!*o)
59 continue;
60
61 v = strchr(o, '=');
62 if (v)
63 *v++ = 0;
64
65 if (!strcmp(o, "lockproto")) {
66 if (!v)
67 goto need_value;
68 if (remount && strcmp(v, args->ar_lockproto))
69 goto cant_remount;
70 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
71 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
72 }
73
74 else if (!strcmp(o, "locktable")) {
75 if (!v)
76 goto need_value;
77 if (remount && strcmp(v, args->ar_locktable))
78 goto cant_remount;
79 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
80 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
81 }
82
83 else if (!strcmp(o, "hostdata")) {
84 if (!v)
85 goto need_value;
86 if (remount && strcmp(v, args->ar_hostdata))
87 goto cant_remount;
88 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
89 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
90 }
91
92 else if (!strcmp(o, "spectator")) {
93 if (remount && !args->ar_spectator)
94 goto cant_remount;
95 args->ar_spectator = 1;
96 sdp->sd_vfs->s_flags |= MS_RDONLY;
97 }
98
99 else if (!strcmp(o, "ignore_local_fs")) {
100 if (remount && !args->ar_ignore_local_fs)
101 goto cant_remount;
102 args->ar_ignore_local_fs = 1;
103 }
104
105 else if (!strcmp(o, "localflocks")) {
106 if (remount && !args->ar_localflocks)
107 goto cant_remount;
108 args->ar_localflocks = 1;
109 }
110
111 else if (!strcmp(o, "localcaching")) {
112 if (remount && !args->ar_localcaching)
113 goto cant_remount;
114 args->ar_localcaching = 1;
115 }
116
117 else if (!strcmp(o, "debug"))
118 args->ar_debug = 1;
119
120 else if (!strcmp(o, "nodebug"))
121 args->ar_debug = 0;
122
123 else if (!strcmp(o, "upgrade")) {
124 if (remount && !args->ar_upgrade)
125 goto cant_remount;
126 args->ar_upgrade = 1;
127 }
128
129 else if (!strcmp(o, "num_glockd")) {
130 unsigned int x;
131 if (!v)
132 goto need_value;
133 sscanf(v, "%u", &x);
134 if (remount && x != args->ar_num_glockd)
135 goto cant_remount;
136 if (!x || x > GFS2_GLOCKD_MAX) {
137 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
138 GFS2_GLOCKD_MAX, x);
139 error = -EINVAL;
140 break;
141 }
142 args->ar_num_glockd = x;
143 }
144
145 else if (!strcmp(o, "acl")) {
146 args->ar_posix_acl = 1;
147 sdp->sd_vfs->s_flags |= MS_POSIXACL;
148 }
149
150 else if (!strcmp(o, "noacl")) {
151 args->ar_posix_acl = 0;
152 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
153 }
154
155 else if (!strcmp(o, "quota")) {
156 if (!v)
157 goto need_value;
158 if (!strcmp(v, "off"))
159 args->ar_quota = GFS2_QUOTA_OFF;
160 else if (!strcmp(v, "account"))
161 args->ar_quota = GFS2_QUOTA_ACCOUNT;
162 else if (!strcmp(v, "on"))
163 args->ar_quota = GFS2_QUOTA_ON;
164 else {
165 fs_info(sdp, "invalid value for quota\n");
166 error = -EINVAL;
167 break;
168 }
169 }
170
171 else if (!strcmp(o, "suiddir"))
172 args->ar_suiddir = 1;
173
174 else if (!strcmp(o, "nosuiddir"))
175 args->ar_suiddir = 0;
176
177 else if (!strcmp(o, "data")) {
178 if (!v)
179 goto need_value;
180 if (!strcmp(v, "writeback"))
181 args->ar_data = GFS2_DATA_WRITEBACK;
182 else if (!strcmp(v, "ordered"))
183 args->ar_data = GFS2_DATA_ORDERED;
184 else {
185 fs_info(sdp, "invalid value for data\n");
186 error = -EINVAL;
187 break;
188 }
189 }
190
191 else {
192 fs_info(sdp, "unknown option: %s\n", o);
193 error = -EINVAL;
194 break;
195 }
196 }
197
198 if (error)
199 fs_info(sdp, "invalid mount option(s)\n");
200
201 if (data != data_arg)
202 kfree(data);
203
204 return error;
205
206need_value:
207 fs_info(sdp, "need value for option %s\n", o);
208 return -EINVAL;
209
210cant_remount:
211 fs_info(sdp, "can't remount with option %s\n", o);
212 return -EINVAL;
213}
214
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
new file mode 100644
index 000000000000..401288acfdf3
--- /dev/null
+++ b/fs/gfs2/mount.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13struct gfs2_sbd;
14
15int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
16
17#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
new file mode 100644
index 000000000000..1025960b0e6e
--- /dev/null
+++ b/fs/gfs2/ondisk.c
@@ -0,0 +1,308 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15
16#include "gfs2.h"
17#include <linux/gfs2_ondisk.h>
18
19#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \
20 struct->member);
21
22/*
23 * gfs2_xxx_in - read in an xxx struct
24 * first arg: the cpu-order structure
25 * buf: the disk-order buffer
26 *
27 * gfs2_xxx_out - write out an xxx struct
28 * first arg: the cpu-order structure
29 * buf: the disk-order buffer
30 *
31 * gfs2_xxx_print - print out an xxx struct
32 * first arg: the cpu-order structure
33 */
34
35void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
36{
37 const struct gfs2_inum *str = buf;
38
39 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
40 no->no_addr = be64_to_cpu(str->no_addr);
41}
42
43void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
44{
45 struct gfs2_inum *str = buf;
46
47 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
48 str->no_addr = cpu_to_be64(no->no_addr);
49}
50
51static void gfs2_inum_print(const struct gfs2_inum *no)
52{
53 printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
54 printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
55}
56
57static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
58{
59 const struct gfs2_meta_header *str = buf;
60
61 mh->mh_magic = be32_to_cpu(str->mh_magic);
62 mh->mh_type = be32_to_cpu(str->mh_type);
63 mh->mh_format = be32_to_cpu(str->mh_format);
64}
65
66static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
67{
68 struct gfs2_meta_header *str = buf;
69
70 str->mh_magic = cpu_to_be32(mh->mh_magic);
71 str->mh_type = cpu_to_be32(mh->mh_type);
72 str->mh_format = cpu_to_be32(mh->mh_format);
73}
74
75static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
76{
77 pv(mh, mh_magic, "0x%.8X");
78 pv(mh, mh_type, "%u");
79 pv(mh, mh_format, "%u");
80}
81
82void gfs2_sb_in(struct gfs2_sb *sb, const void *buf)
83{
84 const struct gfs2_sb *str = buf;
85
86 gfs2_meta_header_in(&sb->sb_header, buf);
87
88 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
89 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
90 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
91 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
92
93 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
94 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
95
96 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
97 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
98}
99
100void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
101{
102 const struct gfs2_rindex *str = buf;
103
104 ri->ri_addr = be64_to_cpu(str->ri_addr);
105 ri->ri_length = be32_to_cpu(str->ri_length);
106 ri->ri_data0 = be64_to_cpu(str->ri_data0);
107 ri->ri_data = be32_to_cpu(str->ri_data);
108 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
109
110}
111
112void gfs2_rindex_print(const struct gfs2_rindex *ri)
113{
114 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
115 pv(ri, ri_length, "%u");
116
117 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
118 pv(ri, ri_data, "%u");
119
120 pv(ri, ri_bitbytes, "%u");
121}
122
123void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
124{
125 const struct gfs2_rgrp *str = buf;
126
127 gfs2_meta_header_in(&rg->rg_header, buf);
128 rg->rg_flags = be32_to_cpu(str->rg_flags);
129 rg->rg_free = be32_to_cpu(str->rg_free);
130 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
131 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
132}
133
134void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf)
135{
136 struct gfs2_rgrp *str = buf;
137
138 gfs2_meta_header_out(&rg->rg_header, buf);
139 str->rg_flags = cpu_to_be32(rg->rg_flags);
140 str->rg_free = cpu_to_be32(rg->rg_free);
141 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
142 str->__pad = cpu_to_be32(0);
143 str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
144 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
145}
146
147void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
148{
149 const struct gfs2_quota *str = buf;
150
151 qu->qu_limit = be64_to_cpu(str->qu_limit);
152 qu->qu_warn = be64_to_cpu(str->qu_warn);
153 qu->qu_value = be64_to_cpu(str->qu_value);
154}
155
156void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
157{
158 const struct gfs2_dinode *str = buf;
159
160 gfs2_meta_header_in(&di->di_header, buf);
161 gfs2_inum_in(&di->di_num, &str->di_num);
162
163 di->di_mode = be32_to_cpu(str->di_mode);
164 di->di_uid = be32_to_cpu(str->di_uid);
165 di->di_gid = be32_to_cpu(str->di_gid);
166 di->di_nlink = be32_to_cpu(str->di_nlink);
167 di->di_size = be64_to_cpu(str->di_size);
168 di->di_blocks = be64_to_cpu(str->di_blocks);
169 di->di_atime = be64_to_cpu(str->di_atime);
170 di->di_mtime = be64_to_cpu(str->di_mtime);
171 di->di_ctime = be64_to_cpu(str->di_ctime);
172 di->di_major = be32_to_cpu(str->di_major);
173 di->di_minor = be32_to_cpu(str->di_minor);
174
175 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
176 di->di_goal_data = be64_to_cpu(str->di_goal_data);
177 di->di_generation = be64_to_cpu(str->di_generation);
178
179 di->di_flags = be32_to_cpu(str->di_flags);
180 di->di_payload_format = be32_to_cpu(str->di_payload_format);
181 di->di_height = be16_to_cpu(str->di_height);
182
183 di->di_depth = be16_to_cpu(str->di_depth);
184 di->di_entries = be32_to_cpu(str->di_entries);
185
186 di->di_eattr = be64_to_cpu(str->di_eattr);
187
188}
189
190void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
191{
192 struct gfs2_dinode *str = buf;
193
194 gfs2_meta_header_out(&di->di_header, buf);
195 gfs2_inum_out(&di->di_num, (char *)&str->di_num);
196
197 str->di_mode = cpu_to_be32(di->di_mode);
198 str->di_uid = cpu_to_be32(di->di_uid);
199 str->di_gid = cpu_to_be32(di->di_gid);
200 str->di_nlink = cpu_to_be32(di->di_nlink);
201 str->di_size = cpu_to_be64(di->di_size);
202 str->di_blocks = cpu_to_be64(di->di_blocks);
203 str->di_atime = cpu_to_be64(di->di_atime);
204 str->di_mtime = cpu_to_be64(di->di_mtime);
205 str->di_ctime = cpu_to_be64(di->di_ctime);
206 str->di_major = cpu_to_be32(di->di_major);
207 str->di_minor = cpu_to_be32(di->di_minor);
208
209 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
210 str->di_goal_data = cpu_to_be64(di->di_goal_data);
211 str->di_generation = cpu_to_be64(di->di_generation);
212
213 str->di_flags = cpu_to_be32(di->di_flags);
214 str->di_payload_format = cpu_to_be32(di->di_payload_format);
215 str->di_height = cpu_to_be16(di->di_height);
216
217 str->di_depth = cpu_to_be16(di->di_depth);
218 str->di_entries = cpu_to_be32(di->di_entries);
219
220 str->di_eattr = cpu_to_be64(di->di_eattr);
221
222}
223
224void gfs2_dinode_print(const struct gfs2_dinode *di)
225{
226 gfs2_meta_header_print(&di->di_header);
227 gfs2_inum_print(&di->di_num);
228
229 pv(di, di_mode, "0%o");
230 pv(di, di_uid, "%u");
231 pv(di, di_gid, "%u");
232 pv(di, di_nlink, "%u");
233 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
234 printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
235 printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime);
236 printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime);
237 printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime);
238 pv(di, di_major, "%u");
239 pv(di, di_minor, "%u");
240
241 printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
242 printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
243
244 pv(di, di_flags, "0x%.8X");
245 pv(di, di_payload_format, "%u");
246 pv(di, di_height, "%u");
247
248 pv(di, di_depth, "%u");
249 pv(di, di_entries, "%u");
250
251 printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
252}
253
254void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
255{
256 const struct gfs2_log_header *str = buf;
257
258 gfs2_meta_header_in(&lh->lh_header, buf);
259 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
260 lh->lh_flags = be32_to_cpu(str->lh_flags);
261 lh->lh_tail = be32_to_cpu(str->lh_tail);
262 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
263 lh->lh_hash = be32_to_cpu(str->lh_hash);
264}
265
266void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
267{
268 const struct gfs2_inum_range *str = buf;
269
270 ir->ir_start = be64_to_cpu(str->ir_start);
271 ir->ir_length = be64_to_cpu(str->ir_length);
272}
273
274void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf)
275{
276 struct gfs2_inum_range *str = buf;
277
278 str->ir_start = cpu_to_be64(ir->ir_start);
279 str->ir_length = cpu_to_be64(ir->ir_length);
280}
281
282void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
283{
284 const struct gfs2_statfs_change *str = buf;
285
286 sc->sc_total = be64_to_cpu(str->sc_total);
287 sc->sc_free = be64_to_cpu(str->sc_free);
288 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
289}
290
291void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf)
292{
293 struct gfs2_statfs_change *str = buf;
294
295 str->sc_total = cpu_to_be64(sc->sc_total);
296 str->sc_free = cpu_to_be64(sc->sc_free);
297 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
298}
299
300void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf)
301{
302 const struct gfs2_quota_change *str = buf;
303
304 qc->qc_change = be64_to_cpu(str->qc_change);
305 qc->qc_flags = be32_to_cpu(str->qc_flags);
306 qc->qc_id = be32_to_cpu(str->qc_id);
307}
308
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
new file mode 100644
index 000000000000..811f4ada2a01
--- /dev/null
+++ b/fs/gfs2/ops_address.c
@@ -0,0 +1,782 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/pagevec.h>
17#include <linux/mpage.h>
18#include <linux/fs.h>
19#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h>
21
22#include "gfs2.h"
23#include "incore.h"
24#include "bmap.h"
25#include "glock.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "ops_address.h"
30#include "quota.h"
31#include "trans.h"
32#include "rgrp.h"
33#include "ops_file.h"
34#include "util.h"
35#include "glops.h"
36
37
38static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
39 unsigned int from, unsigned int to)
40{
41 struct buffer_head *head = page_buffers(page);
42 unsigned int bsize = head->b_size;
43 struct buffer_head *bh;
44 unsigned int start, end;
45
46 for (bh = head, start = 0; bh != head || !start;
47 bh = bh->b_this_page, start = end) {
48 end = start + bsize;
49 if (end <= from || start >= to)
50 continue;
51 gfs2_trans_add_bh(ip->i_gl, bh, 0);
52 }
53}
54
55/**
56 * gfs2_get_block - Fills in a buffer head with details about a block
57 * @inode: The inode
58 * @lblock: The block number to look up
59 * @bh_result: The buffer head to return the result in
60 * @create: Non-zero if we may add block to the file
61 *
62 * Returns: errno
63 */
64
65int gfs2_get_block(struct inode *inode, sector_t lblock,
66 struct buffer_head *bh_result, int create)
67{
68 return gfs2_block_map(inode, lblock, create, bh_result, 32);
69}
70
71/**
72 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
73 * @inode: The inode
74 * @lblock: The block number to look up
75 * @bh_result: The buffer head to return the result in
76 * @create: Non-zero if we may add block to the file
77 *
78 * Returns: errno
79 */
80
81static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
82 struct buffer_head *bh_result, int create)
83{
84 int error;
85
86 error = gfs2_block_map(inode, lblock, 0, bh_result, 1);
87 if (error)
88 return error;
89 if (bh_result->b_blocknr == 0)
90 return -EIO;
91 return 0;
92}
93
94static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
95 struct buffer_head *bh_result, int create)
96{
97 return gfs2_block_map(inode, lblock, 0, bh_result, 32);
98}
99
100/**
101 * gfs2_writepage - Write complete page
102 * @page: Page to write
103 *
104 * Returns: errno
105 *
106 * Some of this is copied from block_write_full_page() although we still
107 * call it to do most of the work.
108 */
109
110static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
111{
112 struct inode *inode = page->mapping->host;
113 struct gfs2_inode *ip = GFS2_I(inode);
114 struct gfs2_sbd *sdp = GFS2_SB(inode);
115 loff_t i_size = i_size_read(inode);
116 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
117 unsigned offset;
118 int error;
119 int done_trans = 0;
120
121 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
122 unlock_page(page);
123 return -EIO;
124 }
125 if (current->journal_info)
126 goto out_ignore;
127
128 /* Is the page fully outside i_size? (truncate in progress) */
129 offset = i_size & (PAGE_CACHE_SIZE-1);
130 if (page->index > end_index || (page->index == end_index && !offset)) {
131 page->mapping->a_ops->invalidatepage(page, 0);
132 unlock_page(page);
133 return 0; /* don't care */
134 }
135
136 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
137 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
138 if (error)
139 goto out_ignore;
140 if (!page_has_buffers(page)) {
141 create_empty_buffers(page, inode->i_sb->s_blocksize,
142 (1 << BH_Dirty)|(1 << BH_Uptodate));
143 }
144 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
145 done_trans = 1;
146 }
147 error = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
148 if (done_trans)
149 gfs2_trans_end(sdp);
150 gfs2_meta_cache_flush(ip);
151 return error;
152
153out_ignore:
154 redirty_page_for_writepage(wbc, page);
155 unlock_page(page);
156 return 0;
157}
158
159static int zero_readpage(struct page *page)
160{
161 void *kaddr;
162
163 kaddr = kmap_atomic(page, KM_USER0);
164 memset(kaddr, 0, PAGE_CACHE_SIZE);
165 kunmap_atomic(page, KM_USER0);
166
167 SetPageUptodate(page);
168
169 return 0;
170}
171
172/**
173 * stuffed_readpage - Fill in a Linux page with stuffed file data
174 * @ip: the inode
175 * @page: the page
176 *
177 * Returns: errno
178 */
179
180static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
181{
182 struct buffer_head *dibh;
183 void *kaddr;
184 int error;
185
186 /* Only the first page of a stuffed file might contain data */
187 if (unlikely(page->index))
188 return zero_readpage(page);
189
190 error = gfs2_meta_inode_buffer(ip, &dibh);
191 if (error)
192 return error;
193
194 kaddr = kmap_atomic(page, KM_USER0);
195 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
196 ip->i_di.di_size);
197 memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
198 kunmap_atomic(page, KM_USER0);
199
200 brelse(dibh);
201
202 SetPageUptodate(page);
203
204 return 0;
205}
206
207
208/**
209 * gfs2_readpage - readpage with locking
210 * @file: The file to read a page for. N.B. This may be NULL if we are
211 * reading an internal file.
212 * @page: The page to read
213 *
214 * Returns: errno
215 */
216
217static int gfs2_readpage(struct file *file, struct page *page)
218{
219 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
220 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
221 struct gfs2_file *gf = NULL;
222 struct gfs2_holder gh;
223 int error;
224 int do_unlock = 0;
225
226 if (likely(file != &gfs2_internal_file_sentinel)) {
227 if (file) {
228 gf = file->private_data;
229 if (test_bit(GFF_EXLOCK, &gf->f_flags))
230 /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
231 goto skip_lock;
232 }
233 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
234 do_unlock = 1;
235 error = gfs2_glock_nq_m_atime(1, &gh);
236 if (unlikely(error))
237 goto out_unlock;
238 }
239
240skip_lock:
241 if (gfs2_is_stuffed(ip)) {
242 error = stuffed_readpage(ip, page);
243 unlock_page(page);
244 } else
245 error = mpage_readpage(page, gfs2_get_block);
246
247 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
248 error = -EIO;
249
250 if (do_unlock) {
251 gfs2_glock_dq_m(1, &gh);
252 gfs2_holder_uninit(&gh);
253 }
254out:
255 return error;
256out_unlock:
257 unlock_page(page);
258 if (do_unlock)
259 gfs2_holder_uninit(&gh);
260 goto out;
261}
262
263/**
264 * gfs2_readpages - Read a bunch of pages at once
265 *
266 * Some notes:
267 * 1. This is only for readahead, so we can simply ignore any things
268 * which are slightly inconvenient (such as locking conflicts between
269 * the page lock and the glock) and return having done no I/O. Its
270 * obviously not something we'd want to do on too regular a basis.
271 * Any I/O we ignore at this time will be done via readpage later.
272 * 2. We have to handle stuffed files here too.
273 * 3. mpage_readpages() does most of the heavy lifting in the common case.
274 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
275 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
276 * well as read-ahead.
277 */
278static int gfs2_readpages(struct file *file, struct address_space *mapping,
279 struct list_head *pages, unsigned nr_pages)
280{
281 struct inode *inode = mapping->host;
282 struct gfs2_inode *ip = GFS2_I(inode);
283 struct gfs2_sbd *sdp = GFS2_SB(inode);
284 struct gfs2_holder gh;
285 unsigned page_idx;
286 int ret;
287 int do_unlock = 0;
288
289 if (likely(file != &gfs2_internal_file_sentinel)) {
290 if (file) {
291 struct gfs2_file *gf = file->private_data;
292 if (test_bit(GFF_EXLOCK, &gf->f_flags))
293 goto skip_lock;
294 }
295 gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
296 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
297 do_unlock = 1;
298 ret = gfs2_glock_nq_m_atime(1, &gh);
299 if (ret == GLR_TRYFAILED)
300 goto out_noerror;
301 if (unlikely(ret))
302 goto out_unlock;
303 }
304skip_lock:
305 if (gfs2_is_stuffed(ip)) {
306 struct pagevec lru_pvec;
307 pagevec_init(&lru_pvec, 0);
308 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
309 struct page *page = list_entry(pages->prev, struct page, lru);
310 prefetchw(&page->flags);
311 list_del(&page->lru);
312 if (!add_to_page_cache(page, mapping,
313 page->index, GFP_KERNEL)) {
314 ret = stuffed_readpage(ip, page);
315 unlock_page(page);
316 if (!pagevec_add(&lru_pvec, page))
317 __pagevec_lru_add(&lru_pvec);
318 } else {
319 page_cache_release(page);
320 }
321 }
322 pagevec_lru_add(&lru_pvec);
323 ret = 0;
324 } else {
325 /* What we really want to do .... */
326 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
327 }
328
329 if (do_unlock) {
330 gfs2_glock_dq_m(1, &gh);
331 gfs2_holder_uninit(&gh);
332 }
333out:
334 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
335 ret = -EIO;
336 return ret;
337out_noerror:
338 ret = 0;
339out_unlock:
340 /* unlock all pages, we can't do any I/O right now */
341 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
342 struct page *page = list_entry(pages->prev, struct page, lru);
343 list_del(&page->lru);
344 unlock_page(page);
345 page_cache_release(page);
346 }
347 if (do_unlock)
348 gfs2_holder_uninit(&gh);
349 goto out;
350}
351
352/**
353 * gfs2_prepare_write - Prepare to write a page to a file
354 * @file: The file to write to
355 * @page: The page which is to be prepared for writing
356 * @from: From (byte range within page)
357 * @to: To (byte range within page)
358 *
359 * Returns: errno
360 */
361
362static int gfs2_prepare_write(struct file *file, struct page *page,
363 unsigned from, unsigned to)
364{
365 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
366 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
367 unsigned int data_blocks, ind_blocks, rblocks;
368 int alloc_required;
369 int error = 0;
370 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
371 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
372 struct gfs2_alloc *al;
373
374 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
375 error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
376 if (error)
377 goto out_uninit;
378
379 gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
380
381 error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
382 if (error)
383 goto out_unlock;
384
385
386 if (alloc_required) {
387 al = gfs2_alloc_get(ip);
388
389 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
390 if (error)
391 goto out_alloc_put;
392
393 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
394 if (error)
395 goto out_qunlock;
396
397 al->al_requested = data_blocks + ind_blocks;
398 error = gfs2_inplace_reserve(ip);
399 if (error)
400 goto out_qunlock;
401 }
402
403 rblocks = RES_DINODE + ind_blocks;
404 if (gfs2_is_jdata(ip))
405 rblocks += data_blocks ? data_blocks : 1;
406 if (ind_blocks || data_blocks)
407 rblocks += RES_STATFS + RES_QUOTA;
408
409 error = gfs2_trans_begin(sdp, rblocks, 0);
410 if (error)
411 goto out;
412
413 if (gfs2_is_stuffed(ip)) {
414 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
415 error = gfs2_unstuff_dinode(ip, page);
416 if (error == 0)
417 goto prepare_write;
418 } else if (!PageUptodate(page))
419 error = stuffed_readpage(ip, page);
420 goto out;
421 }
422
423prepare_write:
424 error = block_prepare_write(page, from, to, gfs2_get_block);
425
426out:
427 if (error) {
428 gfs2_trans_end(sdp);
429 if (alloc_required) {
430 gfs2_inplace_release(ip);
431out_qunlock:
432 gfs2_quota_unlock(ip);
433out_alloc_put:
434 gfs2_alloc_put(ip);
435 }
436out_unlock:
437 gfs2_glock_dq_m(1, &ip->i_gh);
438out_uninit:
439 gfs2_holder_uninit(&ip->i_gh);
440 }
441
442 return error;
443}
444
445/**
446 * gfs2_commit_write - Commit write to a file
447 * @file: The file to write to
448 * @page: The page containing the data
449 * @from: From (byte range within page)
450 * @to: To (byte range within page)
451 *
452 * Returns: errno
453 */
454
455static int gfs2_commit_write(struct file *file, struct page *page,
456 unsigned from, unsigned to)
457{
458 struct inode *inode = page->mapping->host;
459 struct gfs2_inode *ip = GFS2_I(inode);
460 struct gfs2_sbd *sdp = GFS2_SB(inode);
461 int error = -EOPNOTSUPP;
462 struct buffer_head *dibh;
463 struct gfs2_alloc *al = &ip->i_alloc;;
464
465 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
466 goto fail_nounlock;
467
468 error = gfs2_meta_inode_buffer(ip, &dibh);
469 if (error)
470 goto fail_endtrans;
471
472 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
473
474 if (gfs2_is_stuffed(ip)) {
475 u64 file_size;
476 void *kaddr;
477
478 file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to;
479
480 kaddr = kmap_atomic(page, KM_USER0);
481 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
482 kaddr + from, to - from);
483 kunmap_atomic(page, KM_USER0);
484
485 SetPageUptodate(page);
486
487 if (inode->i_size < file_size)
488 i_size_write(inode, file_size);
489 } else {
490 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
491 gfs2_is_jdata(ip))
492 gfs2_page_add_databufs(ip, page, from, to);
493 error = generic_commit_write(file, page, from, to);
494 if (error)
495 goto fail;
496 }
497
498 if (ip->i_di.di_size < inode->i_size)
499 ip->i_di.di_size = inode->i_size;
500
501 gfs2_dinode_out(&ip->i_di, dibh->b_data);
502 brelse(dibh);
503 gfs2_trans_end(sdp);
504 if (al->al_requested) {
505 gfs2_inplace_release(ip);
506 gfs2_quota_unlock(ip);
507 gfs2_alloc_put(ip);
508 }
509 gfs2_glock_dq_m(1, &ip->i_gh);
510 gfs2_holder_uninit(&ip->i_gh);
511 return 0;
512
513fail:
514 brelse(dibh);
515fail_endtrans:
516 gfs2_trans_end(sdp);
517 if (al->al_requested) {
518 gfs2_inplace_release(ip);
519 gfs2_quota_unlock(ip);
520 gfs2_alloc_put(ip);
521 }
522 gfs2_glock_dq_m(1, &ip->i_gh);
523 gfs2_holder_uninit(&ip->i_gh);
524fail_nounlock:
525 ClearPageUptodate(page);
526 return error;
527}
528
529/**
530 * gfs2_bmap - Block map function
531 * @mapping: Address space info
532 * @lblock: The block to map
533 *
534 * Returns: The disk address for the block or 0 on hole or error
535 */
536
537static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
538{
539 struct gfs2_inode *ip = GFS2_I(mapping->host);
540 struct gfs2_holder i_gh;
541 sector_t dblock = 0;
542 int error;
543
544 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
545 if (error)
546 return 0;
547
548 if (!gfs2_is_stuffed(ip))
549 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);
550
551 gfs2_glock_dq_uninit(&i_gh);
552
553 return dblock;
554}
555
556static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
557{
558 struct gfs2_bufdata *bd;
559
560 gfs2_log_lock(sdp);
561 bd = bh->b_private;
562 if (bd) {
563 bd->bd_bh = NULL;
564 bh->b_private = NULL;
565 }
566 gfs2_log_unlock(sdp);
567
568 lock_buffer(bh);
569 clear_buffer_dirty(bh);
570 bh->b_bdev = NULL;
571 clear_buffer_mapped(bh);
572 clear_buffer_req(bh);
573 clear_buffer_new(bh);
574 clear_buffer_delay(bh);
575 unlock_buffer(bh);
576}
577
578static void gfs2_invalidatepage(struct page *page, unsigned long offset)
579{
580 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
581 struct buffer_head *head, *bh, *next;
582 unsigned int curr_off = 0;
583
584 BUG_ON(!PageLocked(page));
585 if (!page_has_buffers(page))
586 return;
587
588 bh = head = page_buffers(page);
589 do {
590 unsigned int next_off = curr_off + bh->b_size;
591 next = bh->b_this_page;
592
593 if (offset <= curr_off)
594 discard_buffer(sdp, bh);
595
596 curr_off = next_off;
597 bh = next;
598 } while (bh != head);
599
600 if (!offset)
601 try_to_release_page(page, 0);
602
603 return;
604}
605
606static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
607 const struct iovec *iov, loff_t offset,
608 unsigned long nr_segs)
609{
610 struct file *file = iocb->ki_filp;
611 struct inode *inode = file->f_mapping->host;
612 struct gfs2_inode *ip = GFS2_I(inode);
613 struct gfs2_holder gh;
614 int rv;
615
616 if (rw == READ)
617 mutex_lock(&inode->i_mutex);
618 /*
619 * Shared lock, even if its a write, since we do no allocation
620 * on this path. All we need change is atime.
621 */
622 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
623 rv = gfs2_glock_nq_m_atime(1, &gh);
624 if (rv)
625 goto out;
626
627 if (offset > i_size_read(inode))
628 goto out;
629
630 /*
631 * Should we return an error here? I can't see that O_DIRECT for
632 * a journaled file makes any sense. For now we'll silently fall
633 * back to buffered I/O, likewise we do the same for stuffed
634 * files since they are (a) small and (b) unaligned.
635 */
636 if (gfs2_is_jdata(ip))
637 goto out;
638
639 if (gfs2_is_stuffed(ip))
640 goto out;
641
642 rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
643 inode->i_sb->s_bdev,
644 iov, offset, nr_segs,
645 gfs2_get_block_direct, NULL);
646out:
647 gfs2_glock_dq_m(1, &gh);
648 gfs2_holder_uninit(&gh);
649 if (rw == READ)
650 mutex_unlock(&inode->i_mutex);
651
652 return rv;
653}
654
655/**
656 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
657 * @bh: the buffer we're stuck on
658 *
659 */
660
661static void stuck_releasepage(struct buffer_head *bh)
662{
663 struct inode *inode = bh->b_page->mapping->host;
664 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
665 struct gfs2_bufdata *bd = bh->b_private;
666 struct gfs2_glock *gl;
667static unsigned limit = 0;
668
669 if (limit > 3)
670 return;
671 limit++;
672
673 fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
674 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
675 (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
676 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
677 fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
678
679 if (!bd)
680 return;
681
682 gl = bd->bd_gl;
683
684 fs_warn(sdp, "gl = (%u, %llu)\n",
685 gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
686
687 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
688 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
689 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
690
691 if (gl->gl_ops == &gfs2_inode_glops) {
692 struct gfs2_inode *ip = gl->gl_object;
693 unsigned int x;
694
695 if (!ip)
696 return;
697
698 fs_warn(sdp, "ip = %llu %llu\n",
699 (unsigned long long)ip->i_num.no_formal_ino,
700 (unsigned long long)ip->i_num.no_addr);
701
702 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
703 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
704 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
705 }
706}
707
708/**
709 * gfs2_releasepage - free the metadata associated with a page
710 * @page: the page that's being released
711 * @gfp_mask: passed from Linux VFS, ignored by us
712 *
713 * Call try_to_free_buffers() if the buffers in this page can be
714 * released.
715 *
716 * Returns: 0
717 */
718
719int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
720{
721 struct inode *aspace = page->mapping->host;
722 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
723 struct buffer_head *bh, *head;
724 struct gfs2_bufdata *bd;
725 unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
726
727 if (!page_has_buffers(page))
728 goto out;
729
730 head = bh = page_buffers(page);
731 do {
732 while (atomic_read(&bh->b_count)) {
733 if (!atomic_read(&aspace->i_writecount))
734 return 0;
735
736 if (time_after_eq(jiffies, t)) {
737 stuck_releasepage(bh);
738 /* should we withdraw here? */
739 return 0;
740 }
741
742 yield();
743 }
744
745 gfs2_assert_warn(sdp, !buffer_pinned(bh));
746 gfs2_assert_warn(sdp, !buffer_dirty(bh));
747
748 gfs2_log_lock(sdp);
749 bd = bh->b_private;
750 if (bd) {
751 gfs2_assert_warn(sdp, bd->bd_bh == bh);
752 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
753 gfs2_assert_warn(sdp, !bd->bd_ail);
754 bd->bd_bh = NULL;
755 if (!list_empty(&bd->bd_le.le_list))
756 bd = NULL;
757 bh->b_private = NULL;
758 }
759 gfs2_log_unlock(sdp);
760 if (bd)
761 kmem_cache_free(gfs2_bufdata_cachep, bd);
762
763 bh = bh->b_this_page;
764 } while (bh != head);
765
766out:
767 return try_to_free_buffers(page);
768}
769
770const struct address_space_operations gfs2_file_aops = {
771 .writepage = gfs2_writepage,
772 .readpage = gfs2_readpage,
773 .readpages = gfs2_readpages,
774 .sync_page = block_sync_page,
775 .prepare_write = gfs2_prepare_write,
776 .commit_write = gfs2_commit_write,
777 .bmap = gfs2_bmap,
778 .invalidatepage = gfs2_invalidatepage,
779 .releasepage = gfs2_releasepage,
780 .direct_IO = gfs2_direct_IO,
781};
782
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
new file mode 100644
index 000000000000..35aaee4aa7e1
--- /dev/null
+++ b/fs/gfs2/ops_address.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_ADDRESS_DOT_H__
11#define __OPS_ADDRESS_DOT_H__
12
13#include <linux/fs.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16
17extern const struct address_space_operations gfs2_file_aops;
18extern int gfs2_get_block(struct inode *inode, sector_t lblock,
19 struct buffer_head *bh_result, int create);
20extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
21
22#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
new file mode 100644
index 000000000000..00041b1b8025
--- /dev/null
+++ b/fs/gfs2/ops_dentry.c
@@ -0,0 +1,119 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/smp_lock.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/crc32.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "dir.h"
23#include "glock.h"
24#include "ops_dentry.h"
25#include "util.h"
26
27/**
28 * gfs2_drevalidate - Check directory lookup consistency
29 * @dentry: the mapping to check
30 * @nd:
31 *
32 * Check to make sure the lookup necessary to arrive at this inode from its
33 * parent is still good.
34 *
35 * Returns: 1 if the dentry is ok, 0 if it isn't
36 */
37
38static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
39{
40 struct dentry *parent = dget_parent(dentry);
41 struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
42 struct gfs2_inode *dip = GFS2_I(parent->d_inode);
43 struct inode *inode = dentry->d_inode;
44 struct gfs2_holder d_gh;
45 struct gfs2_inode *ip;
46 struct gfs2_inum inum;
47 unsigned int type;
48 int error;
49
50 if (inode && is_bad_inode(inode))
51 goto invalid;
52
53 if (sdp->sd_args.ar_localcaching)
54 goto valid;
55
56 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
57 if (error)
58 goto fail;
59
60 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
61 switch (error) {
62 case 0:
63 if (!inode)
64 goto invalid_gunlock;
65 break;
66 case -ENOENT:
67 if (!inode)
68 goto valid_gunlock;
69 goto invalid_gunlock;
70 default:
71 goto fail_gunlock;
72 }
73
74 ip = GFS2_I(inode);
75
76 if (!gfs2_inum_equal(&ip->i_num, &inum))
77 goto invalid_gunlock;
78
79 if (IF2DT(ip->i_di.di_mode) != type) {
80 gfs2_consist_inode(dip);
81 goto fail_gunlock;
82 }
83
84valid_gunlock:
85 gfs2_glock_dq_uninit(&d_gh);
86valid:
87 dput(parent);
88 return 1;
89
90invalid_gunlock:
91 gfs2_glock_dq_uninit(&d_gh);
92invalid:
93 if (inode && S_ISDIR(inode->i_mode)) {
94 if (have_submounts(dentry))
95 goto valid;
96 shrink_dcache_parent(dentry);
97 }
98 d_drop(dentry);
99 dput(parent);
100 return 0;
101
102fail_gunlock:
103 gfs2_glock_dq_uninit(&d_gh);
104fail:
105 dput(parent);
106 return 0;
107}
108
109static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
110{
111 str->hash = gfs2_disk_hash(str->name, str->len);
112 return 0;
113}
114
115struct dentry_operations gfs2_dops = {
116 .d_revalidate = gfs2_drevalidate,
117 .d_hash = gfs2_dhash,
118};
119
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
new file mode 100644
index 000000000000..5caa3db4d3f5
--- /dev/null
+++ b/fs/gfs2/ops_dentry.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_DENTRY_DOT_H__
11#define __OPS_DENTRY_DOT_H__
12
13#include <linux/dcache.h>
14
15extern struct dentry_operations gfs2_dops;
16
17#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
new file mode 100644
index 000000000000..86127d93bd35
--- /dev/null
+++ b/fs/gfs2/ops_export.c
@@ -0,0 +1,298 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "dir.h"
22#include "glock.h"
23#include "glops.h"
24#include "inode.h"
25#include "ops_export.h"
26#include "rgrp.h"
27#include "util.h"
28
29static struct dentry *gfs2_decode_fh(struct super_block *sb,
30 __u32 *fh,
31 int fh_len,
32 int fh_type,
33 int (*acceptable)(void *context,
34 struct dentry *dentry),
35 void *context)
36{
37 struct gfs2_fh_obj fh_obj;
38 struct gfs2_inum *this, parent;
39
40 if (fh_type != fh_len)
41 return NULL;
42
43 this = &fh_obj.this;
44 fh_obj.imode = DT_UNKNOWN;
45 memset(&parent, 0, sizeof(struct gfs2_inum));
46
47 switch (fh_type) {
48 case GFS2_LARGE_FH_SIZE:
49 parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
50 parent.no_formal_ino |= be32_to_cpu(fh[5]);
51 parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
52 parent.no_addr |= be32_to_cpu(fh[7]);
53 fh_obj.imode = be32_to_cpu(fh[8]);
54 case GFS2_SMALL_FH_SIZE:
55 this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
56 this->no_formal_ino |= be32_to_cpu(fh[1]);
57 this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
58 this->no_addr |= be32_to_cpu(fh[3]);
59 break;
60 default:
61 return NULL;
62 }
63
64 return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
65 acceptable, context);
66}
67
68static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
69 int connectable)
70{
71 struct inode *inode = dentry->d_inode;
72 struct super_block *sb = inode->i_sb;
73 struct gfs2_inode *ip = GFS2_I(inode);
74
75 if (*len < GFS2_SMALL_FH_SIZE ||
76 (connectable && *len < GFS2_LARGE_FH_SIZE))
77 return 255;
78
79 fh[0] = ip->i_num.no_formal_ino >> 32;
80 fh[0] = cpu_to_be32(fh[0]);
81 fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
82 fh[1] = cpu_to_be32(fh[1]);
83 fh[2] = ip->i_num.no_addr >> 32;
84 fh[2] = cpu_to_be32(fh[2]);
85 fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
86 fh[3] = cpu_to_be32(fh[3]);
87 *len = GFS2_SMALL_FH_SIZE;
88
89 if (!connectable || inode == sb->s_root->d_inode)
90 return *len;
91
92 spin_lock(&dentry->d_lock);
93 inode = dentry->d_parent->d_inode;
94 ip = GFS2_I(inode);
95 igrab(inode);
96 spin_unlock(&dentry->d_lock);
97
98 fh[4] = ip->i_num.no_formal_ino >> 32;
99 fh[4] = cpu_to_be32(fh[4]);
100 fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
101 fh[5] = cpu_to_be32(fh[5]);
102 fh[6] = ip->i_num.no_addr >> 32;
103 fh[6] = cpu_to_be32(fh[6]);
104 fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
105 fh[7] = cpu_to_be32(fh[7]);
106
107 fh[8] = cpu_to_be32(inode->i_mode);
108 fh[9] = 0; /* pad to double word */
109 *len = GFS2_LARGE_FH_SIZE;
110
111 iput(inode);
112
113 return *len;
114}
115
116struct get_name_filldir {
117 struct gfs2_inum inum;
118 char *name;
119};
120
121static int get_name_filldir(void *opaque, const char *name, unsigned int length,
122 u64 offset, struct gfs2_inum *inum,
123 unsigned int type)
124{
125 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
126
127 if (!gfs2_inum_equal(inum, &gnfd->inum))
128 return 0;
129
130 memcpy(gnfd->name, name, length);
131 gnfd->name[length] = 0;
132
133 return 1;
134}
135
136static int gfs2_get_name(struct dentry *parent, char *name,
137 struct dentry *child)
138{
139 struct inode *dir = parent->d_inode;
140 struct inode *inode = child->d_inode;
141 struct gfs2_inode *dip, *ip;
142 struct get_name_filldir gnfd;
143 struct gfs2_holder gh;
144 u64 offset = 0;
145 int error;
146
147 if (!dir)
148 return -EINVAL;
149
150 if (!S_ISDIR(dir->i_mode) || !inode)
151 return -EINVAL;
152
153 dip = GFS2_I(dir);
154 ip = GFS2_I(inode);
155
156 *name = 0;
157 gnfd.inum = ip->i_num;
158 gnfd.name = name;
159
160 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
161 if (error)
162 return error;
163
164 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
165
166 gfs2_glock_dq_uninit(&gh);
167
168 if (!error && !*name)
169 error = -ENOENT;
170
171 return error;
172}
173
174static struct dentry *gfs2_get_parent(struct dentry *child)
175{
176 struct qstr dotdot;
177 struct inode *inode;
178 struct dentry *dentry;
179
180 gfs2_str2qstr(&dotdot, "..");
181 inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
182
183 if (!inode)
184 return ERR_PTR(-ENOENT);
185 /*
186 * In case of an error, @inode carries the error value, and we
187 * have to return that as a(n invalid) pointer to dentry.
188 */
189 if (IS_ERR(inode))
190 return ERR_PTR(PTR_ERR(inode));
191
192 dentry = d_alloc_anon(inode);
193 if (!dentry) {
194 iput(inode);
195 return ERR_PTR(-ENOMEM);
196 }
197
198 return dentry;
199}
200
201static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
202{
203 struct gfs2_sbd *sdp = sb->s_fs_info;
204 struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
205 struct gfs2_inum *inum = &fh_obj->this;
206 struct gfs2_holder i_gh, ri_gh, rgd_gh;
207 struct gfs2_rgrpd *rgd;
208 struct inode *inode;
209 struct dentry *dentry;
210 int error;
211
212 /* System files? */
213
214 inode = gfs2_ilookup(sb, inum);
215 if (inode) {
216 if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
217 iput(inode);
218 return ERR_PTR(-ESTALE);
219 }
220 goto out_inode;
221 }
222
223 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
224 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
225 &i_gh);
226 if (error)
227 return ERR_PTR(error);
228
229 error = gfs2_rindex_hold(sdp, &ri_gh);
230 if (error)
231 goto fail;
232
233 error = -EINVAL;
234 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
235 if (!rgd)
236 goto fail_rindex;
237
238 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
239 if (error)
240 goto fail_rindex;
241
242 error = -ESTALE;
243 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
244 goto fail_rgd;
245
246 gfs2_glock_dq_uninit(&rgd_gh);
247 gfs2_glock_dq_uninit(&ri_gh);
248
249 inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
250 if (!inode)
251 goto fail;
252 if (IS_ERR(inode)) {
253 error = PTR_ERR(inode);
254 goto fail;
255 }
256
257 error = gfs2_inode_refresh(GFS2_I(inode));
258 if (error) {
259 iput(inode);
260 goto fail;
261 }
262
263 error = -EIO;
264 if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
265 iput(inode);
266 goto fail;
267 }
268
269 gfs2_glock_dq_uninit(&i_gh);
270
271out_inode:
272 dentry = d_alloc_anon(inode);
273 if (!dentry) {
274 iput(inode);
275 return ERR_PTR(-ENOMEM);
276 }
277
278 return dentry;
279
280fail_rgd:
281 gfs2_glock_dq_uninit(&rgd_gh);
282
283fail_rindex:
284 gfs2_glock_dq_uninit(&ri_gh);
285
286fail:
287 gfs2_glock_dq_uninit(&i_gh);
288 return ERR_PTR(error);
289}
290
291struct export_operations gfs2_export_ops = {
292 .decode_fh = gfs2_decode_fh,
293 .encode_fh = gfs2_encode_fh,
294 .get_name = gfs2_get_name,
295 .get_parent = gfs2_get_parent,
296 .get_dentry = gfs2_get_dentry,
297};
298
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
new file mode 100644
index 000000000000..09aca5046fb1
--- /dev/null
+++ b/fs/gfs2/ops_export.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13#define GFS2_SMALL_FH_SIZE 4
14#define GFS2_LARGE_FH_SIZE 10
15
16extern struct export_operations gfs2_export_ops;
17struct gfs2_fh_obj {
18 struct gfs2_inum this;
19 __u32 imode;
20};
21
22#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
new file mode 100644
index 000000000000..a9ac1358ce21
--- /dev/null
+++ b/fs/gfs2/ops_file.c
@@ -0,0 +1,642 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/uio.h>
17#include <linux/blkdev.h>
18#include <linux/mm.h>
19#include <linux/smp_lock.h>
20#include <linux/fs.h>
21#include <linux/gfs2_ondisk.h>
22#include <linux/ext2_fs.h>
23#include <linux/crc32.h>
24#include <linux/iflags.h>
25#include <linux/lm_interface.h>
26#include <asm/uaccess.h>
27
28#include "gfs2.h"
29#include "incore.h"
30#include "bmap.h"
31#include "dir.h"
32#include "glock.h"
33#include "glops.h"
34#include "inode.h"
35#include "lm.h"
36#include "log.h"
37#include "meta_io.h"
38#include "ops_file.h"
39#include "ops_vm.h"
40#include "quota.h"
41#include "rgrp.h"
42#include "trans.h"
43#include "util.h"
44#include "eaops.h"
45
46/* For regular, non-NFS */
47struct filldir_reg {
48 struct gfs2_sbd *fdr_sbd;
49 int fdr_prefetch;
50
51 filldir_t fdr_filldir;
52 void *fdr_opaque;
53};
54
55/*
56 * Most fields left uninitialised to catch anybody who tries to
57 * use them. f_flags set to prevent file_accessed() from touching
58 * any other part of this. Its use is purely as a flag so that we
59 * know (in readpage()) whether or not do to locking.
60 */
61struct file gfs2_internal_file_sentinel = {
62 .f_flags = O_NOATIME|O_RDONLY,
63};
64
65static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
66 unsigned long offset, unsigned long size)
67{
68 char *kaddr;
69 unsigned long count = desc->count;
70
71 if (size > count)
72 size = count;
73
74 kaddr = kmap(page);
75 memcpy(desc->arg.buf, kaddr + offset, size);
76 kunmap(page);
77
78 desc->count = count - size;
79 desc->written += size;
80 desc->arg.buf += size;
81 return size;
82}
83
84int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
85 char *buf, loff_t *pos, unsigned size)
86{
87 struct inode *inode = &ip->i_inode;
88 read_descriptor_t desc;
89 desc.written = 0;
90 desc.arg.buf = buf;
91 desc.count = size;
92 desc.error = 0;
93 do_generic_mapping_read(inode->i_mapping, ra_state,
94 &gfs2_internal_file_sentinel, pos, &desc,
95 gfs2_read_actor);
96 return desc.written ? desc.written : desc.error;
97}
98
99/**
100 * gfs2_llseek - seek to a location in a file
101 * @file: the file
102 * @offset: the offset
103 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
104 *
105 * SEEK_END requires the glock for the file because it references the
106 * file's size.
107 *
108 * Returns: The new offset, or errno
109 */
110
111static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
112{
113 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
114 struct gfs2_holder i_gh;
115 loff_t error;
116
117 if (origin == 2) {
118 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
119 &i_gh);
120 if (!error) {
121 error = remote_llseek(file, offset, origin);
122 gfs2_glock_dq_uninit(&i_gh);
123 }
124 } else
125 error = remote_llseek(file, offset, origin);
126
127 return error;
128}
129
130/**
131 * filldir_func - Report a directory entry to the caller of gfs2_dir_read()
132 * @opaque: opaque data used by the function
133 * @name: the name of the directory entry
134 * @length: the length of the name
135 * @offset: the entry's offset in the directory
136 * @inum: the inode number the entry points to
137 * @type: the type of inode the entry points to
138 *
139 * Returns: 0 on success, 1 if buffer full
140 */
141
142static int filldir_func(void *opaque, const char *name, unsigned int length,
143 u64 offset, struct gfs2_inum *inum,
144 unsigned int type)
145{
146 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
147 struct gfs2_sbd *sdp = fdr->fdr_sbd;
148 int error;
149
150 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
151 inum->no_addr, type);
152 if (error)
153 return 1;
154
155 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
156 gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
157 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
158 gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
159 LM_ST_SHARED, LM_FLAG_TRY);
160 }
161
162 return 0;
163}
164
165/**
166 * gfs2_readdir - Read directory entries from a directory
167 * @file: The directory to read from
168 * @dirent: Buffer for dirents
169 * @filldir: Function used to do the copying
170 *
171 * Returns: errno
172 */
173
174static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
175{
176 struct inode *dir = file->f_mapping->host;
177 struct gfs2_inode *dip = GFS2_I(dir);
178 struct filldir_reg fdr;
179 struct gfs2_holder d_gh;
180 u64 offset = file->f_pos;
181 int error;
182
183 fdr.fdr_sbd = GFS2_SB(dir);
184 fdr.fdr_prefetch = 1;
185 fdr.fdr_filldir = filldir;
186 fdr.fdr_opaque = dirent;
187
188 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
189 error = gfs2_glock_nq_atime(&d_gh);
190 if (error) {
191 gfs2_holder_uninit(&d_gh);
192 return error;
193 }
194
195 error = gfs2_dir_read(dir, &offset, &fdr, filldir_func);
196
197 gfs2_glock_dq_uninit(&d_gh);
198
199 file->f_pos = offset;
200
201 return error;
202}
203
204
205static const u32 iflags_to_gfs2[32] = {
206 [iflag_Sync] = GFS2_DIF_SYNC,
207 [iflag_Immutable] = GFS2_DIF_IMMUTABLE,
208 [iflag_Append] = GFS2_DIF_APPENDONLY,
209 [iflag_NoAtime] = GFS2_DIF_NOATIME,
210 [iflag_Index] = GFS2_DIF_EXHASH,
211 [iflag_JournalData] = GFS2_DIF_JDATA,
212 [iflag_DirectIO] = GFS2_DIF_DIRECTIO,
213};
214
215static const u32 gfs2_to_iflags[32] = {
216 [gfs2fl_Sync] = IFLAG_SYNC,
217 [gfs2fl_Immutable] = IFLAG_IMMUTABLE,
218 [gfs2fl_AppendOnly] = IFLAG_APPEND,
219 [gfs2fl_NoAtime] = IFLAG_NOATIME,
220 [gfs2fl_ExHash] = IFLAG_INDEX,
221 [gfs2fl_Jdata] = IFLAG_JOURNAL_DATA,
222 [gfs2fl_Directio] = IFLAG_DIRECTIO,
223 [gfs2fl_InheritDirectio] = IFLAG_DIRECTIO,
224 [gfs2fl_InheritJdata] = IFLAG_JOURNAL_DATA,
225};
226
227static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
228{
229 struct inode *inode = filp->f_dentry->d_inode;
230 struct gfs2_inode *ip = GFS2_I(inode);
231 struct gfs2_holder gh;
232 int error;
233 u32 iflags;
234
235 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
236 error = gfs2_glock_nq_m_atime(1, &gh);
237 if (error)
238 return error;
239
240 iflags = iflags_cvt(gfs2_to_iflags, ip->i_di.di_flags);
241 if (put_user(iflags, ptr))
242 error = -EFAULT;
243
244 gfs2_glock_dq_m(1, &gh);
245 gfs2_holder_uninit(&gh);
246 return error;
247}
248
249/* Flags that can be set by user space */
250#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
251 GFS2_DIF_DIRECTIO| \
252 GFS2_DIF_IMMUTABLE| \
253 GFS2_DIF_APPENDONLY| \
254 GFS2_DIF_NOATIME| \
255 GFS2_DIF_SYNC| \
256 GFS2_DIF_SYSTEM| \
257 GFS2_DIF_INHERIT_DIRECTIO| \
258 GFS2_DIF_INHERIT_JDATA)
259
260/**
261 * gfs2_set_flags - set flags on an inode
262 * @inode: The inode
263 * @flags: The flags to set
264 * @mask: Indicates which flags are valid
265 *
266 */
267static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
268{
269 struct inode *inode = filp->f_dentry->d_inode;
270 struct gfs2_inode *ip = GFS2_I(inode);
271 struct gfs2_sbd *sdp = GFS2_SB(inode);
272 struct buffer_head *bh;
273 struct gfs2_holder gh;
274 int error;
275 u32 new_flags, flags;
276
277 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
278 if (error)
279 return error;
280
281 flags = ip->i_di.di_flags;
282 new_flags = (flags & ~mask) | (reqflags & mask);
283 if ((new_flags ^ flags) == 0)
284 goto out;
285
286 if (S_ISDIR(inode->i_mode)) {
287 if ((new_flags ^ flags) & GFS2_DIF_JDATA)
288 new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA);
289 if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO)
290 new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO);
291 }
292
293 error = -EINVAL;
294 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
295 goto out;
296
297 error = -EPERM;
298 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
299 goto out;
300 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
301 goto out;
302 if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
303 !capable(CAP_LINUX_IMMUTABLE))
304 goto out;
305 if (!IS_IMMUTABLE(inode)) {
306 error = permission(inode, MAY_WRITE, NULL);
307 if (error)
308 goto out;
309 }
310
311 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
312 if (error)
313 goto out;
314 error = gfs2_meta_inode_buffer(ip, &bh);
315 if (error)
316 goto out_trans_end;
317 gfs2_trans_add_bh(ip->i_gl, bh, 1);
318 ip->i_di.di_flags = new_flags;
319 gfs2_dinode_out(&ip->i_di, bh->b_data);
320 brelse(bh);
321out_trans_end:
322 gfs2_trans_end(sdp);
323out:
324 gfs2_glock_dq_uninit(&gh);
325 return error;
326}
327
328static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
329{
330 u32 iflags, gfsflags;
331 if (get_user(iflags, ptr))
332 return -EFAULT;
333 gfsflags = iflags_cvt(iflags_to_gfs2, iflags);
334 return do_gfs2_set_flags(filp, gfsflags, ~0);
335}
336
337static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
338{
339 switch(cmd) {
340 case IFLAGS_GET_IOC:
341 return gfs2_get_flags(filp, (u32 __user *)arg);
342 case IFLAGS_SET_IOC:
343 return gfs2_set_flags(filp, (u32 __user *)arg);
344 }
345 return -ENOTTY;
346}
347
348
349/**
350 * gfs2_mmap -
351 * @file: The file to map
352 * @vma: The VMA which described the mapping
353 *
354 * Returns: 0 or error code
355 */
356
357static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
358{
359 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
360 struct gfs2_holder i_gh;
361 int error;
362
363 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
364 error = gfs2_glock_nq_atime(&i_gh);
365 if (error) {
366 gfs2_holder_uninit(&i_gh);
367 return error;
368 }
369
370 /* This is VM_MAYWRITE instead of VM_WRITE because a call
371 to mprotect() can turn on VM_WRITE later. */
372
373 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
374 (VM_MAYSHARE | VM_MAYWRITE))
375 vma->vm_ops = &gfs2_vm_ops_sharewrite;
376 else
377 vma->vm_ops = &gfs2_vm_ops_private;
378
379 gfs2_glock_dq_uninit(&i_gh);
380
381 return error;
382}
383
384/**
385 * gfs2_open - open a file
386 * @inode: the inode to open
387 * @file: the struct file for this opening
388 *
389 * Returns: errno
390 */
391
392static int gfs2_open(struct inode *inode, struct file *file)
393{
394 struct gfs2_inode *ip = GFS2_I(inode);
395 struct gfs2_holder i_gh;
396 struct gfs2_file *fp;
397 int error;
398
399 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
400 if (!fp)
401 return -ENOMEM;
402
403 mutex_init(&fp->f_fl_mutex);
404
405 gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
406 file->private_data = fp;
407
408 if (S_ISREG(ip->i_di.di_mode)) {
409 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
410 &i_gh);
411 if (error)
412 goto fail;
413
414 if (!(file->f_flags & O_LARGEFILE) &&
415 ip->i_di.di_size > MAX_NON_LFS) {
416 error = -EFBIG;
417 goto fail_gunlock;
418 }
419
420 /* Listen to the Direct I/O flag */
421
422 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
423 file->f_flags |= O_DIRECT;
424
425 gfs2_glock_dq_uninit(&i_gh);
426 }
427
428 return 0;
429
430fail_gunlock:
431 gfs2_glock_dq_uninit(&i_gh);
432fail:
433 file->private_data = NULL;
434 kfree(fp);
435 return error;
436}
437
438/**
439 * gfs2_close - called to close a struct file
440 * @inode: the inode the struct file belongs to
441 * @file: the struct file being closed
442 *
443 * Returns: errno
444 */
445
446static int gfs2_close(struct inode *inode, struct file *file)
447{
448 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
449 struct gfs2_file *fp;
450
451 fp = file->private_data;
452 file->private_data = NULL;
453
454 if (gfs2_assert_warn(sdp, fp))
455 return -EIO;
456
457 kfree(fp);
458
459 return 0;
460}
461
462/**
463 * gfs2_fsync - sync the dirty data for a file (across the cluster)
464 * @file: the file that points to the dentry (we ignore this)
465 * @dentry: the dentry that points to the inode to sync
466 *
467 * Returns: errno
468 */
469
470static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
471{
472 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
473
474 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
475
476 return 0;
477}
478
479/**
480 * gfs2_lock - acquire/release a posix lock on a file
481 * @file: the file pointer
482 * @cmd: either modify or retrieve lock state, possibly wait
483 * @fl: type and range of lock
484 *
485 * Returns: errno
486 */
487
488static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
489{
490 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
491 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
492 struct lm_lockname name =
493 { .ln_number = ip->i_num.no_addr,
494 .ln_type = LM_TYPE_PLOCK };
495
496 if (!(fl->fl_flags & FL_POSIX))
497 return -ENOLCK;
498 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
499 return -ENOLCK;
500
501 if (sdp->sd_args.ar_localflocks) {
502 if (IS_GETLK(cmd)) {
503 struct file_lock tmp;
504 int ret;
505 ret = posix_test_lock(file, fl, &tmp);
506 fl->fl_type = F_UNLCK;
507 if (ret)
508 memcpy(fl, &tmp, sizeof(struct file_lock));
509 return 0;
510 } else {
511 return posix_lock_file_wait(file, fl);
512 }
513 }
514
515 if (IS_GETLK(cmd))
516 return gfs2_lm_plock_get(sdp, &name, file, fl);
517 else if (fl->fl_type == F_UNLCK)
518 return gfs2_lm_punlock(sdp, &name, file, fl);
519 else
520 return gfs2_lm_plock(sdp, &name, file, cmd, fl);
521}
522
523static int do_flock(struct file *file, int cmd, struct file_lock *fl)
524{
525 struct gfs2_file *fp = file->private_data;
526 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
527 struct gfs2_inode *ip = GFS2_I(file->f_dentry->d_inode);
528 struct gfs2_glock *gl;
529 unsigned int state;
530 int flags;
531 int error = 0;
532
533 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
534 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
535
536 mutex_lock(&fp->f_fl_mutex);
537
538 gl = fl_gh->gh_gl;
539 if (gl) {
540 if (fl_gh->gh_state == state)
541 goto out;
542 gfs2_glock_hold(gl);
543 flock_lock_file_wait(file,
544 &(struct file_lock){.fl_type = F_UNLCK});
545 gfs2_glock_dq_uninit(fl_gh);
546 } else {
547 error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
548 ip->i_num.no_addr, &gfs2_flock_glops,
549 CREATE, &gl);
550 if (error)
551 goto out;
552 }
553
554 gfs2_holder_init(gl, state, flags, fl_gh);
555 gfs2_glock_put(gl);
556
557 error = gfs2_glock_nq(fl_gh);
558 if (error) {
559 gfs2_holder_uninit(fl_gh);
560 if (error == GLR_TRYFAILED)
561 error = -EAGAIN;
562 } else {
563 error = flock_lock_file_wait(file, fl);
564 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
565 }
566
567out:
568 mutex_unlock(&fp->f_fl_mutex);
569 return error;
570}
571
572static void do_unflock(struct file *file, struct file_lock *fl)
573{
574 struct gfs2_file *fp = file->private_data;
575 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
576
577 mutex_lock(&fp->f_fl_mutex);
578 flock_lock_file_wait(file, fl);
579 if (fl_gh->gh_gl)
580 gfs2_glock_dq_uninit(fl_gh);
581 mutex_unlock(&fp->f_fl_mutex);
582}
583
584/**
585 * gfs2_flock - acquire/release a flock lock on a file
586 * @file: the file pointer
587 * @cmd: either modify or retrieve lock state, possibly wait
588 * @fl: type and range of lock
589 *
590 * Returns: errno
591 */
592
593static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
594{
595 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
596 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
597
598 if (!(fl->fl_flags & FL_FLOCK))
599 return -ENOLCK;
600 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
601 return -ENOLCK;
602
603 if (sdp->sd_args.ar_localflocks)
604 return flock_lock_file_wait(file, fl);
605
606 if (fl->fl_type == F_UNLCK) {
607 do_unflock(file, fl);
608 return 0;
609 } else
610 return do_flock(file, cmd, fl);
611}
612
613const struct file_operations gfs2_file_fops = {
614 .llseek = gfs2_llseek,
615 .read = generic_file_read,
616 .readv = generic_file_readv,
617 .aio_read = generic_file_aio_read,
618 .write = generic_file_write,
619 .writev = generic_file_writev,
620 .aio_write = generic_file_aio_write,
621 .unlocked_ioctl = gfs2_ioctl,
622 .mmap = gfs2_mmap,
623 .open = gfs2_open,
624 .release = gfs2_close,
625 .fsync = gfs2_fsync,
626 .lock = gfs2_lock,
627 .sendfile = generic_file_sendfile,
628 .flock = gfs2_flock,
629 .splice_read = generic_file_splice_read,
630 .splice_write = generic_file_splice_write,
631};
632
633const struct file_operations gfs2_dir_fops = {
634 .readdir = gfs2_readdir,
635 .unlocked_ioctl = gfs2_ioctl,
636 .open = gfs2_open,
637 .release = gfs2_close,
638 .fsync = gfs2_fsync,
639 .lock = gfs2_lock,
640 .flock = gfs2_flock,
641};
642
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
new file mode 100644
index 000000000000..ce319f89ec8e
--- /dev/null
+++ b/fs/gfs2/ops_file.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12
13#include <linux/fs.h>
14struct gfs2_inode;
15
16extern struct file gfs2_internal_file_sentinel;
17extern int gfs2_internal_read(struct gfs2_inode *ip,
18 struct file_ra_state *ra_state,
19 char *buf, loff_t *pos, unsigned size);
20
21extern const struct file_operations gfs2_file_fops;
22extern const struct file_operations gfs2_dir_fops;
23
24#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
new file mode 100644
index 000000000000..a9aa2edd756f
--- /dev/null
+++ b/fs/gfs2/ops_fstype.c
@@ -0,0 +1,943 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/blkdev.h>
16#include <linux/kthread.h>
17#include <linux/namei.h>
18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h>
21
22#include "gfs2.h"
23#include "incore.h"
24#include "daemon.h"
25#include "glock.h"
26#include "glops.h"
27#include "inode.h"
28#include "lm.h"
29#include "mount.h"
30#include "ops_export.h"
31#include "ops_fstype.h"
32#include "ops_super.h"
33#include "recovery.h"
34#include "rgrp.h"
35#include "super.h"
36#include "sys.h"
37#include "util.h"
38
39#define DO 0
40#define UNDO 1
41
42extern struct dentry_operations gfs2_dops;
43
44static struct gfs2_sbd *init_sbd(struct super_block *sb)
45{
46 struct gfs2_sbd *sdp;
47
48 sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
49 if (!sdp)
50 return NULL;
51
52 sb->s_fs_info = sdp;
53 sdp->sd_vfs = sb;
54
55 gfs2_tune_init(&sdp->sd_tune);
56
57 INIT_LIST_HEAD(&sdp->sd_reclaim_list);
58 spin_lock_init(&sdp->sd_reclaim_lock);
59 init_waitqueue_head(&sdp->sd_reclaim_wq);
60
61 mutex_init(&sdp->sd_inum_mutex);
62 spin_lock_init(&sdp->sd_statfs_spin);
63 mutex_init(&sdp->sd_statfs_mutex);
64
65 spin_lock_init(&sdp->sd_rindex_spin);
66 mutex_init(&sdp->sd_rindex_mutex);
67 INIT_LIST_HEAD(&sdp->sd_rindex_list);
68 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
69 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
70
71 INIT_LIST_HEAD(&sdp->sd_jindex_list);
72 spin_lock_init(&sdp->sd_jindex_spin);
73 mutex_init(&sdp->sd_jindex_mutex);
74
75 INIT_LIST_HEAD(&sdp->sd_quota_list);
76 spin_lock_init(&sdp->sd_quota_spin);
77 mutex_init(&sdp->sd_quota_mutex);
78
79 spin_lock_init(&sdp->sd_log_lock);
80
81 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
82 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
83 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
84 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
85 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
86
87 mutex_init(&sdp->sd_log_reserve_mutex);
88 INIT_LIST_HEAD(&sdp->sd_ail1_list);
89 INIT_LIST_HEAD(&sdp->sd_ail2_list);
90
91 init_rwsem(&sdp->sd_log_flush_lock);
92 INIT_LIST_HEAD(&sdp->sd_log_flush_list);
93
94 INIT_LIST_HEAD(&sdp->sd_revoke_list);
95
96 mutex_init(&sdp->sd_freeze_lock);
97
98 return sdp;
99}
100
101static void init_vfs(struct super_block *sb, unsigned noatime)
102{
103 struct gfs2_sbd *sdp = sb->s_fs_info;
104
105 sb->s_magic = GFS2_MAGIC;
106 sb->s_op = &gfs2_super_ops;
107 sb->s_export_op = &gfs2_export_ops;
108 sb->s_maxbytes = MAX_LFS_FILESIZE;
109
110 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
111 set_bit(noatime, &sdp->sd_flags);
112
113 /* Don't let the VFS update atimes. GFS2 handles this itself. */
114 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
115}
116
117static int init_names(struct gfs2_sbd *sdp, int silent)
118{
119 struct gfs2_sb *sb = NULL;
120 char *proto, *table;
121 int error = 0;
122
123 proto = sdp->sd_args.ar_lockproto;
124 table = sdp->sd_args.ar_locktable;
125
126 /* Try to autodetect */
127
128 if (!proto[0] || !table[0]) {
129 struct buffer_head *bh;
130 bh = sb_getblk(sdp->sd_vfs,
131 GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
132 lock_buffer(bh);
133 clear_buffer_uptodate(bh);
134 clear_buffer_dirty(bh);
135 unlock_buffer(bh);
136 ll_rw_block(READ, 1, &bh);
137 wait_on_buffer(bh);
138
139 if (!buffer_uptodate(bh)) {
140 brelse(bh);
141 return -EIO;
142 }
143
144 sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
145 if (!sb) {
146 brelse(bh);
147 return -ENOMEM;
148 }
149 gfs2_sb_in(sb, bh->b_data);
150 brelse(bh);
151
152 error = gfs2_check_sb(sdp, sb, silent);
153 if (error)
154 goto out;
155
156 if (!proto[0])
157 proto = sb->sb_lockproto;
158 if (!table[0])
159 table = sb->sb_locktable;
160 }
161
162 if (!table[0])
163 table = sdp->sd_vfs->s_id;
164
165 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
166 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
167
168out:
169 kfree(sb);
170 return error;
171}
172
173static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
174 int undo)
175{
176 struct task_struct *p;
177 int error = 0;
178
179 if (undo)
180 goto fail_trans;
181
182 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
183 error = IS_ERR(p);
184 if (error) {
185 fs_err(sdp, "can't start scand thread: %d\n", error);
186 return error;
187 }
188 sdp->sd_scand_process = p;
189
190 for (sdp->sd_glockd_num = 0;
191 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
192 sdp->sd_glockd_num++) {
193 p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
194 error = IS_ERR(p);
195 if (error) {
196 fs_err(sdp, "can't start glockd thread: %d\n", error);
197 goto fail;
198 }
199 sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
200 }
201
202 error = gfs2_glock_nq_num(sdp,
203 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
204 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
205 mount_gh);
206 if (error) {
207 fs_err(sdp, "can't acquire mount glock: %d\n", error);
208 goto fail;
209 }
210
211 error = gfs2_glock_nq_num(sdp,
212 GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
213 LM_ST_SHARED,
214 LM_FLAG_NOEXP | GL_EXACT,
215 &sdp->sd_live_gh);
216 if (error) {
217 fs_err(sdp, "can't acquire live glock: %d\n", error);
218 goto fail_mount;
219 }
220
221 error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
222 CREATE, &sdp->sd_rename_gl);
223 if (error) {
224 fs_err(sdp, "can't create rename glock: %d\n", error);
225 goto fail_live;
226 }
227
228 error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
229 CREATE, &sdp->sd_trans_gl);
230 if (error) {
231 fs_err(sdp, "can't create transaction glock: %d\n", error);
232 goto fail_rename;
233 }
234 set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
235
236 return 0;
237
238fail_trans:
239 gfs2_glock_put(sdp->sd_trans_gl);
240fail_rename:
241 gfs2_glock_put(sdp->sd_rename_gl);
242fail_live:
243 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
244fail_mount:
245 gfs2_glock_dq_uninit(mount_gh);
246fail:
247 while (sdp->sd_glockd_num--)
248 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
249
250 kthread_stop(sdp->sd_scand_process);
251 return error;
252}
253
254static struct inode *gfs2_lookup_root(struct super_block *sb,
255 struct gfs2_inum *inum)
256{
257 return gfs2_inode_lookup(sb, inum, DT_DIR);
258}
259
260static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
261{
262 struct super_block *sb = sdp->sd_vfs;
263 struct gfs2_holder sb_gh;
264 struct gfs2_inum *inum;
265 struct inode *inode;
266 int error = 0;
267
268 if (undo) {
269 if (sb->s_root) {
270 dput(sb->s_root);
271 sb->s_root = NULL;
272 }
273 return 0;
274 }
275
276 error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
277 LM_ST_SHARED, 0, &sb_gh);
278 if (error) {
279 fs_err(sdp, "can't acquire superblock glock: %d\n", error);
280 return error;
281 }
282
283 error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
284 if (error) {
285 fs_err(sdp, "can't read superblock: %d\n", error);
286 goto out;
287 }
288
289 /* Set up the buffer cache and SB for real */
290 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
291 error = -EINVAL;
292 fs_err(sdp, "FS block size (%u) is too small for device "
293 "block size (%u)\n",
294 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
295 goto out;
296 }
297 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
298 error = -EINVAL;
299 fs_err(sdp, "FS block size (%u) is too big for machine "
300 "page size (%u)\n",
301 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
302 goto out;
303 }
304 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
305
306 /* Get the root inode */
307 inum = &sdp->sd_sb.sb_root_dir;
308 if (sb->s_type == &gfs2meta_fs_type)
309 inum = &sdp->sd_sb.sb_master_dir;
310 inode = gfs2_lookup_root(sb, inum);
311 if (IS_ERR(inode)) {
312 error = PTR_ERR(inode);
313 fs_err(sdp, "can't read in root inode: %d\n", error);
314 goto out;
315 }
316
317 sb->s_root = d_alloc_root(inode);
318 if (!sb->s_root) {
319 fs_err(sdp, "can't get root dentry\n");
320 error = -ENOMEM;
321 iput(inode);
322 }
323 sb->s_root->d_op = &gfs2_dops;
324out:
325 gfs2_glock_dq_uninit(&sb_gh);
326 return error;
327}
328
329static int init_journal(struct gfs2_sbd *sdp, int undo)
330{
331 struct gfs2_holder ji_gh;
332 struct task_struct *p;
333 struct gfs2_inode *ip;
334 int jindex = 1;
335 int error = 0;
336
337 if (undo) {
338 jindex = 0;
339 goto fail_recoverd;
340 }
341
342 sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
343 if (IS_ERR(sdp->sd_jindex)) {
344 fs_err(sdp, "can't lookup journal index: %d\n", error);
345 return PTR_ERR(sdp->sd_jindex);
346 }
347 ip = GFS2_I(sdp->sd_jindex);
348 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
349
350 /* Load in the journal index special file */
351
352 error = gfs2_jindex_hold(sdp, &ji_gh);
353 if (error) {
354 fs_err(sdp, "can't read journal index: %d\n", error);
355 goto fail;
356 }
357
358 error = -EINVAL;
359 if (!gfs2_jindex_size(sdp)) {
360 fs_err(sdp, "no journals!\n");
361 goto fail_jindex;
362 }
363
364 if (sdp->sd_args.ar_spectator) {
365 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
366 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
367 } else {
368 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
369 fs_err(sdp, "can't mount journal #%u\n",
370 sdp->sd_lockstruct.ls_jid);
371 fs_err(sdp, "there are only %u journals (0 - %u)\n",
372 gfs2_jindex_size(sdp),
373 gfs2_jindex_size(sdp) - 1);
374 goto fail_jindex;
375 }
376 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
377
378 error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
379 &gfs2_journal_glops,
380 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
381 &sdp->sd_journal_gh);
382 if (error) {
383 fs_err(sdp, "can't acquire journal glock: %d\n", error);
384 goto fail_jindex;
385 }
386
387 ip = GFS2_I(sdp->sd_jdesc->jd_inode);
388 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
389 LM_FLAG_NOEXP | GL_EXACT,
390 &sdp->sd_jinode_gh);
391 if (error) {
392 fs_err(sdp, "can't acquire journal inode glock: %d\n",
393 error);
394 goto fail_journal_gh;
395 }
396
397 error = gfs2_jdesc_check(sdp->sd_jdesc);
398 if (error) {
399 fs_err(sdp, "my journal (%u) is bad: %d\n",
400 sdp->sd_jdesc->jd_jid, error);
401 goto fail_jinode_gh;
402 }
403 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
404 }
405
406 if (sdp->sd_lockstruct.ls_first) {
407 unsigned int x;
408 for (x = 0; x < sdp->sd_journals; x++) {
409 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x));
410 if (error) {
411 fs_err(sdp, "error recovering journal %u: %d\n",
412 x, error);
413 goto fail_jinode_gh;
414 }
415 }
416
417 gfs2_lm_others_may_mount(sdp);
418 } else if (!sdp->sd_args.ar_spectator) {
419 error = gfs2_recover_journal(sdp->sd_jdesc);
420 if (error) {
421 fs_err(sdp, "error recovering my journal: %d\n", error);
422 goto fail_jinode_gh;
423 }
424 }
425
426 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
427 gfs2_glock_dq_uninit(&ji_gh);
428 jindex = 0;
429
430 p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
431 error = IS_ERR(p);
432 if (error) {
433 fs_err(sdp, "can't start recoverd thread: %d\n", error);
434 goto fail_jinode_gh;
435 }
436 sdp->sd_recoverd_process = p;
437
438 return 0;
439
440fail_recoverd:
441 kthread_stop(sdp->sd_recoverd_process);
442fail_jinode_gh:
443 if (!sdp->sd_args.ar_spectator)
444 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
445fail_journal_gh:
446 if (!sdp->sd_args.ar_spectator)
447 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
448fail_jindex:
449 gfs2_jindex_free(sdp);
450 if (jindex)
451 gfs2_glock_dq_uninit(&ji_gh);
452fail:
453 iput(sdp->sd_jindex);
454 return error;
455}
456
457
458static int init_inodes(struct gfs2_sbd *sdp, int undo)
459{
460 int error = 0;
461 struct gfs2_inode *ip;
462 struct inode *inode;
463
464 if (undo)
465 goto fail_qinode;
466
467 inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
468 if (IS_ERR(inode)) {
469 error = PTR_ERR(inode);
470 fs_err(sdp, "can't read in master directory: %d\n", error);
471 goto fail;
472 }
473 sdp->sd_master_dir = inode;
474
475 error = init_journal(sdp, undo);
476 if (error)
477 goto fail_master;
478
479 /* Read in the master inode number inode */
480 sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
481 if (IS_ERR(sdp->sd_inum_inode)) {
482 error = PTR_ERR(sdp->sd_inum_inode);
483 fs_err(sdp, "can't read in inum inode: %d\n", error);
484 goto fail_journal;
485 }
486
487
488 /* Read in the master statfs inode */
489 sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
490 if (IS_ERR(sdp->sd_statfs_inode)) {
491 error = PTR_ERR(sdp->sd_statfs_inode);
492 fs_err(sdp, "can't read in statfs inode: %d\n", error);
493 goto fail_inum;
494 }
495
496 /* Read in the resource index inode */
497 sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
498 if (IS_ERR(sdp->sd_rindex)) {
499 error = PTR_ERR(sdp->sd_rindex);
500 fs_err(sdp, "can't get resource index inode: %d\n", error);
501 goto fail_statfs;
502 }
503 ip = GFS2_I(sdp->sd_rindex);
504 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
505 sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1;
506
507 /* Read in the quota inode */
508 sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
509 if (IS_ERR(sdp->sd_quota_inode)) {
510 error = PTR_ERR(sdp->sd_quota_inode);
511 fs_err(sdp, "can't get quota file inode: %d\n", error);
512 goto fail_rindex;
513 }
514 return 0;
515
516fail_qinode:
517 iput(sdp->sd_quota_inode);
518fail_rindex:
519 gfs2_clear_rgrpd(sdp);
520 iput(sdp->sd_rindex);
521fail_statfs:
522 iput(sdp->sd_statfs_inode);
523fail_inum:
524 iput(sdp->sd_inum_inode);
525fail_journal:
526 init_journal(sdp, UNDO);
527fail_master:
528 iput(sdp->sd_master_dir);
529fail:
530 return error;
531}
532
533static int init_per_node(struct gfs2_sbd *sdp, int undo)
534{
535 struct inode *pn = NULL;
536 char buf[30];
537 int error = 0;
538 struct gfs2_inode *ip;
539
540 if (sdp->sd_args.ar_spectator)
541 return 0;
542
543 if (undo)
544 goto fail_qc_gh;
545
546 pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
547 if (IS_ERR(pn)) {
548 error = PTR_ERR(pn);
549 fs_err(sdp, "can't find per_node directory: %d\n", error);
550 return error;
551 }
552
553 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
554 sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
555 if (IS_ERR(sdp->sd_ir_inode)) {
556 error = PTR_ERR(sdp->sd_ir_inode);
557 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
558 goto fail;
559 }
560
561 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
562 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
563 if (IS_ERR(sdp->sd_sc_inode)) {
564 error = PTR_ERR(sdp->sd_sc_inode);
565 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
566 goto fail_ir_i;
567 }
568
569 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
570 sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
571 if (IS_ERR(sdp->sd_qc_inode)) {
572 error = PTR_ERR(sdp->sd_qc_inode);
573 fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
574 goto fail_ut_i;
575 }
576
577 iput(pn);
578 pn = NULL;
579
580 ip = GFS2_I(sdp->sd_ir_inode);
581 error = gfs2_glock_nq_init(ip->i_gl,
582 LM_ST_EXCLUSIVE, 0,
583 &sdp->sd_ir_gh);
584 if (error) {
585 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
586 goto fail_qc_i;
587 }
588
589 ip = GFS2_I(sdp->sd_sc_inode);
590 error = gfs2_glock_nq_init(ip->i_gl,
591 LM_ST_EXCLUSIVE, 0,
592 &sdp->sd_sc_gh);
593 if (error) {
594 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
595 goto fail_ir_gh;
596 }
597
598 ip = GFS2_I(sdp->sd_qc_inode);
599 error = gfs2_glock_nq_init(ip->i_gl,
600 LM_ST_EXCLUSIVE, 0,
601 &sdp->sd_qc_gh);
602 if (error) {
603 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
604 goto fail_ut_gh;
605 }
606
607 return 0;
608
609fail_qc_gh:
610 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
611fail_ut_gh:
612 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
613fail_ir_gh:
614 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
615fail_qc_i:
616 iput(sdp->sd_qc_inode);
617fail_ut_i:
618 iput(sdp->sd_sc_inode);
619fail_ir_i:
620 iput(sdp->sd_ir_inode);
621fail:
622 if (pn)
623 iput(pn);
624 return error;
625}
626
627static int init_threads(struct gfs2_sbd *sdp, int undo)
628{
629 struct task_struct *p;
630 int error = 0;
631
632 if (undo)
633 goto fail_quotad;
634
635 sdp->sd_log_flush_time = jiffies;
636 sdp->sd_jindex_refresh_time = jiffies;
637
638 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
639 error = IS_ERR(p);
640 if (error) {
641 fs_err(sdp, "can't start logd thread: %d\n", error);
642 return error;
643 }
644 sdp->sd_logd_process = p;
645
646 sdp->sd_statfs_sync_time = jiffies;
647 sdp->sd_quota_sync_time = jiffies;
648
649 p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
650 error = IS_ERR(p);
651 if (error) {
652 fs_err(sdp, "can't start quotad thread: %d\n", error);
653 goto fail;
654 }
655 sdp->sd_quotad_process = p;
656
657 return 0;
658
659
660fail_quotad:
661 kthread_stop(sdp->sd_quotad_process);
662fail:
663 kthread_stop(sdp->sd_logd_process);
664 return error;
665}
666
667/**
668 * fill_super - Read in superblock
669 * @sb: The VFS superblock
670 * @data: Mount options
671 * @silent: Don't complain if it's not a GFS2 filesystem
672 *
673 * Returns: errno
674 */
675
676static int fill_super(struct super_block *sb, void *data, int silent)
677{
678 struct gfs2_sbd *sdp;
679 struct gfs2_holder mount_gh;
680 int error;
681
682 sdp = init_sbd(sb);
683 if (!sdp) {
684 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
685 return -ENOMEM;
686 }
687
688 error = gfs2_mount_args(sdp, (char *)data, 0);
689 if (error) {
690 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
691 goto fail;
692 }
693
694 init_vfs(sb, SDF_NOATIME);
695
696 /* Set up the buffer cache and fill in some fake block size values
697 to allow us to read-in the on-disk superblock. */
698 sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
699 sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
700 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
701 GFS2_BASIC_BLOCK_SHIFT;
702 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
703
704 error = init_names(sdp, silent);
705 if (error)
706 goto fail;
707
708 error = gfs2_sys_fs_add(sdp);
709 if (error)
710 goto fail;
711
712 error = gfs2_lm_mount(sdp, silent);
713 if (error)
714 goto fail_sys;
715
716 error = init_locking(sdp, &mount_gh, DO);
717 if (error)
718 goto fail_lm;
719
720 error = init_sb(sdp, silent, DO);
721 if (error)
722 goto fail_locking;
723
724 error = init_inodes(sdp, DO);
725 if (error)
726 goto fail_sb;
727
728 error = init_per_node(sdp, DO);
729 if (error)
730 goto fail_inodes;
731
732 error = gfs2_statfs_init(sdp);
733 if (error) {
734 fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
735 goto fail_per_node;
736 }
737
738 error = init_threads(sdp, DO);
739 if (error)
740 goto fail_per_node;
741
742 if (!(sb->s_flags & MS_RDONLY)) {
743 error = gfs2_make_fs_rw(sdp);
744 if (error) {
745 fs_err(sdp, "can't make FS RW: %d\n", error);
746 goto fail_threads;
747 }
748 }
749
750 gfs2_glock_dq_uninit(&mount_gh);
751
752 return 0;
753
754fail_threads:
755 init_threads(sdp, UNDO);
756fail_per_node:
757 init_per_node(sdp, UNDO);
758fail_inodes:
759 init_inodes(sdp, UNDO);
760fail_sb:
761 init_sb(sdp, 0, UNDO);
762fail_locking:
763 init_locking(sdp, &mount_gh, UNDO);
764fail_lm:
765 gfs2_gl_hash_clear(sdp, WAIT);
766 gfs2_lm_unmount(sdp);
767 while (invalidate_inodes(sb))
768 yield();
769fail_sys:
770 gfs2_sys_fs_del(sdp);
771fail:
772 kfree(sdp);
773 sb->s_fs_info = NULL;
774 return error;
775}
776
777static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
778 const char *dev_name, void *data, struct vfsmount *mnt)
779{
780 struct super_block *sb;
781 struct gfs2_sbd *sdp;
782 int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
783 if (error)
784 goto out;
785 sb = mnt->mnt_sb;
786 sdp = sb->s_fs_info;
787 sdp->sd_gfs2mnt = mnt;
788out:
789 return error;
790}
791
792static int fill_super_meta(struct super_block *sb, struct super_block *new,
793 void *data, int silent)
794{
795 struct gfs2_sbd *sdp = sb->s_fs_info;
796 struct inode *inode;
797 int error = 0;
798
799 new->s_fs_info = sdp;
800 sdp->sd_vfs_meta = sb;
801
802 init_vfs(new, SDF_NOATIME);
803
804 /* Get the master inode */
805 inode = igrab(sdp->sd_master_dir);
806
807 new->s_root = d_alloc_root(inode);
808 if (!new->s_root) {
809 fs_err(sdp, "can't get root dentry\n");
810 error = -ENOMEM;
811 iput(inode);
812 }
813 new->s_root->d_op = &gfs2_dops;
814
815 return error;
816}
817
818static int set_bdev_super(struct super_block *s, void *data)
819{
820 s->s_bdev = data;
821 s->s_dev = s->s_bdev->bd_dev;
822 return 0;
823}
824
825static int test_bdev_super(struct super_block *s, void *data)
826{
827 return s->s_bdev == data;
828}
829
830static struct super_block* get_gfs2_sb(const char *dev_name)
831{
832 struct kstat stat;
833 struct nameidata nd;
834 struct file_system_type *fstype;
835 struct super_block *sb = NULL, *s;
836 struct list_head *l;
837 int error;
838
839 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
840 if (error) {
841 printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n",
842 dev_name);
843 goto out;
844 }
845 error = vfs_getattr(nd.mnt, nd.dentry, &stat);
846
847 fstype = get_fs_type("gfs2");
848 list_for_each(l, &fstype->fs_supers) {
849 s = list_entry(l, struct super_block, s_instances);
850 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
851 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
852 sb = s;
853 goto free_nd;
854 }
855 }
856
857 printk(KERN_WARNING "GFS2: Unrecognized block device or "
858 "mount point %s", dev_name);
859
860free_nd:
861 path_release(&nd);
862out:
863 return sb;
864}
865
866static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
867 const char *dev_name, void *data, struct vfsmount *mnt)
868{
869 int error = 0;
870 struct super_block *sb = NULL, *new;
871 struct gfs2_sbd *sdp;
872 char *gfs2mnt = NULL;
873
874 sb = get_gfs2_sb(dev_name);
875 if (!sb) {
876 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
877 error = -ENOENT;
878 goto error;
879 }
880 sdp = (struct gfs2_sbd*) sb->s_fs_info;
881 if (sdp->sd_vfs_meta) {
882 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
883 error = -EBUSY;
884 goto error;
885 }
886 mutex_lock(&sb->s_bdev->bd_mount_mutex);
887 new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
888 mutex_unlock(&sb->s_bdev->bd_mount_mutex);
889 if (IS_ERR(new)) {
890 error = PTR_ERR(new);
891 goto error;
892 }
893 module_put(fs_type->owner);
894 new->s_flags = flags;
895 strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
896 sb_set_blocksize(new, sb->s_blocksize);
897 error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
898 if (error) {
899 up_write(&new->s_umount);
900 deactivate_super(new);
901 goto error;
902 }
903
904 new->s_flags |= MS_ACTIVE;
905
906 /* Grab a reference to the gfs2 mount point */
907 atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
908 return simple_set_mnt(mnt, new);
909error:
910 if (gfs2mnt)
911 kfree(gfs2mnt);
912 return error;
913}
914
915static void gfs2_kill_sb(struct super_block *sb)
916{
917 kill_block_super(sb);
918}
919
920static void gfs2_kill_sb_meta(struct super_block *sb)
921{
922 struct gfs2_sbd *sdp = sb->s_fs_info;
923 generic_shutdown_super(sb);
924 sdp->sd_vfs_meta = NULL;
925 atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
926}
927
928struct file_system_type gfs2_fs_type = {
929 .name = "gfs2",
930 .fs_flags = FS_REQUIRES_DEV,
931 .get_sb = gfs2_get_sb,
932 .kill_sb = gfs2_kill_sb,
933 .owner = THIS_MODULE,
934};
935
936struct file_system_type gfs2meta_fs_type = {
937 .name = "gfs2meta",
938 .fs_flags = FS_REQUIRES_DEV,
939 .get_sb = gfs2_get_sb_meta,
940 .kill_sb = gfs2_kill_sb_meta,
941 .owner = THIS_MODULE,
942};
943
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
new file mode 100644
index 000000000000..7cc2c296271b
--- /dev/null
+++ b/fs/gfs2/ops_fstype.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_FSTYPE_DOT_H__
11#define __OPS_FSTYPE_DOT_H__
12
13#include <linux/fs.h>
14
15extern struct file_system_type gfs2_fs_type;
16extern struct file_system_type gfs2meta_fs_type;
17
18#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
new file mode 100644
index 000000000000..57af0192b241
--- /dev/null
+++ b/fs/gfs2/ops_inode.c
@@ -0,0 +1,1151 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/namei.h>
16#include <linux/utsname.h>
17#include <linux/mm.h>
18#include <linux/xattr.h>
19#include <linux/posix_acl.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <linux/lm_interface.h>
23#include <asm/uaccess.h>
24
25#include "gfs2.h"
26#include "incore.h"
27#include "acl.h"
28#include "bmap.h"
29#include "dir.h"
30#include "eaops.h"
31#include "eattr.h"
32#include "glock.h"
33#include "inode.h"
34#include "meta_io.h"
35#include "ops_dentry.h"
36#include "ops_inode.h"
37#include "quota.h"
38#include "rgrp.h"
39#include "trans.h"
40#include "util.h"
41
42/**
43 * gfs2_create - Create a file
44 * @dir: The directory in which to create the file
45 * @dentry: The dentry of the new file
46 * @mode: The mode of the new file
47 *
48 * Returns: errno
49 */
50
51static int gfs2_create(struct inode *dir, struct dentry *dentry,
52 int mode, struct nameidata *nd)
53{
54 struct gfs2_inode *dip = GFS2_I(dir);
55 struct gfs2_sbd *sdp = GFS2_SB(dir);
56 struct gfs2_holder ghs[2];
57 struct inode *inode;
58
59 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
60
61 for (;;) {
62 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
63 if (!IS_ERR(inode)) {
64 gfs2_trans_end(sdp);
65 if (dip->i_alloc.al_rgd)
66 gfs2_inplace_release(dip);
67 gfs2_quota_unlock(dip);
68 gfs2_alloc_put(dip);
69 gfs2_glock_dq_uninit_m(2, ghs);
70 mark_inode_dirty(inode);
71 break;
72 } else if (PTR_ERR(inode) != -EEXIST ||
73 (nd->intent.open.flags & O_EXCL)) {
74 gfs2_holder_uninit(ghs);
75 return PTR_ERR(inode);
76 }
77
78 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
79 if (inode) {
80 if (!IS_ERR(inode)) {
81 gfs2_holder_uninit(ghs);
82 break;
83 } else {
84 gfs2_holder_uninit(ghs);
85 return PTR_ERR(inode);
86 }
87 }
88 }
89
90 d_instantiate(dentry, inode);
91
92 return 0;
93}
94
95/**
96 * gfs2_lookup - Look up a filename in a directory and return its inode
97 * @dir: The directory inode
98 * @dentry: The dentry of the new inode
99 * @nd: passed from Linux VFS, ignored by us
100 *
101 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
102 *
103 * Returns: errno
104 */
105
106static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
107 struct nameidata *nd)
108{
109 struct inode *inode = NULL;
110
111 dentry->d_op = &gfs2_dops;
112
113 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
114 if (inode && IS_ERR(inode))
115 return ERR_PTR(PTR_ERR(inode));
116
117 if (inode)
118 return d_splice_alias(inode, dentry);
119 d_add(dentry, inode);
120
121 return NULL;
122}
123
124/**
125 * gfs2_link - Link to a file
126 * @old_dentry: The inode to link
127 * @dir: Add link to this directory
128 * @dentry: The name of the link
129 *
130 * Link the inode in "old_dentry" into the directory "dir" with the
131 * name in "dentry".
132 *
133 * Returns: errno
134 */
135
136static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
137 struct dentry *dentry)
138{
139 struct gfs2_inode *dip = GFS2_I(dir);
140 struct gfs2_sbd *sdp = GFS2_SB(dir);
141 struct inode *inode = old_dentry->d_inode;
142 struct gfs2_inode *ip = GFS2_I(inode);
143 struct gfs2_holder ghs[2];
144 int alloc_required;
145 int error;
146
147 if (S_ISDIR(ip->i_di.di_mode))
148 return -EPERM;
149
150 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
151 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
152
153 error = gfs2_glock_nq_m(2, ghs);
154 if (error)
155 goto out;
156
157 error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
158 if (error)
159 goto out_gunlock;
160
161 error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
162 switch (error) {
163 case -ENOENT:
164 break;
165 case 0:
166 error = -EEXIST;
167 default:
168 goto out_gunlock;
169 }
170
171 error = -EINVAL;
172 if (!dip->i_di.di_nlink)
173 goto out_gunlock;
174 error = -EFBIG;
175 if (dip->i_di.di_entries == (u32)-1)
176 goto out_gunlock;
177 error = -EPERM;
178 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
179 goto out_gunlock;
180 error = -EINVAL;
181 if (!ip->i_di.di_nlink)
182 goto out_gunlock;
183 error = -EMLINK;
184 if (ip->i_di.di_nlink == (u32)-1)
185 goto out_gunlock;
186
187 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
188 if (error < 0)
189 goto out_gunlock;
190 error = 0;
191
192 if (alloc_required) {
193 struct gfs2_alloc *al = gfs2_alloc_get(dip);
194
195 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
196 if (error)
197 goto out_alloc;
198
199 error = gfs2_quota_check(dip, dip->i_di.di_uid,
200 dip->i_di.di_gid);
201 if (error)
202 goto out_gunlock_q;
203
204 al->al_requested = sdp->sd_max_dirres;
205
206 error = gfs2_inplace_reserve(dip);
207 if (error)
208 goto out_gunlock_q;
209
210 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
211 al->al_rgd->rd_ri.ri_length +
212 2 * RES_DINODE + RES_STATFS +
213 RES_QUOTA, 0);
214 if (error)
215 goto out_ipres;
216 } else {
217 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
218 if (error)
219 goto out_ipres;
220 }
221
222 error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
223 IF2DT(ip->i_di.di_mode));
224 if (error)
225 goto out_end_trans;
226
227 error = gfs2_change_nlink(ip, +1);
228
229out_end_trans:
230 gfs2_trans_end(sdp);
231out_ipres:
232 if (alloc_required)
233 gfs2_inplace_release(dip);
234out_gunlock_q:
235 if (alloc_required)
236 gfs2_quota_unlock(dip);
237out_alloc:
238 if (alloc_required)
239 gfs2_alloc_put(dip);
240out_gunlock:
241 gfs2_glock_dq_m(2, ghs);
242out:
243 gfs2_holder_uninit(ghs);
244 gfs2_holder_uninit(ghs + 1);
245 if (!error) {
246 atomic_inc(&inode->i_count);
247 d_instantiate(dentry, inode);
248 mark_inode_dirty(inode);
249 }
250 return error;
251}
252
253/**
254 * gfs2_unlink - Unlink a file
255 * @dir: The inode of the directory containing the file to unlink
256 * @dentry: The file itself
257 *
258 * Unlink a file. Call gfs2_unlinki()
259 *
260 * Returns: errno
261 */
262
263static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
264{
265 struct gfs2_inode *dip = GFS2_I(dir);
266 struct gfs2_sbd *sdp = GFS2_SB(dir);
267 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
268 struct gfs2_holder ghs[2];
269 int error;
270
271 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
272 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
273
274 error = gfs2_glock_nq_m(2, ghs);
275 if (error)
276 goto out;
277
278 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
279 if (error)
280 goto out_gunlock;
281
282 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
283 if (error)
284 goto out_gunlock;
285
286 error = gfs2_dir_del(dip, &dentry->d_name);
287 if (error)
288 goto out_end_trans;
289
290 error = gfs2_change_nlink(ip, -1);
291
292out_end_trans:
293 gfs2_trans_end(sdp);
294out_gunlock:
295 gfs2_glock_dq_m(2, ghs);
296out:
297 gfs2_holder_uninit(ghs);
298 gfs2_holder_uninit(ghs + 1);
299 return error;
300}
301
302/**
303 * gfs2_symlink - Create a symlink
304 * @dir: The directory to create the symlink in
305 * @dentry: The dentry to put the symlink in
306 * @symname: The thing which the link points to
307 *
308 * Returns: errno
309 */
310
311static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
312 const char *symname)
313{
314 struct gfs2_inode *dip = GFS2_I(dir), *ip;
315 struct gfs2_sbd *sdp = GFS2_SB(dir);
316 struct gfs2_holder ghs[2];
317 struct inode *inode;
318 struct buffer_head *dibh;
319 int size;
320 int error;
321
322 /* Must be stuffed with a null terminator for gfs2_follow_link() */
323 size = strlen(symname);
324 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
325 return -ENAMETOOLONG;
326
327 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
328
329 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
330 if (IS_ERR(inode)) {
331 gfs2_holder_uninit(ghs);
332 return PTR_ERR(inode);
333 }
334
335 ip = ghs[1].gh_gl->gl_object;
336
337 ip->i_di.di_size = size;
338
339 error = gfs2_meta_inode_buffer(ip, &dibh);
340
341 if (!gfs2_assert_withdraw(sdp, !error)) {
342 gfs2_dinode_out(&ip->i_di, dibh->b_data);
343 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
344 size);
345 brelse(dibh);
346 }
347
348 gfs2_trans_end(sdp);
349 if (dip->i_alloc.al_rgd)
350 gfs2_inplace_release(dip);
351 gfs2_quota_unlock(dip);
352 gfs2_alloc_put(dip);
353
354 gfs2_glock_dq_uninit_m(2, ghs);
355
356 d_instantiate(dentry, inode);
357 mark_inode_dirty(inode);
358
359 return 0;
360}
361
362/**
363 * gfs2_mkdir - Make a directory
364 * @dir: The parent directory of the new one
365 * @dentry: The dentry of the new directory
366 * @mode: The mode of the new directory
367 *
368 * Returns: errno
369 */
370
371static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
372{
373 struct gfs2_inode *dip = GFS2_I(dir), *ip;
374 struct gfs2_sbd *sdp = GFS2_SB(dir);
375 struct gfs2_holder ghs[2];
376 struct inode *inode;
377 struct buffer_head *dibh;
378 int error;
379
380 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
381
382 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
383 if (IS_ERR(inode)) {
384 gfs2_holder_uninit(ghs);
385 return PTR_ERR(inode);
386 }
387
388 ip = ghs[1].gh_gl->gl_object;
389
390 ip->i_di.di_nlink = 2;
391 ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
392 ip->i_di.di_flags |= GFS2_DIF_JDATA;
393 ip->i_di.di_payload_format = GFS2_FORMAT_DE;
394 ip->i_di.di_entries = 2;
395
396 error = gfs2_meta_inode_buffer(ip, &dibh);
397
398 if (!gfs2_assert_withdraw(sdp, !error)) {
399 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
400 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
401 struct qstr str;
402
403 gfs2_str2qstr(&str, ".");
404 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
405 gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent);
406 dent->de_inum = di->di_num; /* already GFS2 endian */
407 dent->de_type = DT_DIR;
408 di->di_entries = cpu_to_be32(1);
409
410 gfs2_str2qstr(&str, "..");
411 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
412 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
413
414 gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum);
415 dent->de_type = DT_DIR;
416
417 gfs2_dinode_out(&ip->i_di, (char *)di);
418
419 brelse(dibh);
420 }
421
422 error = gfs2_change_nlink(dip, +1);
423 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
424
425 gfs2_trans_end(sdp);
426 if (dip->i_alloc.al_rgd)
427 gfs2_inplace_release(dip);
428 gfs2_quota_unlock(dip);
429 gfs2_alloc_put(dip);
430
431 gfs2_glock_dq_uninit_m(2, ghs);
432
433 d_instantiate(dentry, inode);
434 mark_inode_dirty(inode);
435
436 return 0;
437}
438
439/**
440 * gfs2_rmdir - Remove a directory
441 * @dir: The parent directory of the directory to be removed
442 * @dentry: The dentry of the directory to remove
443 *
444 * Remove a directory. Call gfs2_rmdiri()
445 *
446 * Returns: errno
447 */
448
449static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
450{
451 struct gfs2_inode *dip = GFS2_I(dir);
452 struct gfs2_sbd *sdp = GFS2_SB(dir);
453 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
454 struct gfs2_holder ghs[2];
455 int error;
456
457 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
458 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
459
460 error = gfs2_glock_nq_m(2, ghs);
461 if (error)
462 goto out;
463
464 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
465 if (error)
466 goto out_gunlock;
467
468 if (ip->i_di.di_entries < 2) {
469 if (gfs2_consist_inode(ip))
470 gfs2_dinode_print(&ip->i_di);
471 error = -EIO;
472 goto out_gunlock;
473 }
474 if (ip->i_di.di_entries > 2) {
475 error = -ENOTEMPTY;
476 goto out_gunlock;
477 }
478
479 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
480 if (error)
481 goto out_gunlock;
482
483 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
484
485 gfs2_trans_end(sdp);
486
487out_gunlock:
488 gfs2_glock_dq_m(2, ghs);
489out:
490 gfs2_holder_uninit(ghs);
491 gfs2_holder_uninit(ghs + 1);
492 return error;
493}
494
495/**
496 * gfs2_mknod - Make a special file
497 * @dir: The directory in which the special file will reside
498 * @dentry: The dentry of the special file
499 * @mode: The mode of the special file
500 * @rdev: The device specification of the special file
501 *
502 */
503
504static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
505 dev_t dev)
506{
507 struct gfs2_inode *dip = GFS2_I(dir), *ip;
508 struct gfs2_sbd *sdp = GFS2_SB(dir);
509 struct gfs2_holder ghs[2];
510 struct inode *inode;
511 struct buffer_head *dibh;
512 u32 major = 0, minor = 0;
513 int error;
514
515 switch (mode & S_IFMT) {
516 case S_IFBLK:
517 case S_IFCHR:
518 major = MAJOR(dev);
519 minor = MINOR(dev);
520 break;
521 case S_IFIFO:
522 case S_IFSOCK:
523 break;
524 default:
525 return -EOPNOTSUPP;
526 };
527
528 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
529
530 inode = gfs2_createi(ghs, &dentry->d_name, mode);
531 if (IS_ERR(inode)) {
532 gfs2_holder_uninit(ghs);
533 return PTR_ERR(inode);
534 }
535
536 ip = ghs[1].gh_gl->gl_object;
537
538 ip->i_di.di_major = major;
539 ip->i_di.di_minor = minor;
540
541 error = gfs2_meta_inode_buffer(ip, &dibh);
542
543 if (!gfs2_assert_withdraw(sdp, !error)) {
544 gfs2_dinode_out(&ip->i_di, dibh->b_data);
545 brelse(dibh);
546 }
547
548 gfs2_trans_end(sdp);
549 if (dip->i_alloc.al_rgd)
550 gfs2_inplace_release(dip);
551 gfs2_quota_unlock(dip);
552 gfs2_alloc_put(dip);
553
554 gfs2_glock_dq_uninit_m(2, ghs);
555
556 d_instantiate(dentry, inode);
557 mark_inode_dirty(inode);
558
559 return 0;
560}
561
562/**
563 * gfs2_rename - Rename a file
564 * @odir: Parent directory of old file name
565 * @odentry: The old dentry of the file
566 * @ndir: Parent directory of new file name
567 * @ndentry: The new dentry of the file
568 *
569 * Returns: errno
570 */
571
572static int gfs2_rename(struct inode *odir, struct dentry *odentry,
573 struct inode *ndir, struct dentry *ndentry)
574{
575 struct gfs2_inode *odip = GFS2_I(odir);
576 struct gfs2_inode *ndip = GFS2_I(ndir);
577 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
578 struct gfs2_inode *nip = NULL;
579 struct gfs2_sbd *sdp = GFS2_SB(odir);
580 struct gfs2_holder ghs[4], r_gh;
581 unsigned int num_gh;
582 int dir_rename = 0;
583 int alloc_required;
584 unsigned int x;
585 int error;
586
587 if (ndentry->d_inode) {
588 nip = GFS2_I(ndentry->d_inode);
589 if (ip == nip)
590 return 0;
591 }
592
593 /* Make sure we aren't trying to move a dirctory into it's subdir */
594
595 if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
596 dir_rename = 1;
597
598 error = gfs2_glock_nq_init(sdp->sd_rename_gl,
599 LM_ST_EXCLUSIVE, 0,
600 &r_gh);
601 if (error)
602 goto out;
603
604 error = gfs2_ok_to_move(ip, ndip);
605 if (error)
606 goto out_gunlock_r;
607 }
608
609 num_gh = 1;
610 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
611 if (odip != ndip) {
612 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
613 num_gh++;
614 }
615 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
616 num_gh++;
617
618 if (nip) {
619 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
620 num_gh++;
621 }
622
623 error = gfs2_glock_nq_m(num_gh, ghs);
624 if (error)
625 goto out_uninit;
626
627 /* Check out the old directory */
628
629 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
630 if (error)
631 goto out_gunlock;
632
633 /* Check out the new directory */
634
635 if (nip) {
636 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
637 if (error)
638 goto out_gunlock;
639
640 if (S_ISDIR(nip->i_di.di_mode)) {
641 if (nip->i_di.di_entries < 2) {
642 if (gfs2_consist_inode(nip))
643 gfs2_dinode_print(&nip->i_di);
644 error = -EIO;
645 goto out_gunlock;
646 }
647 if (nip->i_di.di_entries > 2) {
648 error = -ENOTEMPTY;
649 goto out_gunlock;
650 }
651 }
652 } else {
653 error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
654 if (error)
655 goto out_gunlock;
656
657 error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
658 switch (error) {
659 case -ENOENT:
660 error = 0;
661 break;
662 case 0:
663 error = -EEXIST;
664 default:
665 goto out_gunlock;
666 };
667
668 if (odip != ndip) {
669 if (!ndip->i_di.di_nlink) {
670 error = -EINVAL;
671 goto out_gunlock;
672 }
673 if (ndip->i_di.di_entries == (u32)-1) {
674 error = -EFBIG;
675 goto out_gunlock;
676 }
677 if (S_ISDIR(ip->i_di.di_mode) &&
678 ndip->i_di.di_nlink == (u32)-1) {
679 error = -EMLINK;
680 goto out_gunlock;
681 }
682 }
683 }
684
685 /* Check out the dir to be renamed */
686
687 if (dir_rename) {
688 error = permission(odentry->d_inode, MAY_WRITE, NULL);
689 if (error)
690 goto out_gunlock;
691 }
692
693 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
694 if (error < 0)
695 goto out_gunlock;
696 error = 0;
697
698 if (alloc_required) {
699 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
700
701 error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
702 if (error)
703 goto out_alloc;
704
705 error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
706 ndip->i_di.di_gid);
707 if (error)
708 goto out_gunlock_q;
709
710 al->al_requested = sdp->sd_max_dirres;
711
712 error = gfs2_inplace_reserve(ndip);
713 if (error)
714 goto out_gunlock_q;
715
716 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
717 al->al_rgd->rd_ri.ri_length +
718 4 * RES_DINODE + 4 * RES_LEAF +
719 RES_STATFS + RES_QUOTA, 0);
720 if (error)
721 goto out_ipreserv;
722 } else {
723 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
724 5 * RES_LEAF, 0);
725 if (error)
726 goto out_gunlock;
727 }
728
729 /* Remove the target file, if it exists */
730
731 if (nip) {
732 if (S_ISDIR(nip->i_di.di_mode))
733 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
734 else {
735 error = gfs2_dir_del(ndip, &ndentry->d_name);
736 if (error)
737 goto out_end_trans;
738 error = gfs2_change_nlink(nip, -1);
739 }
740 if (error)
741 goto out_end_trans;
742 }
743
744 if (dir_rename) {
745 struct qstr name;
746 gfs2_str2qstr(&name, "..");
747
748 error = gfs2_change_nlink(ndip, +1);
749 if (error)
750 goto out_end_trans;
751 error = gfs2_change_nlink(odip, -1);
752 if (error)
753 goto out_end_trans;
754
755 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
756 if (error)
757 goto out_end_trans;
758 } else {
759 struct buffer_head *dibh;
760 error = gfs2_meta_inode_buffer(ip, &dibh);
761 if (error)
762 goto out_end_trans;
763 ip->i_di.di_ctime = get_seconds();
764 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
765 gfs2_dinode_out(&ip->i_di, dibh->b_data);
766 brelse(dibh);
767 }
768
769 error = gfs2_dir_del(odip, &odentry->d_name);
770 if (error)
771 goto out_end_trans;
772
773 error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
774 IF2DT(ip->i_di.di_mode));
775 if (error)
776 goto out_end_trans;
777
778out_end_trans:
779 gfs2_trans_end(sdp);
780out_ipreserv:
781 if (alloc_required)
782 gfs2_inplace_release(ndip);
783out_gunlock_q:
784 if (alloc_required)
785 gfs2_quota_unlock(ndip);
786out_alloc:
787 if (alloc_required)
788 gfs2_alloc_put(ndip);
789out_gunlock:
790 gfs2_glock_dq_m(num_gh, ghs);
791out_uninit:
792 for (x = 0; x < num_gh; x++)
793 gfs2_holder_uninit(ghs + x);
794out_gunlock_r:
795 if (dir_rename)
796 gfs2_glock_dq_uninit(&r_gh);
797out:
798 return error;
799}
800
801/**
802 * gfs2_readlink - Read the value of a symlink
803 * @dentry: the symlink
804 * @buf: the buffer to read the symlink data into
805 * @size: the size of the buffer
806 *
807 * Returns: errno
808 */
809
810static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
811 int user_size)
812{
813 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
814 char array[GFS2_FAST_NAME_SIZE], *buf = array;
815 unsigned int len = GFS2_FAST_NAME_SIZE;
816 int error;
817
818 error = gfs2_readlinki(ip, &buf, &len);
819 if (error)
820 return error;
821
822 if (user_size > len - 1)
823 user_size = len - 1;
824
825 if (copy_to_user(user_buf, buf, user_size))
826 error = -EFAULT;
827 else
828 error = user_size;
829
830 if (buf != array)
831 kfree(buf);
832
833 return error;
834}
835
836/**
837 * gfs2_follow_link - Follow a symbolic link
838 * @dentry: The dentry of the link
839 * @nd: Data that we pass to vfs_follow_link()
840 *
841 * This can handle symlinks of any size. It is optimised for symlinks
842 * under GFS2_FAST_NAME_SIZE.
843 *
844 * Returns: 0 on success or error code
845 */
846
847static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
848{
849 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
850 char array[GFS2_FAST_NAME_SIZE], *buf = array;
851 unsigned int len = GFS2_FAST_NAME_SIZE;
852 int error;
853
854 error = gfs2_readlinki(ip, &buf, &len);
855 if (!error) {
856 error = vfs_follow_link(nd, buf);
857 if (buf != array)
858 kfree(buf);
859 }
860
861 return ERR_PTR(error);
862}
863
864/**
865 * gfs2_permission -
866 * @inode:
867 * @mask:
868 * @nd: passed from Linux VFS, ignored by us
869 *
870 * Returns: errno
871 */
872
873static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
874{
875 struct gfs2_inode *ip = GFS2_I(inode);
876 struct gfs2_holder i_gh;
877 int error;
878
879 if (ip->i_vn == ip->i_gl->gl_vn)
880 return generic_permission(inode, mask, gfs2_check_acl);
881
882 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
883 if (!error) {
884 error = generic_permission(inode, mask, gfs2_check_acl_locked);
885 gfs2_glock_dq_uninit(&i_gh);
886 }
887
888 return error;
889}
890
891static int setattr_size(struct inode *inode, struct iattr *attr)
892{
893 struct gfs2_inode *ip = GFS2_I(inode);
894 int error;
895
896 if (attr->ia_size != ip->i_di.di_size) {
897 error = vmtruncate(inode, attr->ia_size);
898 if (error)
899 return error;
900 }
901
902 error = gfs2_truncatei(ip, attr->ia_size);
903 if (error)
904 return error;
905
906 return error;
907}
908
909static int setattr_chown(struct inode *inode, struct iattr *attr)
910{
911 struct gfs2_inode *ip = GFS2_I(inode);
912 struct gfs2_sbd *sdp = GFS2_SB(inode);
913 struct buffer_head *dibh;
914 u32 ouid, ogid, nuid, ngid;
915 int error;
916
917 ouid = ip->i_di.di_uid;
918 ogid = ip->i_di.di_gid;
919 nuid = attr->ia_uid;
920 ngid = attr->ia_gid;
921
922 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
923 ouid = nuid = NO_QUOTA_CHANGE;
924 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
925 ogid = ngid = NO_QUOTA_CHANGE;
926
927 gfs2_alloc_get(ip);
928
929 error = gfs2_quota_lock(ip, nuid, ngid);
930 if (error)
931 goto out_alloc;
932
933 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
934 error = gfs2_quota_check(ip, nuid, ngid);
935 if (error)
936 goto out_gunlock_q;
937 }
938
939 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
940 if (error)
941 goto out_gunlock_q;
942
943 error = gfs2_meta_inode_buffer(ip, &dibh);
944 if (error)
945 goto out_end_trans;
946
947 error = inode_setattr(inode, attr);
948 gfs2_assert_warn(sdp, !error);
949 gfs2_inode_attr_out(ip);
950
951 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
952 gfs2_dinode_out(&ip->i_di, dibh->b_data);
953 brelse(dibh);
954
955 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
956 gfs2_quota_change(ip, -ip->i_di.di_blocks, ouid, ogid);
957 gfs2_quota_change(ip, ip->i_di.di_blocks, nuid, ngid);
958 }
959
960out_end_trans:
961 gfs2_trans_end(sdp);
962out_gunlock_q:
963 gfs2_quota_unlock(ip);
964out_alloc:
965 gfs2_alloc_put(ip);
966 return error;
967}
968
969/**
970 * gfs2_setattr - Change attributes on an inode
971 * @dentry: The dentry which is changing
972 * @attr: The structure describing the change
973 *
974 * The VFS layer wants to change one or more of an inodes attributes. Write
975 * that change out to disk.
976 *
977 * Returns: errno
978 */
979
980static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
981{
982 struct inode *inode = dentry->d_inode;
983 struct gfs2_inode *ip = GFS2_I(inode);
984 struct gfs2_holder i_gh;
985 int error;
986
987 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
988 if (error)
989 return error;
990
991 error = -EPERM;
992 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
993 goto out;
994
995 error = inode_change_ok(inode, attr);
996 if (error)
997 goto out;
998
999 if (attr->ia_valid & ATTR_SIZE)
1000 error = setattr_size(inode, attr);
1001 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1002 error = setattr_chown(inode, attr);
1003 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1004 error = gfs2_acl_chmod(ip, attr);
1005 else
1006 error = gfs2_setattr_simple(ip, attr);
1007
1008out:
1009 gfs2_glock_dq_uninit(&i_gh);
1010 if (!error)
1011 mark_inode_dirty(inode);
1012 return error;
1013}
1014
1015/**
1016 * gfs2_getattr - Read out an inode's attributes
1017 * @mnt: The vfsmount the inode is being accessed from
1018 * @dentry: The dentry to stat
1019 * @stat: The inode's stats
1020 *
1021 * Returns: errno
1022 */
1023
1024static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1025 struct kstat *stat)
1026{
1027 struct inode *inode = dentry->d_inode;
1028 struct gfs2_inode *ip = GFS2_I(inode);
1029 struct gfs2_holder gh;
1030 int error;
1031
1032 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1033 if (!error) {
1034 generic_fillattr(inode, stat);
1035 gfs2_glock_dq_uninit(&gh);
1036 }
1037
1038 return error;
1039}
1040
1041static int gfs2_setxattr(struct dentry *dentry, const char *name,
1042 const void *data, size_t size, int flags)
1043{
1044 struct inode *inode = dentry->d_inode;
1045 struct gfs2_ea_request er;
1046
1047 memset(&er, 0, sizeof(struct gfs2_ea_request));
1048 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1049 if (er.er_type == GFS2_EATYPE_UNUSED)
1050 return -EOPNOTSUPP;
1051 er.er_data = (char *)data;
1052 er.er_name_len = strlen(er.er_name);
1053 er.er_data_len = size;
1054 er.er_flags = flags;
1055
1056 gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE));
1057
1058 return gfs2_ea_set(GFS2_I(inode), &er);
1059}
1060
1061static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1062 void *data, size_t size)
1063{
1064 struct gfs2_ea_request er;
1065
1066 memset(&er, 0, sizeof(struct gfs2_ea_request));
1067 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1068 if (er.er_type == GFS2_EATYPE_UNUSED)
1069 return -EOPNOTSUPP;
1070 er.er_data = data;
1071 er.er_name_len = strlen(er.er_name);
1072 er.er_data_len = size;
1073
1074 return gfs2_ea_get(GFS2_I(dentry->d_inode), &er);
1075}
1076
1077static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1078{
1079 struct gfs2_ea_request er;
1080
1081 memset(&er, 0, sizeof(struct gfs2_ea_request));
1082 er.er_data = (size) ? buffer : NULL;
1083 er.er_data_len = size;
1084
1085 return gfs2_ea_list(GFS2_I(dentry->d_inode), &er);
1086}
1087
1088static int gfs2_removexattr(struct dentry *dentry, const char *name)
1089{
1090 struct gfs2_ea_request er;
1091
1092 memset(&er, 0, sizeof(struct gfs2_ea_request));
1093 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1094 if (er.er_type == GFS2_EATYPE_UNUSED)
1095 return -EOPNOTSUPP;
1096 er.er_name_len = strlen(er.er_name);
1097
1098 return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
1099}
1100
1101struct inode_operations gfs2_file_iops = {
1102 .permission = gfs2_permission,
1103 .setattr = gfs2_setattr,
1104 .getattr = gfs2_getattr,
1105 .setxattr = gfs2_setxattr,
1106 .getxattr = gfs2_getxattr,
1107 .listxattr = gfs2_listxattr,
1108 .removexattr = gfs2_removexattr,
1109};
1110
1111struct inode_operations gfs2_dev_iops = {
1112 .permission = gfs2_permission,
1113 .setattr = gfs2_setattr,
1114 .getattr = gfs2_getattr,
1115 .setxattr = gfs2_setxattr,
1116 .getxattr = gfs2_getxattr,
1117 .listxattr = gfs2_listxattr,
1118 .removexattr = gfs2_removexattr,
1119};
1120
1121struct inode_operations gfs2_dir_iops = {
1122 .create = gfs2_create,
1123 .lookup = gfs2_lookup,
1124 .link = gfs2_link,
1125 .unlink = gfs2_unlink,
1126 .symlink = gfs2_symlink,
1127 .mkdir = gfs2_mkdir,
1128 .rmdir = gfs2_rmdir,
1129 .mknod = gfs2_mknod,
1130 .rename = gfs2_rename,
1131 .permission = gfs2_permission,
1132 .setattr = gfs2_setattr,
1133 .getattr = gfs2_getattr,
1134 .setxattr = gfs2_setxattr,
1135 .getxattr = gfs2_getxattr,
1136 .listxattr = gfs2_listxattr,
1137 .removexattr = gfs2_removexattr,
1138};
1139
1140struct inode_operations gfs2_symlink_iops = {
1141 .readlink = gfs2_readlink,
1142 .follow_link = gfs2_follow_link,
1143 .permission = gfs2_permission,
1144 .setattr = gfs2_setattr,
1145 .getattr = gfs2_getattr,
1146 .setxattr = gfs2_setxattr,
1147 .getxattr = gfs2_getxattr,
1148 .listxattr = gfs2_listxattr,
1149 .removexattr = gfs2_removexattr,
1150};
1151
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
new file mode 100644
index 000000000000..b15acb4fd34c
--- /dev/null
+++ b/fs/gfs2/ops_inode.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_INODE_DOT_H__
11#define __OPS_INODE_DOT_H__
12
13#include <linux/fs.h>
14
15extern struct inode_operations gfs2_file_iops;
16extern struct inode_operations gfs2_dir_iops;
17extern struct inode_operations gfs2_symlink_iops;
18extern struct inode_operations gfs2_dev_iops;
19
20#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
new file mode 100644
index 000000000000..06f06f7773d0
--- /dev/null
+++ b/fs/gfs2/ops_super.c
@@ -0,0 +1,468 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/statfs.h>
16#include <linux/seq_file.h>
17#include <linux/mount.h>
18#include <linux/kthread.h>
19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <linux/lm_interface.h>
23
24#include "gfs2.h"
25#include "incore.h"
26#include "glock.h"
27#include "inode.h"
28#include "lm.h"
29#include "log.h"
30#include "mount.h"
31#include "ops_super.h"
32#include "quota.h"
33#include "recovery.h"
34#include "rgrp.h"
35#include "super.h"
36#include "sys.h"
37#include "util.h"
38#include "trans.h"
39#include "dir.h"
40#include "eattr.h"
41#include "bmap.h"
42
43/**
44 * gfs2_write_inode - Make sure the inode is stable on the disk
45 * @inode: The inode
46 * @sync: synchronous write flag
47 *
48 * Returns: errno
49 */
50
51static int gfs2_write_inode(struct inode *inode, int sync)
52{
53 struct gfs2_inode *ip = GFS2_I(inode);
54
55 /* Check this is a "normal" inode */
56 if (inode->i_private) {
57 if (current->flags & PF_MEMALLOC)
58 return 0;
59 if (sync)
60 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
61 }
62
63 return 0;
64}
65
66/**
67 * gfs2_put_super - Unmount the filesystem
68 * @sb: The VFS superblock
69 *
70 */
71
72static void gfs2_put_super(struct super_block *sb)
73{
74 struct gfs2_sbd *sdp = sb->s_fs_info;
75 int error;
76
77 if (!sdp)
78 return;
79
80 if (!strncmp(sb->s_type->name, "gfs2meta", 8))
81 return; /* Nothing to do */
82
83 /* Unfreeze the filesystem, if we need to */
84
85 mutex_lock(&sdp->sd_freeze_lock);
86 if (sdp->sd_freeze_count)
87 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
88 mutex_unlock(&sdp->sd_freeze_lock);
89
90 kthread_stop(sdp->sd_quotad_process);
91 kthread_stop(sdp->sd_logd_process);
92 kthread_stop(sdp->sd_recoverd_process);
93 while (sdp->sd_glockd_num--)
94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
95 kthread_stop(sdp->sd_scand_process);
96
97 if (!(sb->s_flags & MS_RDONLY)) {
98 error = gfs2_make_fs_ro(sdp);
99 if (error)
100 gfs2_io_error(sdp);
101 }
102 /* At this point, we're through modifying the disk */
103
104 /* Release stuff */
105
106 iput(sdp->sd_master_dir);
107 iput(sdp->sd_jindex);
108 iput(sdp->sd_inum_inode);
109 iput(sdp->sd_statfs_inode);
110 iput(sdp->sd_rindex);
111 iput(sdp->sd_quota_inode);
112
113 gfs2_glock_put(sdp->sd_rename_gl);
114 gfs2_glock_put(sdp->sd_trans_gl);
115
116 if (!sdp->sd_args.ar_spectator) {
117 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
118 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
119 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
120 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
121 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
122 iput(sdp->sd_ir_inode);
123 iput(sdp->sd_sc_inode);
124 iput(sdp->sd_qc_inode);
125 }
126
127 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
128 gfs2_clear_rgrpd(sdp);
129 gfs2_jindex_free(sdp);
130 /* Take apart glock structures and buffer lists */
131 gfs2_gl_hash_clear(sdp, WAIT);
132 /* Unmount the locking protocol */
133 gfs2_lm_unmount(sdp);
134
135 /* At this point, we're through participating in the lockspace */
136 gfs2_sys_fs_del(sdp);
137 kfree(sdp);
138}
139
140/**
141 * gfs2_write_super - disk commit all incore transactions
142 * @sb: the filesystem
143 *
144 * This function is called every time sync(2) is called.
145 * After this exits, all dirty buffers are synced.
146 */
147
148static void gfs2_write_super(struct super_block *sb)
149{
150 gfs2_log_flush(sb->s_fs_info, NULL);
151}
152
153/**
154 * gfs2_write_super_lockfs - prevent further writes to the filesystem
155 * @sb: the VFS structure for the filesystem
156 *
157 */
158
159static void gfs2_write_super_lockfs(struct super_block *sb)
160{
161 struct gfs2_sbd *sdp = sb->s_fs_info;
162 int error;
163
164 for (;;) {
165 error = gfs2_freeze_fs(sdp);
166 if (!error)
167 break;
168
169 switch (error) {
170 case -EBUSY:
171 fs_err(sdp, "waiting for recovery before freeze\n");
172 break;
173
174 default:
175 fs_err(sdp, "error freezing FS: %d\n", error);
176 break;
177 }
178
179 fs_err(sdp, "retrying...\n");
180 msleep(1000);
181 }
182}
183
184/**
185 * gfs2_unlockfs - reallow writes to the filesystem
186 * @sb: the VFS structure for the filesystem
187 *
188 */
189
190static void gfs2_unlockfs(struct super_block *sb)
191{
192 gfs2_unfreeze_fs(sb->s_fs_info);
193}
194
195/**
196 * gfs2_statfs - Gather and return stats about the filesystem
197 * @sb: The superblock
198 * @statfsbuf: The buffer
199 *
200 * Returns: 0 on success or error code
201 */
202
203static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
204{
205 struct super_block *sb = dentry->d_inode->i_sb;
206 struct gfs2_sbd *sdp = sb->s_fs_info;
207 struct gfs2_statfs_change sc;
208 int error;
209
210 if (gfs2_tune_get(sdp, gt_statfs_slow))
211 error = gfs2_statfs_slow(sdp, &sc);
212 else
213 error = gfs2_statfs_i(sdp, &sc);
214
215 if (error)
216 return error;
217
218 buf->f_type = GFS2_MAGIC;
219 buf->f_bsize = sdp->sd_sb.sb_bsize;
220 buf->f_blocks = sc.sc_total;
221 buf->f_bfree = sc.sc_free;
222 buf->f_bavail = sc.sc_free;
223 buf->f_files = sc.sc_dinodes + sc.sc_free;
224 buf->f_ffree = sc.sc_free;
225 buf->f_namelen = GFS2_FNAMESIZE;
226
227 return 0;
228}
229
230/**
231 * gfs2_remount_fs - called when the FS is remounted
232 * @sb: the filesystem
233 * @flags: the remount flags
234 * @data: extra data passed in (not used right now)
235 *
236 * Returns: errno
237 */
238
239static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
240{
241 struct gfs2_sbd *sdp = sb->s_fs_info;
242 int error;
243
244 error = gfs2_mount_args(sdp, data, 1);
245 if (error)
246 return error;
247
248 if (sdp->sd_args.ar_spectator)
249 *flags |= MS_RDONLY;
250 else {
251 if (*flags & MS_RDONLY) {
252 if (!(sb->s_flags & MS_RDONLY))
253 error = gfs2_make_fs_ro(sdp);
254 } else if (!(*flags & MS_RDONLY) &&
255 (sb->s_flags & MS_RDONLY)) {
256 error = gfs2_make_fs_rw(sdp);
257 }
258 }
259
260 if (*flags & (MS_NOATIME | MS_NODIRATIME))
261 set_bit(SDF_NOATIME, &sdp->sd_flags);
262 else
263 clear_bit(SDF_NOATIME, &sdp->sd_flags);
264
265 /* Don't let the VFS update atimes. GFS2 handles this itself. */
266 *flags |= MS_NOATIME | MS_NODIRATIME;
267
268 return error;
269}
270
271/**
272 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
273 * @inode: The VFS inode
274 *
275 */
276
277static void gfs2_clear_inode(struct inode *inode)
278{
279 /* This tells us its a "real" inode and not one which only
280 * serves to contain an address space (see rgrp.c, meta_io.c)
281 * which therefore doesn't have its own glocks.
282 */
283 if (inode->i_private) {
284 struct gfs2_inode *ip = GFS2_I(inode);
285 gfs2_glock_inode_squish(inode);
286 gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
287 ip->i_gl->gl_object = NULL;
288 gfs2_glock_schedule_for_reclaim(ip->i_gl);
289 gfs2_glock_put(ip->i_gl);
290 ip->i_gl = NULL;
291 if (ip->i_iopen_gh.gh_gl)
292 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
293 }
294}
295
296/**
297 * gfs2_show_options - Show mount options for /proc/mounts
298 * @s: seq_file structure
299 * @mnt: vfsmount
300 *
301 * Returns: 0 on success or error code
302 */
303
304static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
305{
306 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
307 struct gfs2_args *args = &sdp->sd_args;
308
309 if (args->ar_lockproto[0])
310 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
311 if (args->ar_locktable[0])
312 seq_printf(s, ",locktable=%s", args->ar_locktable);
313 if (args->ar_hostdata[0])
314 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
315 if (args->ar_spectator)
316 seq_printf(s, ",spectator");
317 if (args->ar_ignore_local_fs)
318 seq_printf(s, ",ignore_local_fs");
319 if (args->ar_localflocks)
320 seq_printf(s, ",localflocks");
321 if (args->ar_localcaching)
322 seq_printf(s, ",localcaching");
323 if (args->ar_debug)
324 seq_printf(s, ",debug");
325 if (args->ar_upgrade)
326 seq_printf(s, ",upgrade");
327 if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
328 seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
329 if (args->ar_posix_acl)
330 seq_printf(s, ",acl");
331 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
332 char *state;
333 switch (args->ar_quota) {
334 case GFS2_QUOTA_OFF:
335 state = "off";
336 break;
337 case GFS2_QUOTA_ACCOUNT:
338 state = "account";
339 break;
340 case GFS2_QUOTA_ON:
341 state = "on";
342 break;
343 default:
344 state = "unknown";
345 break;
346 }
347 seq_printf(s, ",quota=%s", state);
348 }
349 if (args->ar_suiddir)
350 seq_printf(s, ",suiddir");
351 if (args->ar_data != GFS2_DATA_DEFAULT) {
352 char *state;
353 switch (args->ar_data) {
354 case GFS2_DATA_WRITEBACK:
355 state = "writeback";
356 break;
357 case GFS2_DATA_ORDERED:
358 state = "ordered";
359 break;
360 default:
361 state = "unknown";
362 break;
363 }
364 seq_printf(s, ",data=%s", state);
365 }
366
367 return 0;
368}
369
370/*
371 * We have to (at the moment) hold the inodes main lock to cover
372 * the gap between unlocking the shared lock on the iopen lock and
373 * taking the exclusive lock. I'd rather do a shared -> exclusive
374 * conversion on the iopen lock, but we can change that later. This
375 * is safe, just less efficient.
376 */
377static void gfs2_delete_inode(struct inode *inode)
378{
379 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
380 struct gfs2_inode *ip = GFS2_I(inode);
381 struct gfs2_holder gh;
382 int error;
383
384 if (!inode->i_private)
385 goto out;
386
387 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
388 if (unlikely(error)) {
389 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
390 goto out;
391 }
392
393 gfs2_glock_dq(&ip->i_iopen_gh);
394 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
395 error = gfs2_glock_nq(&ip->i_iopen_gh);
396 if (error)
397 goto out_uninit;
398
399 if (S_ISDIR(ip->i_di.di_mode) &&
400 (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
401 error = gfs2_dir_exhash_dealloc(ip);
402 if (error)
403 goto out_unlock;
404 }
405
406 if (ip->i_di.di_eattr) {
407 error = gfs2_ea_dealloc(ip);
408 if (error)
409 goto out_unlock;
410 }
411
412 if (!gfs2_is_stuffed(ip)) {
413 error = gfs2_file_dealloc(ip);
414 if (error)
415 goto out_unlock;
416 }
417
418 error = gfs2_dinode_dealloc(ip);
419
420out_unlock:
421 gfs2_glock_dq(&ip->i_iopen_gh);
422out_uninit:
423 gfs2_holder_uninit(&ip->i_iopen_gh);
424 gfs2_glock_dq_uninit(&gh);
425 if (error)
426 fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
427out:
428 truncate_inode_pages(&inode->i_data, 0);
429 clear_inode(inode);
430}
431
432
433
434static struct inode *gfs2_alloc_inode(struct super_block *sb)
435{
436 struct gfs2_sbd *sdp = sb->s_fs_info;
437 struct gfs2_inode *ip;
438
439 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
440 if (ip) {
441 ip->i_flags = 0;
442 ip->i_gl = NULL;
443 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
444 ip->i_last_pfault = jiffies;
445 }
446 return &ip->i_inode;
447}
448
449static void gfs2_destroy_inode(struct inode *inode)
450{
451 kmem_cache_free(gfs2_inode_cachep, inode);
452}
453
454struct super_operations gfs2_super_ops = {
455 .alloc_inode = gfs2_alloc_inode,
456 .destroy_inode = gfs2_destroy_inode,
457 .write_inode = gfs2_write_inode,
458 .delete_inode = gfs2_delete_inode,
459 .put_super = gfs2_put_super,
460 .write_super = gfs2_write_super,
461 .write_super_lockfs = gfs2_write_super_lockfs,
462 .unlockfs = gfs2_unlockfs,
463 .statfs = gfs2_statfs,
464 .remount_fs = gfs2_remount_fs,
465 .clear_inode = gfs2_clear_inode,
466 .show_options = gfs2_show_options,
467};
468
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
new file mode 100644
index 000000000000..9de73f042f78
--- /dev/null
+++ b/fs/gfs2/ops_super.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_SUPER_DOT_H__
11#define __OPS_SUPER_DOT_H__
12
13#include <linux/fs.h>
14
15extern struct super_operations gfs2_super_ops;
16
17#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
new file mode 100644
index 000000000000..5453d2947ab3
--- /dev/null
+++ b/fs/gfs2/ops_vm.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "bmap.h"
23#include "glock.h"
24#include "inode.h"
25#include "ops_vm.h"
26#include "quota.h"
27#include "rgrp.h"
28#include "trans.h"
29#include "util.h"
30
31static void pfault_be_greedy(struct gfs2_inode *ip)
32{
33 unsigned int time;
34
35 spin_lock(&ip->i_spin);
36 time = ip->i_greedy;
37 ip->i_last_pfault = jiffies;
38 spin_unlock(&ip->i_spin);
39
40 igrab(&ip->i_inode);
41 if (gfs2_glock_be_greedy(ip->i_gl, time))
42 iput(&ip->i_inode);
43}
44
45static struct page *gfs2_private_nopage(struct vm_area_struct *area,
46 unsigned long address, int *type)
47{
48 struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
49 struct page *result;
50
51 set_bit(GIF_PAGED, &ip->i_flags);
52
53 result = filemap_nopage(area, address, type);
54
55 if (result && result != NOPAGE_OOM)
56 pfault_be_greedy(ip);
57
58 return result;
59}
60
61static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
62{
63 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
64 unsigned long index = page->index;
65 u64 lblock = index << (PAGE_CACHE_SHIFT -
66 sdp->sd_sb.sb_bsize_shift);
67 unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
68 struct gfs2_alloc *al;
69 unsigned int data_blocks, ind_blocks;
70 unsigned int x;
71 int error;
72
73 al = gfs2_alloc_get(ip);
74
75 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
76 if (error)
77 goto out;
78
79 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
80 if (error)
81 goto out_gunlock_q;
82
83 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
84
85 al->al_requested = data_blocks + ind_blocks;
86
87 error = gfs2_inplace_reserve(ip);
88 if (error)
89 goto out_gunlock_q;
90
91 error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
92 ind_blocks + RES_DINODE +
93 RES_STATFS + RES_QUOTA, 0);
94 if (error)
95 goto out_ipres;
96
97 if (gfs2_is_stuffed(ip)) {
98 error = gfs2_unstuff_dinode(ip, NULL);
99 if (error)
100 goto out_trans;
101 }
102
103 for (x = 0; x < blocks; ) {
104 u64 dblock;
105 unsigned int extlen;
106 int new = 1;
107
108 error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
109 if (error)
110 goto out_trans;
111
112 lblock += extlen;
113 x += extlen;
114 }
115
116 gfs2_assert_warn(sdp, al->al_alloced);
117
118out_trans:
119 gfs2_trans_end(sdp);
120out_ipres:
121 gfs2_inplace_release(ip);
122out_gunlock_q:
123 gfs2_quota_unlock(ip);
124out:
125 gfs2_alloc_put(ip);
126 return error;
127}
128
129static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
130 unsigned long address, int *type)
131{
132 struct file *file = area->vm_file;
133 struct gfs2_file *gf = file->private_data;
134 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
135 struct gfs2_holder i_gh;
136 struct page *result = NULL;
137 unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
138 area->vm_pgoff;
139 int alloc_required;
140 int error;
141
142 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
143 if (error)
144 return NULL;
145
146 set_bit(GIF_PAGED, &ip->i_flags);
147 set_bit(GIF_SW_PAGED, &ip->i_flags);
148
149 error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
150 PAGE_CACHE_SIZE, &alloc_required);
151 if (error)
152 goto out;
153
154 set_bit(GFF_EXLOCK, &gf->f_flags);
155 result = filemap_nopage(area, address, type);
156 clear_bit(GFF_EXLOCK, &gf->f_flags);
157 if (!result || result == NOPAGE_OOM)
158 goto out;
159
160 if (alloc_required) {
161 error = alloc_page_backing(ip, result);
162 if (error) {
163 page_cache_release(result);
164 result = NULL;
165 goto out;
166 }
167 set_page_dirty(result);
168 }
169
170 pfault_be_greedy(ip);
171out:
172 gfs2_glock_dq_uninit(&i_gh);
173
174 return result;
175}
176
177struct vm_operations_struct gfs2_vm_ops_private = {
178 .nopage = gfs2_private_nopage,
179};
180
181struct vm_operations_struct gfs2_vm_ops_sharewrite = {
182 .nopage = gfs2_sharewrite_nopage,
183};
184
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
new file mode 100644
index 000000000000..4ae8f43ed5e3
--- /dev/null
+++ b/fs/gfs2/ops_vm.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_VM_DOT_H__
11#define __OPS_VM_DOT_H__
12
13#include <linux/mm.h>
14
15extern struct vm_operations_struct gfs2_vm_ops_private;
16extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
17
18#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
new file mode 100644
index 000000000000..c0a3c9a2a593
--- /dev/null
+++ b/fs/gfs2/quota.c
@@ -0,0 +1,1226 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10/*
11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controlable.
18 * Since quota tags are part of transactions, there is no need to a quota check
19 * program to be run on node crashes or anything like that.
20 *
21 * There are couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite node with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent.
34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read.
37 */
38
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/spinlock.h>
42#include <linux/completion.h>
43#include <linux/buffer_head.h>
44#include <linux/sort.h>
45#include <linux/fs.h>
46#include <linux/gfs2_ondisk.h>
47#include <linux/lm_interface.h>
48
49#include "gfs2.h"
50#include "incore.h"
51#include "bmap.h"
52#include "glock.h"
53#include "glops.h"
54#include "log.h"
55#include "meta_io.h"
56#include "quota.h"
57#include "rgrp.h"
58#include "super.h"
59#include "trans.h"
60#include "inode.h"
61#include "ops_file.h"
62#include "ops_address.h"
63#include "util.h"
64
65#define QUOTA_USER 1
66#define QUOTA_GROUP 0
67
68static u64 qd2offset(struct gfs2_quota_data *qd)
69{
70 u64 offset;
71
72 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
73 offset *= sizeof(struct gfs2_quota);
74
75 return offset;
76}
77
78static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
79 struct gfs2_quota_data **qdp)
80{
81 struct gfs2_quota_data *qd;
82 int error;
83
84 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
85 if (!qd)
86 return -ENOMEM;
87
88 qd->qd_count = 1;
89 qd->qd_id = id;
90 if (user)
91 set_bit(QDF_USER, &qd->qd_flags);
92 qd->qd_slot = -1;
93
94 error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
95 &gfs2_quota_glops, CREATE, &qd->qd_gl);
96 if (error)
97 goto fail;
98
99 error = gfs2_lvb_hold(qd->qd_gl);
100 gfs2_glock_put(qd->qd_gl);
101 if (error)
102 goto fail;
103
104 *qdp = qd;
105
106 return 0;
107
108fail:
109 kfree(qd);
110 return error;
111}
112
113static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
114 struct gfs2_quota_data **qdp)
115{
116 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
117 int error, found;
118
119 *qdp = NULL;
120
121 for (;;) {
122 found = 0;
123 spin_lock(&sdp->sd_quota_spin);
124 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
125 if (qd->qd_id == id &&
126 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
127 qd->qd_count++;
128 found = 1;
129 break;
130 }
131 }
132
133 if (!found)
134 qd = NULL;
135
136 if (!qd && new_qd) {
137 qd = new_qd;
138 list_add(&qd->qd_list, &sdp->sd_quota_list);
139 atomic_inc(&sdp->sd_quota_count);
140 new_qd = NULL;
141 }
142
143 spin_unlock(&sdp->sd_quota_spin);
144
145 if (qd || !create) {
146 if (new_qd) {
147 gfs2_lvb_unhold(new_qd->qd_gl);
148 kfree(new_qd);
149 }
150 *qdp = qd;
151 return 0;
152 }
153
154 error = qd_alloc(sdp, user, id, &new_qd);
155 if (error)
156 return error;
157 }
158}
159
160static void qd_hold(struct gfs2_quota_data *qd)
161{
162 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
163
164 spin_lock(&sdp->sd_quota_spin);
165 gfs2_assert(sdp, qd->qd_count);
166 qd->qd_count++;
167 spin_unlock(&sdp->sd_quota_spin);
168}
169
170static void qd_put(struct gfs2_quota_data *qd)
171{
172 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
173 spin_lock(&sdp->sd_quota_spin);
174 gfs2_assert(sdp, qd->qd_count);
175 if (!--qd->qd_count)
176 qd->qd_last_touched = jiffies;
177 spin_unlock(&sdp->sd_quota_spin);
178}
179
180static int slot_get(struct gfs2_quota_data *qd)
181{
182 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
183 unsigned int c, o = 0, b;
184 unsigned char byte = 0;
185
186 spin_lock(&sdp->sd_quota_spin);
187
188 if (qd->qd_slot_count++) {
189 spin_unlock(&sdp->sd_quota_spin);
190 return 0;
191 }
192
193 for (c = 0; c < sdp->sd_quota_chunks; c++)
194 for (o = 0; o < PAGE_SIZE; o++) {
195 byte = sdp->sd_quota_bitmap[c][o];
196 if (byte != 0xFF)
197 goto found;
198 }
199
200 goto fail;
201
202found:
203 for (b = 0; b < 8; b++)
204 if (!(byte & (1 << b)))
205 break;
206 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
207
208 if (qd->qd_slot >= sdp->sd_quota_slots)
209 goto fail;
210
211 sdp->sd_quota_bitmap[c][o] |= 1 << b;
212
213 spin_unlock(&sdp->sd_quota_spin);
214
215 return 0;
216
217fail:
218 qd->qd_slot_count--;
219 spin_unlock(&sdp->sd_quota_spin);
220 return -ENOSPC;
221}
222
223static void slot_hold(struct gfs2_quota_data *qd)
224{
225 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
226
227 spin_lock(&sdp->sd_quota_spin);
228 gfs2_assert(sdp, qd->qd_slot_count);
229 qd->qd_slot_count++;
230 spin_unlock(&sdp->sd_quota_spin);
231}
232
233static void slot_put(struct gfs2_quota_data *qd)
234{
235 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
236
237 spin_lock(&sdp->sd_quota_spin);
238 gfs2_assert(sdp, qd->qd_slot_count);
239 if (!--qd->qd_slot_count) {
240 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
241 qd->qd_slot = -1;
242 }
243 spin_unlock(&sdp->sd_quota_spin);
244}
245
246static int bh_get(struct gfs2_quota_data *qd)
247{
248 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
249 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
250 unsigned int block, offset;
251 struct buffer_head *bh;
252 int error;
253 struct buffer_head bh_map;
254
255 mutex_lock(&sdp->sd_quota_mutex);
256
257 if (qd->qd_bh_count++) {
258 mutex_unlock(&sdp->sd_quota_mutex);
259 return 0;
260 }
261
262 block = qd->qd_slot / sdp->sd_qc_per_block;
263 offset = qd->qd_slot % sdp->sd_qc_per_block;;
264
265 error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map, 1);
266 if (error)
267 goto fail;
268 error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
269 if (error)
270 goto fail;
271 error = -EIO;
272 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
273 goto fail_brelse;
274
275 qd->qd_bh = bh;
276 qd->qd_bh_qc = (struct gfs2_quota_change *)
277 (bh->b_data + sizeof(struct gfs2_meta_header) +
278 offset * sizeof(struct gfs2_quota_change));
279
280 mutex_lock(&sdp->sd_quota_mutex);
281
282 return 0;
283
284fail_brelse:
285 brelse(bh);
286fail:
287 qd->qd_bh_count--;
288 mutex_unlock(&sdp->sd_quota_mutex);
289 return error;
290}
291
292static void bh_put(struct gfs2_quota_data *qd)
293{
294 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
295
296 mutex_lock(&sdp->sd_quota_mutex);
297 gfs2_assert(sdp, qd->qd_bh_count);
298 if (!--qd->qd_bh_count) {
299 brelse(qd->qd_bh);
300 qd->qd_bh = NULL;
301 qd->qd_bh_qc = NULL;
302 }
303 mutex_unlock(&sdp->sd_quota_mutex);
304}
305
306static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
307{
308 struct gfs2_quota_data *qd = NULL;
309 int error;
310 int found = 0;
311
312 *qdp = NULL;
313
314 if (sdp->sd_vfs->s_flags & MS_RDONLY)
315 return 0;
316
317 spin_lock(&sdp->sd_quota_spin);
318
319 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
320 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
321 !test_bit(QDF_CHANGE, &qd->qd_flags) ||
322 qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
323 continue;
324
325 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
326
327 set_bit(QDF_LOCKED, &qd->qd_flags);
328 gfs2_assert_warn(sdp, qd->qd_count);
329 qd->qd_count++;
330 qd->qd_change_sync = qd->qd_change;
331 gfs2_assert_warn(sdp, qd->qd_slot_count);
332 qd->qd_slot_count++;
333 found = 1;
334
335 break;
336 }
337
338 if (!found)
339 qd = NULL;
340
341 spin_unlock(&sdp->sd_quota_spin);
342
343 if (qd) {
344 gfs2_assert_warn(sdp, qd->qd_change_sync);
345 error = bh_get(qd);
346 if (error) {
347 clear_bit(QDF_LOCKED, &qd->qd_flags);
348 slot_put(qd);
349 qd_put(qd);
350 return error;
351 }
352 }
353
354 *qdp = qd;
355
356 return 0;
357}
358
359static int qd_trylock(struct gfs2_quota_data *qd)
360{
361 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
362
363 if (sdp->sd_vfs->s_flags & MS_RDONLY)
364 return 0;
365
366 spin_lock(&sdp->sd_quota_spin);
367
368 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
369 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
370 spin_unlock(&sdp->sd_quota_spin);
371 return 0;
372 }
373
374 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
375
376 set_bit(QDF_LOCKED, &qd->qd_flags);
377 gfs2_assert_warn(sdp, qd->qd_count);
378 qd->qd_count++;
379 qd->qd_change_sync = qd->qd_change;
380 gfs2_assert_warn(sdp, qd->qd_slot_count);
381 qd->qd_slot_count++;
382
383 spin_unlock(&sdp->sd_quota_spin);
384
385 gfs2_assert_warn(sdp, qd->qd_change_sync);
386 if (bh_get(qd)) {
387 clear_bit(QDF_LOCKED, &qd->qd_flags);
388 slot_put(qd);
389 qd_put(qd);
390 return 0;
391 }
392
393 return 1;
394}
395
396static void qd_unlock(struct gfs2_quota_data *qd)
397{
398 gfs2_assert_warn(qd->qd_gl->gl_sbd,
399 test_bit(QDF_LOCKED, &qd->qd_flags));
400 clear_bit(QDF_LOCKED, &qd->qd_flags);
401 bh_put(qd);
402 slot_put(qd);
403 qd_put(qd);
404}
405
406static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
407 struct gfs2_quota_data **qdp)
408{
409 int error;
410
411 error = qd_get(sdp, user, id, create, qdp);
412 if (error)
413 return error;
414
415 error = slot_get(*qdp);
416 if (error)
417 goto fail;
418
419 error = bh_get(*qdp);
420 if (error)
421 goto fail_slot;
422
423 return 0;
424
425fail_slot:
426 slot_put(*qdp);
427fail:
428 qd_put(*qdp);
429 return error;
430}
431
432static void qdsb_put(struct gfs2_quota_data *qd)
433{
434 bh_put(qd);
435 slot_put(qd);
436 qd_put(qd);
437}
438
439int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
440{
441 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
442 struct gfs2_alloc *al = &ip->i_alloc;
443 struct gfs2_quota_data **qd = al->al_qd;
444 int error;
445
446 if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
447 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
448 return -EIO;
449
450 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
451 return 0;
452
453 error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
454 if (error)
455 goto out;
456 al->al_qd_num++;
457 qd++;
458
459 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
460 if (error)
461 goto out;
462 al->al_qd_num++;
463 qd++;
464
465 if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
466 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
467 if (error)
468 goto out;
469 al->al_qd_num++;
470 qd++;
471 }
472
473 if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
474 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
475 if (error)
476 goto out;
477 al->al_qd_num++;
478 qd++;
479 }
480
481out:
482 if (error)
483 gfs2_quota_unhold(ip);
484 return error;
485}
486
487void gfs2_quota_unhold(struct gfs2_inode *ip)
488{
489 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
490 struct gfs2_alloc *al = &ip->i_alloc;
491 unsigned int x;
492
493 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
494
495 for (x = 0; x < al->al_qd_num; x++) {
496 qdsb_put(al->al_qd[x]);
497 al->al_qd[x] = NULL;
498 }
499 al->al_qd_num = 0;
500}
501
502static int sort_qd(const void *a, const void *b)
503{
504 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
505 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;
506
507 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
508 !test_bit(QDF_USER, &qd_b->qd_flags)) {
509 if (test_bit(QDF_USER, &qd_a->qd_flags))
510 return -1;
511 else
512 return 1;
513 }
514 if (qd_a->qd_id < qd_b->qd_id)
515 return -1;
516 if (qd_a->qd_id > qd_b->qd_id)
517 return 1;
518
519 return 0;
520}
521
522static void do_qc(struct gfs2_quota_data *qd, s64 change)
523{
524 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
525 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
526 struct gfs2_quota_change *qc = qd->qd_bh_qc;
527 s64 x;
528
529 mutex_lock(&sdp->sd_quota_mutex);
530 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);
531
532 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
533 qc->qc_change = 0;
534 qc->qc_flags = 0;
535 if (test_bit(QDF_USER, &qd->qd_flags))
536 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
537 qc->qc_id = cpu_to_be32(qd->qd_id);
538 }
539
540 x = qc->qc_change;
541 x = be64_to_cpu(x) + change;
542 qc->qc_change = cpu_to_be64(x);
543
544 spin_lock(&sdp->sd_quota_spin);
545 qd->qd_change = x;
546 spin_unlock(&sdp->sd_quota_spin);
547
548 if (!x) {
549 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
550 clear_bit(QDF_CHANGE, &qd->qd_flags);
551 qc->qc_flags = 0;
552 qc->qc_id = 0;
553 slot_put(qd);
554 qd_put(qd);
555 } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
556 qd_hold(qd);
557 slot_hold(qd);
558 }
559
560 mutex_unlock(&sdp->sd_quota_mutex);
561}
562
563/**
564 * gfs2_adjust_quota
565 *
566 * This function was mostly borrowed from gfs2_block_truncate_page which was
567 * in turn mostly borrowed from ext3
568 */
569static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
570 s64 change, struct gfs2_quota_data *qd)
571{
572 struct inode *inode = &ip->i_inode;
573 struct address_space *mapping = inode->i_mapping;
574 unsigned long index = loc >> PAGE_CACHE_SHIFT;
575 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
576 unsigned blocksize, iblock, pos;
577 struct buffer_head *bh;
578 struct page *page;
579 void *kaddr;
580 __be64 *ptr;
581 s64 value;
582 int err = -EIO;
583
584 page = grab_cache_page(mapping, index);
585 if (!page)
586 return -ENOMEM;
587
588 blocksize = inode->i_sb->s_blocksize;
589 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
590
591 if (!page_has_buffers(page))
592 create_empty_buffers(page, blocksize, 0);
593
594 bh = page_buffers(page);
595 pos = blocksize;
596 while (offset >= pos) {
597 bh = bh->b_this_page;
598 iblock++;
599 pos += blocksize;
600 }
601
602 if (!buffer_mapped(bh)) {
603 gfs2_get_block(inode, iblock, bh, 1);
604 if (!buffer_mapped(bh))
605 goto unlock;
606 }
607
608 if (PageUptodate(page))
609 set_buffer_uptodate(bh);
610
611 if (!buffer_uptodate(bh)) {
612 ll_rw_block(READ, 1, &bh);
613 wait_on_buffer(bh);
614 if (!buffer_uptodate(bh))
615 goto unlock;
616 }
617
618 gfs2_trans_add_bh(ip->i_gl, bh, 0);
619
620 kaddr = kmap_atomic(page, KM_USER0);
621 ptr = kaddr + offset;
622 value = (s64)be64_to_cpu(*ptr) + change;
623 *ptr = cpu_to_be64(value);
624 flush_dcache_page(page);
625 kunmap_atomic(kaddr, KM_USER0);
626 err = 0;
627 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
628 qd->qd_qb.qb_value = cpu_to_be64(value);
629unlock:
630 unlock_page(page);
631 page_cache_release(page);
632 return err;
633}
634
635static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
636{
637 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
638 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
639 unsigned int data_blocks, ind_blocks;
640 struct gfs2_holder *ghs, i_gh;
641 unsigned int qx, x;
642 struct gfs2_quota_data *qd;
643 loff_t offset;
644 unsigned int nalloc = 0;
645 struct gfs2_alloc *al = NULL;
646 int error;
647
648 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
649 &data_blocks, &ind_blocks);
650
651 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
652 if (!ghs)
653 return -ENOMEM;
654
655 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
656 for (qx = 0; qx < num_qd; qx++) {
657 error = gfs2_glock_nq_init(qda[qx]->qd_gl,
658 LM_ST_EXCLUSIVE,
659 GL_NOCACHE, &ghs[qx]);
660 if (error)
661 goto out;
662 }
663
664 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
665 if (error)
666 goto out;
667
668 for (x = 0; x < num_qd; x++) {
669 int alloc_required;
670
671 offset = qd2offset(qda[x]);
672 error = gfs2_write_alloc_required(ip, offset,
673 sizeof(struct gfs2_quota),
674 &alloc_required);
675 if (error)
676 goto out_gunlock;
677 if (alloc_required)
678 nalloc++;
679 }
680
681 if (nalloc) {
682 al = gfs2_alloc_get(ip);
683
684 al->al_requested = nalloc * (data_blocks + ind_blocks);
685
686 error = gfs2_inplace_reserve(ip);
687 if (error)
688 goto out_alloc;
689
690 error = gfs2_trans_begin(sdp,
691 al->al_rgd->rd_ri.ri_length +
692 num_qd * data_blocks +
693 nalloc * ind_blocks +
694 RES_DINODE + num_qd +
695 RES_STATFS, 0);
696 if (error)
697 goto out_ipres;
698 } else {
699 error = gfs2_trans_begin(sdp,
700 num_qd * data_blocks +
701 RES_DINODE + num_qd, 0);
702 if (error)
703 goto out_gunlock;
704 }
705
706 for (x = 0; x < num_qd; x++) {
707 qd = qda[x];
708 offset = qd2offset(qd);
709 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
710 (struct gfs2_quota_data *)
711 qd->qd_gl->gl_lvb);
712 if (error)
713 goto out_end_trans;
714
715 do_qc(qd, -qd->qd_change_sync);
716 }
717
718 error = 0;
719
720out_end_trans:
721 gfs2_trans_end(sdp);
722out_ipres:
723 if (nalloc)
724 gfs2_inplace_release(ip);
725out_alloc:
726 if (nalloc)
727 gfs2_alloc_put(ip);
728out_gunlock:
729 gfs2_glock_dq_uninit(&i_gh);
730out:
731 while (qx--)
732 gfs2_glock_dq_uninit(&ghs[qx]);
733 kfree(ghs);
734 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
735 return error;
736}
737
738static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
739 struct gfs2_holder *q_gh)
740{
741 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
742 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
743 struct gfs2_holder i_gh;
744 struct gfs2_quota q;
745 char buf[sizeof(struct gfs2_quota)];
746 struct file_ra_state ra_state;
747 int error;
748 struct gfs2_quota_lvb *qlvb;
749
750 file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
751restart:
752 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
753 if (error)
754 return error;
755
756 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
757
758 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
759 loff_t pos;
760 gfs2_glock_dq_uninit(q_gh);
761 error = gfs2_glock_nq_init(qd->qd_gl,
762 LM_ST_EXCLUSIVE, GL_NOCACHE,
763 q_gh);
764 if (error)
765 return error;
766
767 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
768 if (error)
769 goto fail;
770
771 memset(buf, 0, sizeof(struct gfs2_quota));
772 pos = qd2offset(qd);
773 error = gfs2_internal_read(ip, &ra_state, buf,
774 &pos, sizeof(struct gfs2_quota));
775 if (error < 0)
776 goto fail_gunlock;
777
778 gfs2_glock_dq_uninit(&i_gh);
779
780
781 gfs2_quota_in(&q, buf);
782 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
783 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
784 qlvb->__pad = 0;
785 qlvb->qb_limit = cpu_to_be64(q.qu_limit);
786 qlvb->qb_warn = cpu_to_be64(q.qu_warn);
787 qlvb->qb_value = cpu_to_be64(q.qu_value);
788 qd->qd_qb = *qlvb;
789
790 if (gfs2_glock_is_blocking(qd->qd_gl)) {
791 gfs2_glock_dq_uninit(q_gh);
792 force_refresh = 0;
793 goto restart;
794 }
795 }
796
797 return 0;
798
799fail_gunlock:
800 gfs2_glock_dq_uninit(&i_gh);
801fail:
802 gfs2_glock_dq_uninit(q_gh);
803 return error;
804}
805
806int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
807{
808 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
809 struct gfs2_alloc *al = &ip->i_alloc;
810 unsigned int x;
811 int error = 0;
812
813 gfs2_quota_hold(ip, uid, gid);
814
815 if (capable(CAP_SYS_RESOURCE) ||
816 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
817 return 0;
818
819 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
820 sort_qd, NULL);
821
822 for (x = 0; x < al->al_qd_num; x++) {
823 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
824 if (error)
825 break;
826 }
827
828 if (!error)
829 set_bit(GIF_QD_LOCKED, &ip->i_flags);
830 else {
831 while (x--)
832 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
833 gfs2_quota_unhold(ip);
834 }
835
836 return error;
837}
838
839static int need_sync(struct gfs2_quota_data *qd)
840{
841 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
842 struct gfs2_tune *gt = &sdp->sd_tune;
843 s64 value;
844 unsigned int num, den;
845 int do_sync = 1;
846
847 if (!qd->qd_qb.qb_limit)
848 return 0;
849
850 spin_lock(&sdp->sd_quota_spin);
851 value = qd->qd_change;
852 spin_unlock(&sdp->sd_quota_spin);
853
854 spin_lock(&gt->gt_spin);
855 num = gt->gt_quota_scale_num;
856 den = gt->gt_quota_scale_den;
857 spin_unlock(&gt->gt_spin);
858
859 if (value < 0)
860 do_sync = 0;
861 else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
862 (s64)be64_to_cpu(qd->qd_qb.qb_limit))
863 do_sync = 0;
864 else {
865 value *= gfs2_jindex_size(sdp) * num;
866 do_div(value, den);
867 value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
868 if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
869 do_sync = 0;
870 }
871
872 return do_sync;
873}
874
875void gfs2_quota_unlock(struct gfs2_inode *ip)
876{
877 struct gfs2_alloc *al = &ip->i_alloc;
878 struct gfs2_quota_data *qda[4];
879 unsigned int count = 0;
880 unsigned int x;
881
882 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
883 goto out;
884
885 for (x = 0; x < al->al_qd_num; x++) {
886 struct gfs2_quota_data *qd;
887 int sync;
888
889 qd = al->al_qd[x];
890 sync = need_sync(qd);
891
892 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
893
894 if (sync && qd_trylock(qd))
895 qda[count++] = qd;
896 }
897
898 if (count) {
899 do_sync(count, qda);
900 for (x = 0; x < count; x++)
901 qd_unlock(qda[x]);
902 }
903
904out:
905 gfs2_quota_unhold(ip);
906}
907
908#define MAX_LINE 256
909
910static int print_message(struct gfs2_quota_data *qd, char *type)
911{
912 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
913
914 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n",
915 sdp->sd_fsname, type,
916 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
917 qd->qd_id);
918
919 return 0;
920}
921
922int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
923{
924 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
925 struct gfs2_alloc *al = &ip->i_alloc;
926 struct gfs2_quota_data *qd;
927 s64 value;
928 unsigned int x;
929 int error = 0;
930
931 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
932 return 0;
933
934 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
935 return 0;
936
937 for (x = 0; x < al->al_qd_num; x++) {
938 qd = al->al_qd[x];
939
940 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
941 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
942 continue;
943
944 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
945 spin_lock(&sdp->sd_quota_spin);
946 value += qd->qd_change;
947 spin_unlock(&sdp->sd_quota_spin);
948
949 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
950 print_message(qd, "exceeded");
951 error = -EDQUOT;
952 break;
953 } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
954 (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
955 time_after_eq(jiffies, qd->qd_last_warn +
956 gfs2_tune_get(sdp,
957 gt_quota_warn_period) * HZ)) {
958 error = print_message(qd, "warning");
959 qd->qd_last_warn = jiffies;
960 }
961 }
962
963 return error;
964}
965
966void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
967 u32 uid, u32 gid)
968{
969 struct gfs2_alloc *al = &ip->i_alloc;
970 struct gfs2_quota_data *qd;
971 unsigned int x;
972 unsigned int found = 0;
973
974 if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
975 return;
976 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
977 return;
978
979 for (x = 0; x < al->al_qd_num; x++) {
980 qd = al->al_qd[x];
981
982 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
983 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
984 do_qc(qd, change);
985 found++;
986 }
987 }
988}
989
990int gfs2_quota_sync(struct gfs2_sbd *sdp)
991{
992 struct gfs2_quota_data **qda;
993 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
994 unsigned int num_qd;
995 unsigned int x;
996 int error = 0;
997
998 sdp->sd_quota_sync_gen++;
999
1000 qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
1001 if (!qda)
1002 return -ENOMEM;
1003
1004 do {
1005 num_qd = 0;
1006
1007 for (;;) {
1008 error = qd_fish(sdp, qda + num_qd);
1009 if (error || !qda[num_qd])
1010 break;
1011 if (++num_qd == max_qd)
1012 break;
1013 }
1014
1015 if (num_qd) {
1016 if (!error)
1017 error = do_sync(num_qd, qda);
1018 if (!error)
1019 for (x = 0; x < num_qd; x++)
1020 qda[x]->qd_sync_gen =
1021 sdp->sd_quota_sync_gen;
1022
1023 for (x = 0; x < num_qd; x++)
1024 qd_unlock(qda[x]);
1025 }
1026 } while (!error && num_qd == max_qd);
1027
1028 kfree(qda);
1029
1030 return error;
1031}
1032
1033int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1034{
1035 struct gfs2_quota_data *qd;
1036 struct gfs2_holder q_gh;
1037 int error;
1038
1039 error = qd_get(sdp, user, id, CREATE, &qd);
1040 if (error)
1041 return error;
1042
1043 error = do_glock(qd, FORCE, &q_gh);
1044 if (!error)
1045 gfs2_glock_dq_uninit(&q_gh);
1046
1047 qd_put(qd);
1048
1049 return error;
1050}
1051
1052int gfs2_quota_init(struct gfs2_sbd *sdp)
1053{
1054 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
1055 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
1056 unsigned int x, slot = 0;
1057 unsigned int found = 0;
1058 u64 dblock;
1059 u32 extlen = 0;
1060 int error;
1061
1062 if (!ip->i_di.di_size || ip->i_di.di_size > (64 << 20) ||
1063 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
1064 gfs2_consist_inode(ip);
1065 return -EIO;
1066 }
1067 sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
1068 sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);
1069
1070 error = -ENOMEM;
1071
1072 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
1073 sizeof(unsigned char *), GFP_KERNEL);
1074 if (!sdp->sd_quota_bitmap)
1075 return error;
1076
1077 for (x = 0; x < sdp->sd_quota_chunks; x++) {
1078 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
1079 if (!sdp->sd_quota_bitmap[x])
1080 goto fail;
1081 }
1082
1083 for (x = 0; x < blocks; x++) {
1084 struct buffer_head *bh;
1085 unsigned int y;
1086
1087 if (!extlen) {
1088 int new = 0;
1089 error = gfs2_extent_map(&ip->i_inode, x, &new, &dblock, &extlen);
1090 if (error)
1091 goto fail;
1092 }
1093 error = -EIO;
1094 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
1095 if (!bh)
1096 goto fail;
1097 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
1098 brelse(bh);
1099 goto fail;
1100 }
1101
1102 for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
1103 y++, slot++) {
1104 struct gfs2_quota_change qc;
1105 struct gfs2_quota_data *qd;
1106
1107 gfs2_quota_change_in(&qc, bh->b_data +
1108 sizeof(struct gfs2_meta_header) +
1109 y * sizeof(struct gfs2_quota_change));
1110 if (!qc.qc_change)
1111 continue;
1112
1113 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
1114 qc.qc_id, &qd);
1115 if (error) {
1116 brelse(bh);
1117 goto fail;
1118 }
1119
1120 set_bit(QDF_CHANGE, &qd->qd_flags);
1121 qd->qd_change = qc.qc_change;
1122 qd->qd_slot = slot;
1123 qd->qd_slot_count = 1;
1124 qd->qd_last_touched = jiffies;
1125
1126 spin_lock(&sdp->sd_quota_spin);
1127 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1128 list_add(&qd->qd_list, &sdp->sd_quota_list);
1129 atomic_inc(&sdp->sd_quota_count);
1130 spin_unlock(&sdp->sd_quota_spin);
1131
1132 found++;
1133 }
1134
1135 brelse(bh);
1136 dblock++;
1137 extlen--;
1138 }
1139
1140 if (found)
1141 fs_info(sdp, "found %u quota changes\n", found);
1142
1143 return 0;
1144
1145fail:
1146 gfs2_quota_cleanup(sdp);
1147 return error;
1148}
1149
1150void gfs2_quota_scan(struct gfs2_sbd *sdp)
1151{
1152 struct gfs2_quota_data *qd, *safe;
1153 LIST_HEAD(dead);
1154
1155 spin_lock(&sdp->sd_quota_spin);
1156 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1157 if (!qd->qd_count &&
1158 time_after_eq(jiffies, qd->qd_last_touched +
1159 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1160 list_move(&qd->qd_list, &dead);
1161 gfs2_assert_warn(sdp,
1162 atomic_read(&sdp->sd_quota_count) > 0);
1163 atomic_dec(&sdp->sd_quota_count);
1164 }
1165 }
1166 spin_unlock(&sdp->sd_quota_spin);
1167
1168 while (!list_empty(&dead)) {
1169 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1170 list_del(&qd->qd_list);
1171
1172 gfs2_assert_warn(sdp, !qd->qd_change);
1173 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1174 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1175
1176 gfs2_lvb_unhold(qd->qd_gl);
1177 kfree(qd);
1178 }
1179}
1180
1181void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1182{
1183 struct list_head *head = &sdp->sd_quota_list;
1184 struct gfs2_quota_data *qd;
1185 unsigned int x;
1186
1187 spin_lock(&sdp->sd_quota_spin);
1188 while (!list_empty(head)) {
1189 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1190
1191 if (qd->qd_count > 1 ||
1192 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1193 list_move(&qd->qd_list, head);
1194 spin_unlock(&sdp->sd_quota_spin);
1195 schedule();
1196 spin_lock(&sdp->sd_quota_spin);
1197 continue;
1198 }
1199
1200 list_del(&qd->qd_list);
1201 atomic_dec(&sdp->sd_quota_count);
1202 spin_unlock(&sdp->sd_quota_spin);
1203
1204 if (!qd->qd_count) {
1205 gfs2_assert_warn(sdp, !qd->qd_change);
1206 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1207 } else
1208 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1209 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1210
1211 gfs2_lvb_unhold(qd->qd_gl);
1212 kfree(qd);
1213
1214 spin_lock(&sdp->sd_quota_spin);
1215 }
1216 spin_unlock(&sdp->sd_quota_spin);
1217
1218 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1219
1220 if (sdp->sd_quota_bitmap) {
1221 for (x = 0; x < sdp->sd_quota_chunks; x++)
1222 kfree(sdp->sd_quota_bitmap[x]);
1223 kfree(sdp->sd_quota_bitmap);
1224 }
1225}
1226
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
new file mode 100644
index 000000000000..a8be1417051f
--- /dev/null
+++ b/fs/gfs2/quota.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __QUOTA_DOT_H__
11#define __QUOTA_DOT_H__
12
13struct gfs2_inode;
14struct gfs2_sbd;
15
16#define NO_QUOTA_CHANGE ((u32)-1)
17
18int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
19void gfs2_quota_unhold(struct gfs2_inode *ip);
20
21int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
22void gfs2_quota_unlock(struct gfs2_inode *ip);
23
24int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
25void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
26 u32 uid, u32 gid);
27
28int gfs2_quota_sync(struct gfs2_sbd *sdp);
29int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
30
31int gfs2_quota_init(struct gfs2_sbd *sdp);
32void gfs2_quota_scan(struct gfs2_sbd *sdp);
33void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
34
35#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
new file mode 100644
index 000000000000..0a8a4b87dcc6
--- /dev/null
+++ b/fs/gfs2/recovery.c
@@ -0,0 +1,570 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "glops.h"
24#include "lm.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "recovery.h"
28#include "super.h"
29#include "util.h"
30#include "dir.h"
31
32int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
33 struct buffer_head **bh)
34{
35 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
36 struct gfs2_glock *gl = ip->i_gl;
37 int new = 0;
38 u64 dblock;
39 u32 extlen;
40 int error;
41
42 error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
43 if (error)
44 return error;
45 if (!dblock) {
46 gfs2_consist_inode(ip);
47 return -EIO;
48 }
49
50 *bh = gfs2_meta_ra(gl, dblock, extlen);
51
52 return error;
53}
54
55int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
56{
57 struct list_head *head = &sdp->sd_revoke_list;
58 struct gfs2_revoke_replay *rr;
59 int found = 0;
60
61 list_for_each_entry(rr, head, rr_list) {
62 if (rr->rr_blkno == blkno) {
63 found = 1;
64 break;
65 }
66 }
67
68 if (found) {
69 rr->rr_where = where;
70 return 0;
71 }
72
73 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
74 if (!rr)
75 return -ENOMEM;
76
77 rr->rr_blkno = blkno;
78 rr->rr_where = where;
79 list_add(&rr->rr_list, head);
80
81 return 1;
82}
83
84int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
85{
86 struct gfs2_revoke_replay *rr;
87 int wrap, a, b, revoke;
88 int found = 0;
89
90 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
91 if (rr->rr_blkno == blkno) {
92 found = 1;
93 break;
94 }
95 }
96
97 if (!found)
98 return 0;
99
100 wrap = (rr->rr_where < sdp->sd_replay_tail);
101 a = (sdp->sd_replay_tail < where);
102 b = (where < rr->rr_where);
103 revoke = (wrap) ? (a || b) : (a && b);
104
105 return revoke;
106}
107
108void gfs2_revoke_clean(struct gfs2_sbd *sdp)
109{
110 struct list_head *head = &sdp->sd_revoke_list;
111 struct gfs2_revoke_replay *rr;
112
113 while (!list_empty(head)) {
114 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
115 list_del(&rr->rr_list);
116 kfree(rr);
117 }
118}
119
120/**
121 * get_log_header - read the log header for a given segment
122 * @jd: the journal
123 * @blk: the block to look at
124 * @lh: the log header to return
125 *
126 * Read the log header for a given segement in a given journal. Do a few
127 * sanity checks on it.
128 *
129 * Returns: 0 on success,
130 * 1 if the header was invalid or incomplete,
131 * errno on error
132 */
133
134static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
135 struct gfs2_log_header *head)
136{
137 struct buffer_head *bh;
138 struct gfs2_log_header lh;
139 u32 hash;
140 int error;
141
142 error = gfs2_replay_read_block(jd, blk, &bh);
143 if (error)
144 return error;
145
146 memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
147 lh.lh_hash = 0;
148 hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
149 gfs2_log_header_in(&lh, bh->b_data);
150
151 brelse(bh);
152
153 if (lh.lh_header.mh_magic != GFS2_MAGIC ||
154 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
155 lh.lh_blkno != blk || lh.lh_hash != hash)
156 return 1;
157
158 *head = lh;
159
160 return 0;
161}
162
163/**
164 * find_good_lh - find a good log header
165 * @jd: the journal
166 * @blk: the segment to start searching from
167 * @lh: the log header to fill in
168 * @forward: if true search forward in the log, else search backward
169 *
170 * Call get_log_header() to get a log header for a segment, but if the
171 * segment is bad, either scan forward or backward until we find a good one.
172 *
173 * Returns: errno
174 */
175
176static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
177 struct gfs2_log_header *head)
178{
179 unsigned int orig_blk = *blk;
180 int error;
181
182 for (;;) {
183 error = get_log_header(jd, *blk, head);
184 if (error <= 0)
185 return error;
186
187 if (++*blk == jd->jd_blocks)
188 *blk = 0;
189
190 if (*blk == orig_blk) {
191 gfs2_consist_inode(GFS2_I(jd->jd_inode));
192 return -EIO;
193 }
194 }
195}
196
197/**
198 * jhead_scan - make sure we've found the head of the log
199 * @jd: the journal
200 * @head: this is filled in with the log descriptor of the head
201 *
202 * At this point, seg and lh should be either the head of the log or just
203 * before. Scan forward until we find the head.
204 *
205 * Returns: errno
206 */
207
208static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
209{
210 unsigned int blk = head->lh_blkno;
211 struct gfs2_log_header lh;
212 int error;
213
214 for (;;) {
215 if (++blk == jd->jd_blocks)
216 blk = 0;
217
218 error = get_log_header(jd, blk, &lh);
219 if (error < 0)
220 return error;
221 if (error == 1)
222 continue;
223
224 if (lh.lh_sequence == head->lh_sequence) {
225 gfs2_consist_inode(GFS2_I(jd->jd_inode));
226 return -EIO;
227 }
228 if (lh.lh_sequence < head->lh_sequence)
229 break;
230
231 *head = lh;
232 }
233
234 return 0;
235}
236
237/**
238 * gfs2_find_jhead - find the head of a log
239 * @jd: the journal
240 * @head: the log descriptor for the head of the log is returned here
241 *
242 * Do a binary search of a journal and find the valid log entry with the
243 * highest sequence number. (i.e. the log head)
244 *
245 * Returns: errno
246 */
247
248int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
249{
250 struct gfs2_log_header lh_1, lh_m;
251 u32 blk_1, blk_2, blk_m;
252 int error;
253
254 blk_1 = 0;
255 blk_2 = jd->jd_blocks - 1;
256
257 for (;;) {
258 blk_m = (blk_1 + blk_2) / 2;
259
260 error = find_good_lh(jd, &blk_1, &lh_1);
261 if (error)
262 return error;
263
264 error = find_good_lh(jd, &blk_m, &lh_m);
265 if (error)
266 return error;
267
268 if (blk_1 == blk_m || blk_m == blk_2)
269 break;
270
271 if (lh_1.lh_sequence <= lh_m.lh_sequence)
272 blk_1 = blk_m;
273 else
274 blk_2 = blk_m;
275 }
276
277 error = jhead_scan(jd, &lh_1);
278 if (error)
279 return error;
280
281 *head = lh_1;
282
283 return error;
284}
285
286/**
287 * foreach_descriptor - go through the active part of the log
288 * @jd: the journal
289 * @start: the first log header in the active region
290 * @end: the last log header (don't process the contents of this entry))
291 *
292 * Call a given function once for every log descriptor in the active
293 * portion of the log.
294 *
295 * Returns: errno
296 */
297
298static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
299 unsigned int end, int pass)
300{
301 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
302 struct buffer_head *bh;
303 struct gfs2_log_descriptor *ld;
304 int error = 0;
305 u32 length;
306 __be64 *ptr;
307 unsigned int offset = sizeof(struct gfs2_log_descriptor);
308 offset += sizeof(__be64) - 1;
309 offset &= ~(sizeof(__be64) - 1);
310
311 while (start != end) {
312 error = gfs2_replay_read_block(jd, start, &bh);
313 if (error)
314 return error;
315 if (gfs2_meta_check(sdp, bh)) {
316 brelse(bh);
317 return -EIO;
318 }
319 ld = (struct gfs2_log_descriptor *)bh->b_data;
320 length = be32_to_cpu(ld->ld_length);
321
322 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
323 struct gfs2_log_header lh;
324 error = get_log_header(jd, start, &lh);
325 if (!error) {
326 gfs2_replay_incr_blk(sdp, &start);
327 brelse(bh);
328 continue;
329 }
330 if (error == 1) {
331 gfs2_consist_inode(GFS2_I(jd->jd_inode));
332 error = -EIO;
333 }
334 brelse(bh);
335 return error;
336 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
337 brelse(bh);
338 return -EIO;
339 }
340 ptr = (__be64 *)(bh->b_data + offset);
341 error = lops_scan_elements(jd, start, ld, ptr, pass);
342 if (error) {
343 brelse(bh);
344 return error;
345 }
346
347 while (length--)
348 gfs2_replay_incr_blk(sdp, &start);
349
350 brelse(bh);
351 }
352
353 return 0;
354}
355
356/**
357 * clean_journal - mark a dirty journal as being clean
358 * @sdp: the filesystem
359 * @jd: the journal
360 * @gl: the journal's glock
361 * @head: the head journal to start from
362 *
363 * Returns: errno
364 */
365
366static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
367{
368 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
369 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
370 unsigned int lblock;
371 struct gfs2_log_header *lh;
372 u32 hash;
373 struct buffer_head *bh;
374 int error;
375 struct buffer_head bh_map;
376
377 lblock = head->lh_blkno;
378 gfs2_replay_incr_blk(sdp, &lblock);
379 error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map, 1);
380 if (error)
381 return error;
382 if (!bh_map.b_blocknr) {
383 gfs2_consist_inode(ip);
384 return -EIO;
385 }
386
387 bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
388 lock_buffer(bh);
389 memset(bh->b_data, 0, bh->b_size);
390 set_buffer_uptodate(bh);
391 clear_buffer_dirty(bh);
392 unlock_buffer(bh);
393
394 lh = (struct gfs2_log_header *)bh->b_data;
395 memset(lh, 0, sizeof(struct gfs2_log_header));
396 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
397 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
398 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
399 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
400 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
401 lh->lh_blkno = cpu_to_be32(lblock);
402 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
403 lh->lh_hash = cpu_to_be32(hash);
404
405 set_buffer_dirty(bh);
406 if (sync_dirty_buffer(bh))
407 gfs2_io_error_bh(sdp, bh);
408 brelse(bh);
409
410 return error;
411}
412
413/**
414 * gfs2_recover_journal - recovery a given journal
415 * @jd: the struct gfs2_jdesc describing the journal
416 *
417 * Acquire the journal's lock, check to see if the journal is clean, and
418 * do recovery if necessary.
419 *
420 * Returns: errno
421 */
422
423int gfs2_recover_journal(struct gfs2_jdesc *jd)
424{
425 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
426 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
427 struct gfs2_log_header head;
428 struct gfs2_holder j_gh, ji_gh, t_gh;
429 unsigned long t;
430 int ro = 0;
431 unsigned int pass;
432 int error;
433
434 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
435 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
436 jd->jd_jid);
437
438 /* Aquire the journal lock so we can do recovery */
439
440 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
441 LM_ST_EXCLUSIVE,
442 LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
443 &j_gh);
444 switch (error) {
445 case 0:
446 break;
447
448 case GLR_TRYFAILED:
449 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
450 error = 0;
451
452 default:
453 goto fail;
454 };
455
456 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
457 LM_FLAG_NOEXP, &ji_gh);
458 if (error)
459 goto fail_gunlock_j;
460 } else {
461 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
462 }
463
464 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
465
466 error = gfs2_jdesc_check(jd);
467 if (error)
468 goto fail_gunlock_ji;
469
470 error = gfs2_find_jhead(jd, &head);
471 if (error)
472 goto fail_gunlock_ji;
473
474 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
475 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
476 jd->jd_jid);
477
478 t = jiffies;
479
480 /* Acquire a shared hold on the transaction lock */
481
482 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
483 LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
484 GL_NOCANCEL | GL_NOCACHE, &t_gh);
485 if (error)
486 goto fail_gunlock_ji;
487
488 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
489 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
490 ro = 1;
491 } else {
492 if (sdp->sd_vfs->s_flags & MS_RDONLY)
493 ro = 1;
494 }
495
496 if (ro) {
497 fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
498 jd->jd_jid);
499 error = -EROFS;
500 goto fail_gunlock_tr;
501 }
502
503 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
504
505 for (pass = 0; pass < 2; pass++) {
506 lops_before_scan(jd, &head, pass);
507 error = foreach_descriptor(jd, head.lh_tail,
508 head.lh_blkno, pass);
509 lops_after_scan(jd, error, pass);
510 if (error)
511 goto fail_gunlock_tr;
512 }
513
514 error = clean_journal(jd, &head);
515 if (error)
516 goto fail_gunlock_tr;
517
518 gfs2_glock_dq_uninit(&t_gh);
519 t = DIV_ROUND_UP(jiffies - t, HZ);
520 fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
521 jd->jd_jid, t);
522 }
523
524 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
525 gfs2_glock_dq_uninit(&ji_gh);
526
527 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
528
529 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
530 gfs2_glock_dq_uninit(&j_gh);
531
532 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
533 return 0;
534
535fail_gunlock_tr:
536 gfs2_glock_dq_uninit(&t_gh);
537fail_gunlock_ji:
538 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
539 gfs2_glock_dq_uninit(&ji_gh);
540fail_gunlock_j:
541 gfs2_glock_dq_uninit(&j_gh);
542 }
543
544 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
545
546fail:
547 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
548 return error;
549}
550
551/**
552 * gfs2_check_journals - Recover any dirty journals
553 * @sdp: the filesystem
554 *
555 */
556
557void gfs2_check_journals(struct gfs2_sbd *sdp)
558{
559 struct gfs2_jdesc *jd;
560
561 for (;;) {
562 jd = gfs2_jdesc_find_dirty(sdp);
563 if (!jd)
564 break;
565
566 if (jd != sdp->sd_jdesc)
567 gfs2_recover_journal(jd);
568 }
569}
570
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
new file mode 100644
index 000000000000..961feedf4d8b
--- /dev/null
+++ b/fs/gfs2/recovery.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __RECOVERY_DOT_H__
11#define __RECOVERY_DOT_H__
12
13#include "incore.h"
14
15static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
16{
17 if (++*blk == sdp->sd_jdesc->jd_blocks)
18 *blk = 0;
19}
20
21int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
22 struct buffer_head **bh);
23
24int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
25int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
26void gfs2_revoke_clean(struct gfs2_sbd *sdp);
27
28int gfs2_find_jhead(struct gfs2_jdesc *jd,
29 struct gfs2_log_header *head);
30int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
31void gfs2_check_journals(struct gfs2_sbd *sdp);
32
33#endif /* __RECOVERY_DOT_H__ */
34
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
new file mode 100644
index 000000000000..b261385c0065
--- /dev/null
+++ b/fs/gfs2/rgrp.c
@@ -0,0 +1,1513 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/fs.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "glock.h"
22#include "glops.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "super.h"
28#include "trans.h"
29#include "ops_file.h"
30#include "util.h"
31
32#define BFITNOENT ((u32)~0)
33
34/*
35 * These routines are used by the resource group routines (rgrp.c)
36 * to keep track of block allocation. Each block is represented by two
37 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks.
38 *
39 * 0 = Free
40 * 1 = Used (not metadata)
41 * 2 = Unlinked (still in use) inode
42 * 3 = Used (metadata)
43 */
44
45static const char valid_change[16] = {
46 /* current */
47 /* n */ 0, 1, 1, 1,
48 /* e */ 1, 0, 0, 0,
49 /* w */ 0, 0, 0, 1,
50 1, 0, 0, 0
51};
52
53/**
54 * gfs2_setbit - Set a bit in the bitmaps
55 * @buffer: the buffer that holds the bitmaps
56 * @buflen: the length (in bytes) of the buffer
57 * @block: the block to set
58 * @new_state: the new state of the block
59 *
60 */
61
62static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
63 unsigned int buflen, u32 block,
64 unsigned char new_state)
65{
66 unsigned char *byte, *end, cur_state;
67 unsigned int bit;
68
69 byte = buffer + (block / GFS2_NBBY);
70 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
71 end = buffer + buflen;
72
73 gfs2_assert(rgd->rd_sbd, byte < end);
74
75 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
76
77 if (valid_change[new_state * 4 + cur_state]) {
78 *byte ^= cur_state << bit;
79 *byte |= new_state << bit;
80 } else
81 gfs2_consist_rgrpd(rgd);
82}
83
84/**
85 * gfs2_testbit - test a bit in the bitmaps
86 * @buffer: the buffer that holds the bitmaps
87 * @buflen: the length (in bytes) of the buffer
88 * @block: the block to read
89 *
90 */
91
92static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
93 unsigned int buflen, u32 block)
94{
95 unsigned char *byte, *end, cur_state;
96 unsigned int bit;
97
98 byte = buffer + (block / GFS2_NBBY);
99 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
100 end = buffer + buflen;
101
102 gfs2_assert(rgd->rd_sbd, byte < end);
103
104 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
105
106 return cur_state;
107}
108
109/**
110 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
111 * a block in a given allocation state.
112 * @buffer: the buffer that holds the bitmaps
113 * @buflen: the length (in bytes) of the buffer
114 * @goal: start search at this block's bit-pair (within @buffer)
115 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
116 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
117 *
118 * Scope of @goal and returned block number is only within this bitmap buffer,
119 * not entire rgrp or filesystem. @buffer will be offset from the actual
120 * beginning of a bitmap block buffer, skipping any header structures.
121 *
122 * Return: the block number (bitmap buffer scope) that was found
123 */
124
125static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
126 unsigned int buflen, u32 goal,
127 unsigned char old_state)
128{
129 unsigned char *byte, *end, alloc;
130 u32 blk = goal;
131 unsigned int bit;
132
133 byte = buffer + (goal / GFS2_NBBY);
134 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
135 end = buffer + buflen;
136 alloc = (old_state & 1) ? 0 : 0x55;
137
138 while (byte < end) {
139 if ((*byte & 0x55) == alloc) {
140 blk += (8 - bit) >> 1;
141
142 bit = 0;
143 byte++;
144
145 continue;
146 }
147
148 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
149 return blk;
150
151 bit += GFS2_BIT_SIZE;
152 if (bit >= 8) {
153 bit = 0;
154 byte++;
155 }
156
157 blk++;
158 }
159
160 return BFITNOENT;
161}
162
163/**
164 * gfs2_bitcount - count the number of bits in a certain state
165 * @buffer: the buffer that holds the bitmaps
166 * @buflen: the length (in bytes) of the buffer
167 * @state: the state of the block we're looking for
168 *
169 * Returns: The number of bits
170 */
171
172static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
173 unsigned int buflen, unsigned char state)
174{
175 unsigned char *byte = buffer;
176 unsigned char *end = buffer + buflen;
177 unsigned char state1 = state << 2;
178 unsigned char state2 = state << 4;
179 unsigned char state3 = state << 6;
180 u32 count = 0;
181
182 for (; byte < end; byte++) {
183 if (((*byte) & 0x03) == state)
184 count++;
185 if (((*byte) & 0x0C) == state1)
186 count++;
187 if (((*byte) & 0x30) == state2)
188 count++;
189 if (((*byte) & 0xC0) == state3)
190 count++;
191 }
192
193 return count;
194}
195
196/**
197 * gfs2_rgrp_verify - Verify that a resource group is consistent
198 * @sdp: the filesystem
199 * @rgd: the rgrp
200 *
201 */
202
203void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
204{
205 struct gfs2_sbd *sdp = rgd->rd_sbd;
206 struct gfs2_bitmap *bi = NULL;
207 u32 length = rgd->rd_ri.ri_length;
208 u32 count[4], tmp;
209 int buf, x;
210
211 memset(count, 0, 4 * sizeof(u32));
212
213 /* Count # blocks in each of 4 possible allocation states */
214 for (buf = 0; buf < length; buf++) {
215 bi = rgd->rd_bits + buf;
216 for (x = 0; x < 4; x++)
217 count[x] += gfs2_bitcount(rgd,
218 bi->bi_bh->b_data +
219 bi->bi_offset,
220 bi->bi_len, x);
221 }
222
223 if (count[0] != rgd->rd_rg.rg_free) {
224 if (gfs2_consist_rgrpd(rgd))
225 fs_err(sdp, "free data mismatch: %u != %u\n",
226 count[0], rgd->rd_rg.rg_free);
227 return;
228 }
229
230 tmp = rgd->rd_ri.ri_data -
231 rgd->rd_rg.rg_free -
232 rgd->rd_rg.rg_dinodes;
233 if (count[1] + count[2] != tmp) {
234 if (gfs2_consist_rgrpd(rgd))
235 fs_err(sdp, "used data mismatch: %u != %u\n",
236 count[1], tmp);
237 return;
238 }
239
240 if (count[3] != rgd->rd_rg.rg_dinodes) {
241 if (gfs2_consist_rgrpd(rgd))
242 fs_err(sdp, "used metadata mismatch: %u != %u\n",
243 count[3], rgd->rd_rg.rg_dinodes);
244 return;
245 }
246
247 if (count[2] > count[3]) {
248 if (gfs2_consist_rgrpd(rgd))
249 fs_err(sdp, "unlinked inodes > inodes: %u\n",
250 count[2]);
251 return;
252 }
253
254}
255
256static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block)
257{
258 u64 first = ri->ri_data0;
259 u64 last = first + ri->ri_data;
260 return first <= block && block < last;
261}
262
263/**
264 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
265 * @sdp: The GFS2 superblock
266 * @n: The data block number
267 *
268 * Returns: The resource group, or NULL if not found
269 */
270
271struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
272{
273 struct gfs2_rgrpd *rgd;
274
275 spin_lock(&sdp->sd_rindex_spin);
276
277 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
278 if (rgrp_contains_block(&rgd->rd_ri, blk)) {
279 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
280 spin_unlock(&sdp->sd_rindex_spin);
281 return rgd;
282 }
283 }
284
285 spin_unlock(&sdp->sd_rindex_spin);
286
287 return NULL;
288}
289
290/**
291 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
292 * @sdp: The GFS2 superblock
293 *
294 * Returns: The first rgrp in the filesystem
295 */
296
297struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
298{
299 gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
300 return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
301}
302
303/**
304 * gfs2_rgrpd_get_next - get the next RG
305 * @rgd: A RG
306 *
307 * Returns: The next rgrp
308 */
309
310struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
311{
312 if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
313 return NULL;
314 return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
315}
316
317static void clear_rgrpdi(struct gfs2_sbd *sdp)
318{
319 struct list_head *head;
320 struct gfs2_rgrpd *rgd;
321 struct gfs2_glock *gl;
322
323 spin_lock(&sdp->sd_rindex_spin);
324 sdp->sd_rindex_forward = NULL;
325 head = &sdp->sd_rindex_recent_list;
326 while (!list_empty(head)) {
327 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
328 list_del(&rgd->rd_recent);
329 }
330 spin_unlock(&sdp->sd_rindex_spin);
331
332 head = &sdp->sd_rindex_list;
333 while (!list_empty(head)) {
334 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
335 gl = rgd->rd_gl;
336
337 list_del(&rgd->rd_list);
338 list_del(&rgd->rd_list_mru);
339
340 if (gl) {
341 gl->gl_object = NULL;
342 gfs2_glock_put(gl);
343 }
344
345 kfree(rgd->rd_bits);
346 kfree(rgd);
347 }
348}
349
350void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
351{
352 mutex_lock(&sdp->sd_rindex_mutex);
353 clear_rgrpdi(sdp);
354 mutex_unlock(&sdp->sd_rindex_mutex);
355}
356
357/**
358 * gfs2_compute_bitstructs - Compute the bitmap sizes
359 * @rgd: The resource group descriptor
360 *
361 * Calculates bitmap descriptors, one for each block that contains bitmap data
362 *
363 * Returns: errno
364 */
365
366static int compute_bitstructs(struct gfs2_rgrpd *rgd)
367{
368 struct gfs2_sbd *sdp = rgd->rd_sbd;
369 struct gfs2_bitmap *bi;
370 u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
371 u32 bytes_left, bytes;
372 int x;
373
374 if (!length)
375 return -EINVAL;
376
377 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
378 if (!rgd->rd_bits)
379 return -ENOMEM;
380
381 bytes_left = rgd->rd_ri.ri_bitbytes;
382
383 for (x = 0; x < length; x++) {
384 bi = rgd->rd_bits + x;
385
386 /* small rgrp; bitmap stored completely in header block */
387 if (length == 1) {
388 bytes = bytes_left;
389 bi->bi_offset = sizeof(struct gfs2_rgrp);
390 bi->bi_start = 0;
391 bi->bi_len = bytes;
392 /* header block */
393 } else if (x == 0) {
394 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
395 bi->bi_offset = sizeof(struct gfs2_rgrp);
396 bi->bi_start = 0;
397 bi->bi_len = bytes;
398 /* last block */
399 } else if (x + 1 == length) {
400 bytes = bytes_left;
401 bi->bi_offset = sizeof(struct gfs2_meta_header);
402 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
403 bi->bi_len = bytes;
404 /* other blocks */
405 } else {
406 bytes = sdp->sd_sb.sb_bsize -
407 sizeof(struct gfs2_meta_header);
408 bi->bi_offset = sizeof(struct gfs2_meta_header);
409 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
410 bi->bi_len = bytes;
411 }
412
413 bytes_left -= bytes;
414 }
415
416 if (bytes_left) {
417 gfs2_consist_rgrpd(rgd);
418 return -EIO;
419 }
420 bi = rgd->rd_bits + (length - 1);
421 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
422 if (gfs2_consist_rgrpd(rgd)) {
423 gfs2_rindex_print(&rgd->rd_ri);
424 fs_err(sdp, "start=%u len=%u offset=%u\n",
425 bi->bi_start, bi->bi_len, bi->bi_offset);
426 }
427 return -EIO;
428 }
429
430 return 0;
431}
432
433/**
434 * gfs2_ri_update - Pull in a new resource index from the disk
435 * @gl: The glock covering the rindex inode
436 *
437 * Returns: 0 on successful update, error code otherwise
438 */
439
440static int gfs2_ri_update(struct gfs2_inode *ip)
441{
442 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
443 struct inode *inode = &ip->i_inode;
444 struct gfs2_rgrpd *rgd;
445 char buf[sizeof(struct gfs2_rindex)];
446 struct file_ra_state ra_state;
447 u64 junk = ip->i_di.di_size;
448 int error;
449
450 if (do_div(junk, sizeof(struct gfs2_rindex))) {
451 gfs2_consist_inode(ip);
452 return -EIO;
453 }
454
455 clear_rgrpdi(sdp);
456
457 file_ra_state_init(&ra_state, inode->i_mapping);
458 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
459 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
460 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
461 sizeof(struct gfs2_rindex));
462 if (!error)
463 break;
464 if (error != sizeof(struct gfs2_rindex)) {
465 if (error > 0)
466 error = -EIO;
467 goto fail;
468 }
469
470 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
471 error = -ENOMEM;
472 if (!rgd)
473 goto fail;
474
475 mutex_init(&rgd->rd_mutex);
476 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
477 rgd->rd_sbd = sdp;
478
479 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
480 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
481
482 gfs2_rindex_in(&rgd->rd_ri, buf);
483 error = compute_bitstructs(rgd);
484 if (error)
485 goto fail;
486
487 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
488 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
489 if (error)
490 goto fail;
491
492 rgd->rd_gl->gl_object = rgd;
493 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
494 }
495
496 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
497 return 0;
498
499fail:
500 clear_rgrpdi(sdp);
501 return error;
502}
503
504/**
505 * gfs2_rindex_hold - Grab a lock on the rindex
506 * @sdp: The GFS2 superblock
507 * @ri_gh: the glock holder
508 *
509 * We grab a lock on the rindex inode to make sure that it doesn't
510 * change whilst we are performing an operation. We keep this lock
511 * for quite long periods of time compared to other locks. This
512 * doesn't matter, since it is shared and it is very, very rarely
513 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
514 *
515 * This makes sure that we're using the latest copy of the resource index
516 * special file, which might have been updated if someone expanded the
517 * filesystem (via gfs2_grow utility), which adds new resource groups.
518 *
519 * Returns: 0 on success, error code otherwise
520 */
521
522int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
523{
524 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
525 struct gfs2_glock *gl = ip->i_gl;
526 int error;
527
528 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
529 if (error)
530 return error;
531
532 /* Read new copy from disk if we don't have the latest */
533 if (sdp->sd_rindex_vn != gl->gl_vn) {
534 mutex_lock(&sdp->sd_rindex_mutex);
535 if (sdp->sd_rindex_vn != gl->gl_vn) {
536 error = gfs2_ri_update(ip);
537 if (error)
538 gfs2_glock_dq_uninit(ri_gh);
539 }
540 mutex_unlock(&sdp->sd_rindex_mutex);
541 }
542
543 return error;
544}
545
546/**
547 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
548 * @rgd: the struct gfs2_rgrpd describing the RG to read in
549 *
550 * Read in all of a Resource Group's header and bitmap blocks.
551 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
552 *
553 * Returns: errno
554 */
555
556int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
557{
558 struct gfs2_sbd *sdp = rgd->rd_sbd;
559 struct gfs2_glock *gl = rgd->rd_gl;
560 unsigned int length = rgd->rd_ri.ri_length;
561 struct gfs2_bitmap *bi;
562 unsigned int x, y;
563 int error;
564
565 mutex_lock(&rgd->rd_mutex);
566
567 spin_lock(&sdp->sd_rindex_spin);
568 if (rgd->rd_bh_count) {
569 rgd->rd_bh_count++;
570 spin_unlock(&sdp->sd_rindex_spin);
571 mutex_unlock(&rgd->rd_mutex);
572 return 0;
573 }
574 spin_unlock(&sdp->sd_rindex_spin);
575
576 for (x = 0; x < length; x++) {
577 bi = rgd->rd_bits + x;
578 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
579 if (error)
580 goto fail;
581 }
582
583 for (y = length; y--;) {
584 bi = rgd->rd_bits + y;
585 error = gfs2_meta_wait(sdp, bi->bi_bh);
586 if (error)
587 goto fail;
588 if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
589 GFS2_METATYPE_RG)) {
590 error = -EIO;
591 goto fail;
592 }
593 }
594
595 if (rgd->rd_rg_vn != gl->gl_vn) {
596 gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
597 rgd->rd_rg_vn = gl->gl_vn;
598 }
599
600 spin_lock(&sdp->sd_rindex_spin);
601 rgd->rd_free_clone = rgd->rd_rg.rg_free;
602 rgd->rd_bh_count++;
603 spin_unlock(&sdp->sd_rindex_spin);
604
605 mutex_unlock(&rgd->rd_mutex);
606
607 return 0;
608
609fail:
610 while (x--) {
611 bi = rgd->rd_bits + x;
612 brelse(bi->bi_bh);
613 bi->bi_bh = NULL;
614 gfs2_assert_warn(sdp, !bi->bi_clone);
615 }
616 mutex_unlock(&rgd->rd_mutex);
617
618 return error;
619}
620
621void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
622{
623 struct gfs2_sbd *sdp = rgd->rd_sbd;
624
625 spin_lock(&sdp->sd_rindex_spin);
626 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
627 rgd->rd_bh_count++;
628 spin_unlock(&sdp->sd_rindex_spin);
629}
630
631/**
632 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
633 * @rgd: the struct gfs2_rgrpd describing the RG to read in
634 *
635 */
636
637void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
638{
639 struct gfs2_sbd *sdp = rgd->rd_sbd;
640 int x, length = rgd->rd_ri.ri_length;
641
642 spin_lock(&sdp->sd_rindex_spin);
643 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
644 if (--rgd->rd_bh_count) {
645 spin_unlock(&sdp->sd_rindex_spin);
646 return;
647 }
648
649 for (x = 0; x < length; x++) {
650 struct gfs2_bitmap *bi = rgd->rd_bits + x;
651 kfree(bi->bi_clone);
652 bi->bi_clone = NULL;
653 brelse(bi->bi_bh);
654 bi->bi_bh = NULL;
655 }
656
657 spin_unlock(&sdp->sd_rindex_spin);
658}
659
660void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
661{
662 struct gfs2_sbd *sdp = rgd->rd_sbd;
663 unsigned int length = rgd->rd_ri.ri_length;
664 unsigned int x;
665
666 for (x = 0; x < length; x++) {
667 struct gfs2_bitmap *bi = rgd->rd_bits + x;
668 if (!bi->bi_clone)
669 continue;
670 memcpy(bi->bi_clone + bi->bi_offset,
671 bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
672 }
673
674 spin_lock(&sdp->sd_rindex_spin);
675 rgd->rd_free_clone = rgd->rd_rg.rg_free;
676 spin_unlock(&sdp->sd_rindex_spin);
677}
678
679/**
680 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
681 * @ip: the incore GFS2 inode structure
682 *
683 * Returns: the struct gfs2_alloc
684 */
685
686struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
687{
688 struct gfs2_alloc *al = &ip->i_alloc;
689
690 /* FIXME: Should assert that the correct locks are held here... */
691 memset(al, 0, sizeof(*al));
692 return al;
693}
694
695/**
696 * try_rgrp_fit - See if a given reservation will fit in a given RG
697 * @rgd: the RG data
698 * @al: the struct gfs2_alloc structure describing the reservation
699 *
700 * If there's room for the requested blocks to be allocated from the RG:
701 * Sets the $al_reserved_data field in @al.
702 * Sets the $al_reserved_meta field in @al.
703 * Sets the $al_rgd field in @al.
704 *
705 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
706 */
707
708static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
709{
710 struct gfs2_sbd *sdp = rgd->rd_sbd;
711 int ret = 0;
712
713 spin_lock(&sdp->sd_rindex_spin);
714 if (rgd->rd_free_clone >= al->al_requested) {
715 al->al_rgd = rgd;
716 ret = 1;
717 }
718 spin_unlock(&sdp->sd_rindex_spin);
719
720 return ret;
721}
722
723/**
724 * recent_rgrp_first - get first RG from "recent" list
725 * @sdp: The GFS2 superblock
726 * @rglast: address of the rgrp used last
727 *
728 * Returns: The first rgrp in the recent list
729 */
730
731static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
732 u64 rglast)
733{
734 struct gfs2_rgrpd *rgd = NULL;
735
736 spin_lock(&sdp->sd_rindex_spin);
737
738 if (list_empty(&sdp->sd_rindex_recent_list))
739 goto out;
740
741 if (!rglast)
742 goto first;
743
744 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
745 if (rgd->rd_ri.ri_addr == rglast)
746 goto out;
747 }
748
749first:
750 rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
751 rd_recent);
752out:
753 spin_unlock(&sdp->sd_rindex_spin);
754 return rgd;
755}
756
757/**
758 * recent_rgrp_next - get next RG from "recent" list
759 * @cur_rgd: current rgrp
760 * @remove:
761 *
762 * Returns: The next rgrp in the recent list
763 */
764
765static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
766 int remove)
767{
768 struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
769 struct list_head *head;
770 struct gfs2_rgrpd *rgd;
771
772 spin_lock(&sdp->sd_rindex_spin);
773
774 head = &sdp->sd_rindex_recent_list;
775
776 list_for_each_entry(rgd, head, rd_recent) {
777 if (rgd == cur_rgd) {
778 if (cur_rgd->rd_recent.next != head)
779 rgd = list_entry(cur_rgd->rd_recent.next,
780 struct gfs2_rgrpd, rd_recent);
781 else
782 rgd = NULL;
783
784 if (remove)
785 list_del(&cur_rgd->rd_recent);
786
787 goto out;
788 }
789 }
790
791 rgd = NULL;
792 if (!list_empty(head))
793 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
794
795out:
796 spin_unlock(&sdp->sd_rindex_spin);
797 return rgd;
798}
799
800/**
801 * recent_rgrp_add - add an RG to tail of "recent" list
802 * @new_rgd: The rgrp to add
803 *
804 */
805
806static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
807{
808 struct gfs2_sbd *sdp = new_rgd->rd_sbd;
809 struct gfs2_rgrpd *rgd;
810 unsigned int count = 0;
811 unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
812
813 spin_lock(&sdp->sd_rindex_spin);
814
815 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
816 if (rgd == new_rgd)
817 goto out;
818
819 if (++count >= max)
820 goto out;
821 }
822 list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
823
824out:
825 spin_unlock(&sdp->sd_rindex_spin);
826}
827
828/**
829 * forward_rgrp_get - get an rgrp to try next from full list
830 * @sdp: The GFS2 superblock
831 *
832 * Returns: The rgrp to try next
833 */
834
835static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
836{
837 struct gfs2_rgrpd *rgd;
838 unsigned int journals = gfs2_jindex_size(sdp);
839 unsigned int rg = 0, x;
840
841 spin_lock(&sdp->sd_rindex_spin);
842
843 rgd = sdp->sd_rindex_forward;
844 if (!rgd) {
845 if (sdp->sd_rgrps >= journals)
846 rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
847
848 for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
849 x++, rgd = gfs2_rgrpd_get_next(rgd))
850 /* Do Nothing */;
851
852 sdp->sd_rindex_forward = rgd;
853 }
854
855 spin_unlock(&sdp->sd_rindex_spin);
856
857 return rgd;
858}
859
860/**
861 * forward_rgrp_set - set the forward rgrp pointer
862 * @sdp: the filesystem
863 * @rgd: The new forward rgrp
864 *
865 */
866
867static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
868{
869 spin_lock(&sdp->sd_rindex_spin);
870 sdp->sd_rindex_forward = rgd;
871 spin_unlock(&sdp->sd_rindex_spin);
872}
873
874/**
875 * get_local_rgrp - Choose and lock a rgrp for allocation
876 * @ip: the inode to reserve space for
877 * @rgp: the chosen and locked rgrp
878 *
879 * Try to acquire rgrp in way which avoids contending with others.
880 *
881 * Returns: errno
882 */
883
884static int get_local_rgrp(struct gfs2_inode *ip)
885{
886 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
887 struct gfs2_rgrpd *rgd, *begin = NULL;
888 struct gfs2_alloc *al = &ip->i_alloc;
889 int flags = LM_FLAG_TRY;
890 int skipped = 0;
891 int loops = 0;
892 int error;
893
894 /* Try recently successful rgrps */
895
896 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
897
898 while (rgd) {
899 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
900 LM_FLAG_TRY, &al->al_rgd_gh);
901 switch (error) {
902 case 0:
903 if (try_rgrp_fit(rgd, al))
904 goto out;
905 gfs2_glock_dq_uninit(&al->al_rgd_gh);
906 rgd = recent_rgrp_next(rgd, 1);
907 break;
908
909 case GLR_TRYFAILED:
910 rgd = recent_rgrp_next(rgd, 0);
911 break;
912
913 default:
914 return error;
915 }
916 }
917
918 /* Go through full list of rgrps */
919
920 begin = rgd = forward_rgrp_get(sdp);
921
922 for (;;) {
923 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
924 &al->al_rgd_gh);
925 switch (error) {
926 case 0:
927 if (try_rgrp_fit(rgd, al))
928 goto out;
929 gfs2_glock_dq_uninit(&al->al_rgd_gh);
930 break;
931
932 case GLR_TRYFAILED:
933 skipped++;
934 break;
935
936 default:
937 return error;
938 }
939
940 rgd = gfs2_rgrpd_get_next(rgd);
941 if (!rgd)
942 rgd = gfs2_rgrpd_get_first(sdp);
943
944 if (rgd == begin) {
945 if (++loops >= 2 || !skipped)
946 return -ENOSPC;
947 flags = 0;
948 }
949 }
950
951out:
952 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
953
954 if (begin) {
955 recent_rgrp_add(rgd);
956 rgd = gfs2_rgrpd_get_next(rgd);
957 if (!rgd)
958 rgd = gfs2_rgrpd_get_first(sdp);
959 forward_rgrp_set(sdp, rgd);
960 }
961
962 return 0;
963}
964
965/**
966 * gfs2_inplace_reserve_i - Reserve space in the filesystem
967 * @ip: the inode to reserve space for
968 *
969 * Returns: errno
970 */
971
972int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
973{
974 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
975 struct gfs2_alloc *al = &ip->i_alloc;
976 int error;
977
978 if (gfs2_assert_warn(sdp, al->al_requested))
979 return -EINVAL;
980
981 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
982 if (error)
983 return error;
984
985 error = get_local_rgrp(ip);
986 if (error) {
987 gfs2_glock_dq_uninit(&al->al_ri_gh);
988 return error;
989 }
990
991 al->al_file = file;
992 al->al_line = line;
993
994 return 0;
995}
996
997/**
998 * gfs2_inplace_release - release an inplace reservation
999 * @ip: the inode the reservation was taken out on
1000 *
1001 * Release a reservation made by gfs2_inplace_reserve().
1002 */
1003
1004void gfs2_inplace_release(struct gfs2_inode *ip)
1005{
1006 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1007 struct gfs2_alloc *al = &ip->i_alloc;
1008
1009 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
1010 fs_warn(sdp, "al_alloced = %u, al_requested = %u "
1011 "al_file = %s, al_line = %u\n",
1012 al->al_alloced, al->al_requested, al->al_file,
1013 al->al_line);
1014
1015 al->al_rgd = NULL;
1016 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1017 gfs2_glock_dq_uninit(&al->al_ri_gh);
1018}
1019
1020/**
1021 * gfs2_get_block_type - Check a block in a RG is of given type
1022 * @rgd: the resource group holding the block
1023 * @block: the block number
1024 *
1025 * Returns: The block type (GFS2_BLKST_*)
1026 */
1027
1028unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1029{
1030 struct gfs2_bitmap *bi = NULL;
1031 u32 length, rgrp_block, buf_block;
1032 unsigned int buf;
1033 unsigned char type;
1034
1035 length = rgd->rd_ri.ri_length;
1036 rgrp_block = block - rgd->rd_ri.ri_data0;
1037
1038 for (buf = 0; buf < length; buf++) {
1039 bi = rgd->rd_bits + buf;
1040 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1041 break;
1042 }
1043
1044 gfs2_assert(rgd->rd_sbd, buf < length);
1045 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1046
1047 type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1048 bi->bi_len, buf_block);
1049
1050 return type;
1051}
1052
1053/**
1054 * rgblk_search - find a block in @old_state, change allocation
1055 * state to @new_state
1056 * @rgd: the resource group descriptor
1057 * @goal: the goal block within the RG (start here to search for avail block)
1058 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
1059 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1060 *
1061 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
1062 * Add the found bitmap buffer to the transaction.
1063 * Set the found bits to @new_state to change block's allocation state.
1064 *
1065 * This function never fails, because we wouldn't call it unless we
1066 * know (from reservation results, etc.) that a block is available.
1067 *
1068 * Scope of @goal and returned block is just within rgrp, not the whole
1069 * filesystem.
1070 *
1071 * Returns: the block number allocated
1072 */
1073
1074static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1075 unsigned char old_state, unsigned char new_state)
1076{
1077 struct gfs2_bitmap *bi = NULL;
1078 u32 length = rgd->rd_ri.ri_length;
1079 u32 blk = 0;
1080 unsigned int buf, x;
1081
1082 /* Find bitmap block that contains bits for goal block */
1083 for (buf = 0; buf < length; buf++) {
1084 bi = rgd->rd_bits + buf;
1085 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1086 break;
1087 }
1088
1089 gfs2_assert(rgd->rd_sbd, buf < length);
1090
1091 /* Convert scope of "goal" from rgrp-wide to within found bit block */
1092 goal -= bi->bi_start * GFS2_NBBY;
1093
1094 /* Search (up to entire) bitmap in this rgrp for allocatable block.
1095 "x <= length", instead of "x < length", because we typically start
1096 the search in the middle of a bit block, but if we can't find an
1097 allocatable block anywhere else, we want to be able wrap around and
1098 search in the first part of our first-searched bit block. */
1099 for (x = 0; x <= length; x++) {
1100 if (bi->bi_clone)
1101 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
1102 bi->bi_len, goal, old_state);
1103 else
1104 blk = gfs2_bitfit(rgd,
1105 bi->bi_bh->b_data + bi->bi_offset,
1106 bi->bi_len, goal, old_state);
1107 if (blk != BFITNOENT)
1108 break;
1109
1110 /* Try next bitmap block (wrap back to rgrp header if at end) */
1111 buf = (buf + 1) % length;
1112 bi = rgd->rd_bits + buf;
1113 goal = 0;
1114 }
1115
1116 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
1117 blk = 0;
1118
1119 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1120 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1121 bi->bi_len, blk, new_state);
1122 if (bi->bi_clone)
1123 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1124 bi->bi_len, blk, new_state);
1125
1126 return bi->bi_start * GFS2_NBBY + blk;
1127}
1128
1129/**
1130 * rgblk_free - Change alloc state of given block(s)
1131 * @sdp: the filesystem
1132 * @bstart: the start of a run of blocks to free
1133 * @blen: the length of the block run (all must lie within ONE RG!)
1134 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1135 *
1136 * Returns: Resource group containing the block(s)
1137 */
1138
1139static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1140 u32 blen, unsigned char new_state)
1141{
1142 struct gfs2_rgrpd *rgd;
1143 struct gfs2_bitmap *bi = NULL;
1144 u32 length, rgrp_blk, buf_blk;
1145 unsigned int buf;
1146
1147 rgd = gfs2_blk2rgrpd(sdp, bstart);
1148 if (!rgd) {
1149 if (gfs2_consist(sdp))
1150 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1151 return NULL;
1152 }
1153
1154 length = rgd->rd_ri.ri_length;
1155
1156 rgrp_blk = bstart - rgd->rd_ri.ri_data0;
1157
1158 while (blen--) {
1159 for (buf = 0; buf < length; buf++) {
1160 bi = rgd->rd_bits + buf;
1161 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1162 break;
1163 }
1164
1165 gfs2_assert(rgd->rd_sbd, buf < length);
1166
1167 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1168 rgrp_blk++;
1169
1170 if (!bi->bi_clone) {
1171 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1172 GFP_NOFS | __GFP_NOFAIL);
1173 memcpy(bi->bi_clone + bi->bi_offset,
1174 bi->bi_bh->b_data + bi->bi_offset,
1175 bi->bi_len);
1176 }
1177 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1178 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1179 bi->bi_len, buf_blk, new_state);
1180 }
1181
1182 return rgd;
1183}
1184
1185/**
1186 * gfs2_alloc_data - Allocate a data block
1187 * @ip: the inode to allocate the data block for
1188 *
1189 * Returns: the allocated block
1190 */
1191
1192u64 gfs2_alloc_data(struct gfs2_inode *ip)
1193{
1194 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1195 struct gfs2_alloc *al = &ip->i_alloc;
1196 struct gfs2_rgrpd *rgd = al->al_rgd;
1197 u32 goal, blk;
1198 u64 block;
1199
1200 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
1201 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
1202 else
1203 goal = rgd->rd_last_alloc_data;
1204
1205 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1206 rgd->rd_last_alloc_data = blk;
1207
1208 block = rgd->rd_ri.ri_data0 + blk;
1209 ip->i_di.di_goal_data = block;
1210
1211 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1212 rgd->rd_rg.rg_free--;
1213
1214 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1215 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1216
1217 al->al_alloced++;
1218
1219 gfs2_statfs_change(sdp, 0, -1, 0);
1220 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1221
1222 spin_lock(&sdp->sd_rindex_spin);
1223 rgd->rd_free_clone--;
1224 spin_unlock(&sdp->sd_rindex_spin);
1225
1226 return block;
1227}
1228
1229/**
1230 * gfs2_alloc_meta - Allocate a metadata block
1231 * @ip: the inode to allocate the metadata block for
1232 *
1233 * Returns: the allocated block
1234 */
1235
1236u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1237{
1238 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1239 struct gfs2_alloc *al = &ip->i_alloc;
1240 struct gfs2_rgrpd *rgd = al->al_rgd;
1241 u32 goal, blk;
1242 u64 block;
1243
1244 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
1245 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
1246 else
1247 goal = rgd->rd_last_alloc_meta;
1248
1249 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1250 rgd->rd_last_alloc_meta = blk;
1251
1252 block = rgd->rd_ri.ri_data0 + blk;
1253 ip->i_di.di_goal_meta = block;
1254
1255 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1256 rgd->rd_rg.rg_free--;
1257
1258 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1259 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1260
1261 al->al_alloced++;
1262
1263 gfs2_statfs_change(sdp, 0, -1, 0);
1264 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1265 gfs2_trans_add_unrevoke(sdp, block);
1266
1267 spin_lock(&sdp->sd_rindex_spin);
1268 rgd->rd_free_clone--;
1269 spin_unlock(&sdp->sd_rindex_spin);
1270
1271 return block;
1272}
1273
1274/**
1275 * gfs2_alloc_di - Allocate a dinode
1276 * @dip: the directory that the inode is going in
1277 *
1278 * Returns: the block allocated
1279 */
1280
1281u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1282{
1283 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1284 struct gfs2_alloc *al = &dip->i_alloc;
1285 struct gfs2_rgrpd *rgd = al->al_rgd;
1286 u32 blk;
1287 u64 block;
1288
1289 blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
1290 GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
1291
1292 rgd->rd_last_alloc_meta = blk;
1293
1294 block = rgd->rd_ri.ri_data0 + blk;
1295
1296 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1297 rgd->rd_rg.rg_free--;
1298 rgd->rd_rg.rg_dinodes++;
1299 *generation = rgd->rd_rg.rg_igeneration++;
1300 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1301 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1302
1303 al->al_alloced++;
1304
1305 gfs2_statfs_change(sdp, 0, -1, +1);
1306 gfs2_trans_add_unrevoke(sdp, block);
1307
1308 spin_lock(&sdp->sd_rindex_spin);
1309 rgd->rd_free_clone--;
1310 spin_unlock(&sdp->sd_rindex_spin);
1311
1312 return block;
1313}
1314
1315/**
1316 * gfs2_free_data - free a contiguous run of data block(s)
1317 * @ip: the inode these blocks are being freed from
1318 * @bstart: first block of a run of contiguous blocks
1319 * @blen: the length of the block run
1320 *
1321 */
1322
1323void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1324{
1325 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1326 struct gfs2_rgrpd *rgd;
1327
1328 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1329 if (!rgd)
1330 return;
1331
1332 rgd->rd_rg.rg_free += blen;
1333
1334 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1335 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1336
1337 gfs2_trans_add_rg(rgd);
1338
1339 gfs2_statfs_change(sdp, 0, +blen, 0);
1340 gfs2_quota_change(ip, -(s64)blen,
1341 ip->i_di.di_uid, ip->i_di.di_gid);
1342}
1343
1344/**
1345 * gfs2_free_meta - free a contiguous run of data block(s)
1346 * @ip: the inode these blocks are being freed from
1347 * @bstart: first block of a run of contiguous blocks
1348 * @blen: the length of the block run
1349 *
1350 */
1351
1352void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1353{
1354 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1355 struct gfs2_rgrpd *rgd;
1356
1357 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1358 if (!rgd)
1359 return;
1360
1361 rgd->rd_rg.rg_free += blen;
1362
1363 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1364 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1365
1366 gfs2_trans_add_rg(rgd);
1367
1368 gfs2_statfs_change(sdp, 0, +blen, 0);
1369 gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid);
1370 gfs2_meta_wipe(ip, bstart, blen);
1371}
1372
1373void gfs2_unlink_di(struct inode *inode)
1374{
1375 struct gfs2_inode *ip = GFS2_I(inode);
1376 struct gfs2_sbd *sdp = GFS2_SB(inode);
1377 struct gfs2_rgrpd *rgd;
1378 u64 blkno = ip->i_num.no_addr;
1379
1380 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1381 if (!rgd)
1382 return;
1383 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1384 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1385 gfs2_trans_add_rg(rgd);
1386}
1387
1388static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1389{
1390 struct gfs2_sbd *sdp = rgd->rd_sbd;
1391 struct gfs2_rgrpd *tmp_rgd;
1392
1393 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1394 if (!tmp_rgd)
1395 return;
1396 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1397
1398 if (!rgd->rd_rg.rg_dinodes)
1399 gfs2_consist_rgrpd(rgd);
1400 rgd->rd_rg.rg_dinodes--;
1401 rgd->rd_rg.rg_free++;
1402
1403 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1404 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1405
1406 gfs2_statfs_change(sdp, 0, +1, -1);
1407 gfs2_trans_add_rg(rgd);
1408}
1409
1410
1411void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1412{
1413 gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
1414 gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
1415 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
1416}
1417
1418/**
1419 * gfs2_rlist_add - add a RG to a list of RGs
1420 * @sdp: the filesystem
1421 * @rlist: the list of resource groups
1422 * @block: the block
1423 *
1424 * Figure out what RG a block belongs to and add that RG to the list
1425 *
1426 * FIXME: Don't use NOFAIL
1427 *
1428 */
1429
1430void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1431 u64 block)
1432{
1433 struct gfs2_rgrpd *rgd;
1434 struct gfs2_rgrpd **tmp;
1435 unsigned int new_space;
1436 unsigned int x;
1437
1438 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1439 return;
1440
1441 rgd = gfs2_blk2rgrpd(sdp, block);
1442 if (!rgd) {
1443 if (gfs2_consist(sdp))
1444 fs_err(sdp, "block = %llu\n", (unsigned long long)block);
1445 return;
1446 }
1447
1448 for (x = 0; x < rlist->rl_rgrps; x++)
1449 if (rlist->rl_rgd[x] == rgd)
1450 return;
1451
1452 if (rlist->rl_rgrps == rlist->rl_space) {
1453 new_space = rlist->rl_space + 10;
1454
1455 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1456 GFP_NOFS | __GFP_NOFAIL);
1457
1458 if (rlist->rl_rgd) {
1459 memcpy(tmp, rlist->rl_rgd,
1460 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1461 kfree(rlist->rl_rgd);
1462 }
1463
1464 rlist->rl_space = new_space;
1465 rlist->rl_rgd = tmp;
1466 }
1467
1468 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1469}
1470
1471/**
1472 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1473 * and initialize an array of glock holders for them
1474 * @rlist: the list of resource groups
1475 * @state: the lock state to acquire the RG lock in
1476 * @flags: the modifier flags for the holder structures
1477 *
1478 * FIXME: Don't use NOFAIL
1479 *
1480 */
1481
1482void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
1483 int flags)
1484{
1485 unsigned int x;
1486
1487 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1488 GFP_NOFS | __GFP_NOFAIL);
1489 for (x = 0; x < rlist->rl_rgrps; x++)
1490 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1491 state, flags,
1492 &rlist->rl_ghs[x]);
1493}
1494
1495/**
1496 * gfs2_rlist_free - free a resource group list
1497 * @list: the list of resource groups
1498 *
1499 */
1500
1501void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1502{
1503 unsigned int x;
1504
1505 kfree(rlist->rl_rgd);
1506
1507 if (rlist->rl_ghs) {
1508 for (x = 0; x < rlist->rl_rgrps; x++)
1509 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1510 kfree(rlist->rl_ghs);
1511 }
1512}
1513
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
new file mode 100644
index 000000000000..9eedfd12bfff
--- /dev/null
+++ b/fs/gfs2/rgrp.h
@@ -0,0 +1,69 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __RGRP_DOT_H__
11#define __RGRP_DOT_H__
12
13struct gfs2_rgrpd;
14struct gfs2_sbd;
15struct gfs2_holder;
16
17void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
18
19struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
20struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
21struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
22
23void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
24int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
25
26int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
27void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
28void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
29
30void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
31
32struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
33static inline void gfs2_alloc_put(struct gfs2_inode *ip)
34{
35 return; /* Se we can see where ip->i_alloc is used */
36}
37
38int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
39 char *file, unsigned int line);
40#define gfs2_inplace_reserve(ip) \
41gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
42
43void gfs2_inplace_release(struct gfs2_inode *ip);
44
45unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
46
47u64 gfs2_alloc_data(struct gfs2_inode *ip);
48u64 gfs2_alloc_meta(struct gfs2_inode *ip);
49u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
50
51void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
52void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
53void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
54void gfs2_unlink_di(struct inode *inode);
55
56struct gfs2_rgrp_list {
57 unsigned int rl_rgrps;
58 unsigned int rl_space;
59 struct gfs2_rgrpd **rl_rgd;
60 struct gfs2_holder *rl_ghs;
61};
62
63void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
64 u64 block);
65void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
66 int flags);
67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
68
69#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
new file mode 100644
index 000000000000..f6ce5e4eaf7e
--- /dev/null
+++ b/fs/gfs2/super.c
@@ -0,0 +1,976 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/bio.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "bmap.h"
23#include "dir.h"
24#include "glock.h"
25#include "glops.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "recovery.h"
31#include "rgrp.h"
32#include "super.h"
33#include "trans.h"
34#include "util.h"
35
36static const u32 gfs2_old_fs_formats[] = {
37 0
38};
39
40static const u32 gfs2_old_multihost_formats[] = {
41 0
42};
43
44/**
45 * gfs2_tune_init - Fill a gfs2_tune structure with default values
46 * @gt: tune
47 *
48 */
49
50void gfs2_tune_init(struct gfs2_tune *gt)
51{
52 spin_lock_init(&gt->gt_spin);
53
54 gt->gt_ilimit = 100;
55 gt->gt_ilimit_tries = 3;
56 gt->gt_ilimit_min = 1;
57 gt->gt_demote_secs = 300;
58 gt->gt_incore_log_blocks = 1024;
59 gt->gt_log_flush_secs = 60;
60 gt->gt_jindex_refresh_secs = 60;
61 gt->gt_scand_secs = 15;
62 gt->gt_recoverd_secs = 60;
63 gt->gt_logd_secs = 1;
64 gt->gt_quotad_secs = 5;
65 gt->gt_quota_simul_sync = 64;
66 gt->gt_quota_warn_period = 10;
67 gt->gt_quota_scale_num = 1;
68 gt->gt_quota_scale_den = 1;
69 gt->gt_quota_cache_secs = 300;
70 gt->gt_quota_quantum = 60;
71 gt->gt_atime_quantum = 3600;
72 gt->gt_new_files_jdata = 0;
73 gt->gt_new_files_directio = 0;
74 gt->gt_max_atomic_write = 4 << 20;
75 gt->gt_max_readahead = 1 << 18;
76 gt->gt_lockdump_size = 131072;
77 gt->gt_stall_secs = 600;
78 gt->gt_complain_secs = 10;
79 gt->gt_reclaim_limit = 5000;
80 gt->gt_entries_per_readdir = 32;
81 gt->gt_prefetch_secs = 10;
82 gt->gt_greedy_default = HZ / 10;
83 gt->gt_greedy_quantum = HZ / 40;
84 gt->gt_greedy_max = HZ / 4;
85 gt->gt_statfs_quantum = 30;
86 gt->gt_statfs_slow = 0;
87}
88
89/**
90 * gfs2_check_sb - Check superblock
91 * @sdp: the filesystem
92 * @sb: The superblock
93 * @silent: Don't print a message if the check fails
94 *
95 * Checks the version code of the FS is one that we understand how to
96 * read and that the sizes of the various on-disk structures have not
97 * changed.
98 */
99
100int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
101{
102 unsigned int x;
103
104 if (sb->sb_header.mh_magic != GFS2_MAGIC ||
105 sb->sb_header.mh_type != GFS2_METATYPE_SB) {
106 if (!silent)
107 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
108 return -EINVAL;
109 }
110
111 /* If format numbers match exactly, we're done. */
112
113 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
114 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
115 return 0;
116
117 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
118 for (x = 0; gfs2_old_fs_formats[x]; x++)
119 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
120 break;
121
122 if (!gfs2_old_fs_formats[x]) {
123 printk(KERN_WARNING
124 "GFS2: code version (%u, %u) is incompatible "
125 "with ondisk format (%u, %u)\n",
126 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
127 sb->sb_fs_format, sb->sb_multihost_format);
128 printk(KERN_WARNING
129 "GFS2: I don't know how to upgrade this FS\n");
130 return -EINVAL;
131 }
132 }
133
134 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
135 for (x = 0; gfs2_old_multihost_formats[x]; x++)
136 if (gfs2_old_multihost_formats[x] ==
137 sb->sb_multihost_format)
138 break;
139
140 if (!gfs2_old_multihost_formats[x]) {
141 printk(KERN_WARNING
142 "GFS2: code version (%u, %u) is incompatible "
143 "with ondisk format (%u, %u)\n",
144 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
145 sb->sb_fs_format, sb->sb_multihost_format);
146 printk(KERN_WARNING
147 "GFS2: I don't know how to upgrade this FS\n");
148 return -EINVAL;
149 }
150 }
151
152 if (!sdp->sd_args.ar_upgrade) {
153 printk(KERN_WARNING
154 "GFS2: code version (%u, %u) is incompatible "
155 "with ondisk format (%u, %u)\n",
156 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
157 sb->sb_fs_format, sb->sb_multihost_format);
158 printk(KERN_INFO
159 "GFS2: Use the \"upgrade\" mount option to upgrade "
160 "the FS\n");
161 printk(KERN_INFO "GFS2: See the manual for more details\n");
162 return -EINVAL;
163 }
164
165 return 0;
166}
167
168
169static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
170{
171 struct page *page = bio->bi_private;
172 if (bio->bi_size)
173 return 1;
174
175 if (!error)
176 SetPageUptodate(page);
177 else
178 printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
179 unlock_page(page);
180 return 0;
181}
182
183static struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
184{
185 struct page *page;
186 struct bio *bio;
187
188 page = alloc_page(GFP_KERNEL);
189 if (unlikely(!page))
190 return NULL;
191
192 ClearPageUptodate(page);
193 ClearPageDirty(page);
194 lock_page(page);
195
196 bio = bio_alloc(GFP_KERNEL, 1);
197 if (unlikely(!bio)) {
198 __free_page(page);
199 return NULL;
200 }
201
202 bio->bi_sector = sector;
203 bio->bi_bdev = sb->s_bdev;
204 bio_add_page(bio, page, PAGE_SIZE, 0);
205
206 bio->bi_end_io = end_bio_io_page;
207 bio->bi_private = page;
208 submit_bio(READ_SYNC, bio);
209 wait_on_page_locked(page);
210 bio_put(bio);
211 if (!PageUptodate(page)) {
212 __free_page(page);
213 return NULL;
214 }
215 return page;
216}
217
218/**
219 * gfs2_read_sb - Read super block
220 * @sdp: The GFS2 superblock
221 * @gl: the glock for the superblock (assumed to be held)
222 * @silent: Don't print message if mount fails
223 *
224 */
225
226int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
227{
228 u32 hash_blocks, ind_blocks, leaf_blocks;
229 u32 tmp_blocks;
230 unsigned int x;
231 int error;
232 struct page *page;
233 char *sb;
234
235 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
236 if (!page) {
237 if (!silent)
238 fs_err(sdp, "can't read superblock\n");
239 return -EIO;
240 }
241 sb = kmap(page);
242 gfs2_sb_in(&sdp->sd_sb, sb);
243 kunmap(page);
244 __free_page(page);
245
246 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
247 if (error)
248 return error;
249
250 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
251 GFS2_BASIC_BLOCK_SHIFT;
252 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
253 sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
254 sizeof(struct gfs2_dinode)) / sizeof(u64);
255 sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
256 sizeof(struct gfs2_meta_header)) / sizeof(u64);
257 sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
258 sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
259 sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
260 sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
261 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
262 sizeof(struct gfs2_meta_header)) /
263 sizeof(struct gfs2_quota_change);
264
265 /* Compute maximum reservation required to add a entry to a directory */
266
267 hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
268 sdp->sd_jbsize);
269
270 ind_blocks = 0;
271 for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
272 tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
273 ind_blocks += tmp_blocks;
274 }
275
276 leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
277
278 sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
279
280 sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
281 sizeof(struct gfs2_dinode);
282 sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
283 for (x = 2;; x++) {
284 u64 space, d;
285 u32 m;
286
287 space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
288 d = space;
289 m = do_div(d, sdp->sd_inptrs);
290
291 if (d != sdp->sd_heightsize[x - 1] || m)
292 break;
293 sdp->sd_heightsize[x] = space;
294 }
295 sdp->sd_max_height = x;
296 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
297
298 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
299 sizeof(struct gfs2_dinode);
300 sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
301 for (x = 2;; x++) {
302 u64 space, d;
303 u32 m;
304
305 space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
306 d = space;
307 m = do_div(d, sdp->sd_inptrs);
308
309 if (d != sdp->sd_jheightsize[x - 1] || m)
310 break;
311 sdp->sd_jheightsize[x] = space;
312 }
313 sdp->sd_max_jheight = x;
314 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
315
316 return 0;
317}
318
319/**
320 * gfs2_jindex_hold - Grab a lock on the jindex
321 * @sdp: The GFS2 superblock
322 * @ji_gh: the holder for the jindex glock
323 *
324 * This is very similar to the gfs2_rindex_hold() function, except that
325 * in general we hold the jindex lock for longer periods of time and
326 * we grab it far less frequently (in general) then the rgrp lock.
327 *
328 * Returns: errno
329 */
330
331int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
332{
333 struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
334 struct qstr name;
335 char buf[20];
336 struct gfs2_jdesc *jd;
337 int error;
338
339 name.name = buf;
340
341 mutex_lock(&sdp->sd_jindex_mutex);
342
343 for (;;) {
344 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
345 GL_LOCAL_EXCL, ji_gh);
346 if (error)
347 break;
348
349 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
350 name.hash = gfs2_disk_hash(name.name, name.len);
351
352 error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
353 if (error == -ENOENT) {
354 error = 0;
355 break;
356 }
357
358 gfs2_glock_dq_uninit(ji_gh);
359
360 if (error)
361 break;
362
363 error = -ENOMEM;
364 jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
365 if (!jd)
366 break;
367
368 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
369 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
370 if (!jd->jd_inode)
371 error = -ENOENT;
372 else
373 error = PTR_ERR(jd->jd_inode);
374 kfree(jd);
375 break;
376 }
377
378 spin_lock(&sdp->sd_jindex_spin);
379 jd->jd_jid = sdp->sd_journals++;
380 list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
381 spin_unlock(&sdp->sd_jindex_spin);
382 }
383
384 mutex_unlock(&sdp->sd_jindex_mutex);
385
386 return error;
387}
388
389/**
390 * gfs2_jindex_free - Clear all the journal index information
391 * @sdp: The GFS2 superblock
392 *
393 */
394
395void gfs2_jindex_free(struct gfs2_sbd *sdp)
396{
397 struct list_head list;
398 struct gfs2_jdesc *jd;
399
400 spin_lock(&sdp->sd_jindex_spin);
401 list_add(&list, &sdp->sd_jindex_list);
402 list_del_init(&sdp->sd_jindex_list);
403 sdp->sd_journals = 0;
404 spin_unlock(&sdp->sd_jindex_spin);
405
406 while (!list_empty(&list)) {
407 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
408 list_del(&jd->jd_list);
409 iput(jd->jd_inode);
410 kfree(jd);
411 }
412}
413
414static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
415{
416 struct gfs2_jdesc *jd;
417 int found = 0;
418
419 list_for_each_entry(jd, head, jd_list) {
420 if (jd->jd_jid == jid) {
421 found = 1;
422 break;
423 }
424 }
425
426 if (!found)
427 jd = NULL;
428
429 return jd;
430}
431
432struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
433{
434 struct gfs2_jdesc *jd;
435
436 spin_lock(&sdp->sd_jindex_spin);
437 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
438 spin_unlock(&sdp->sd_jindex_spin);
439
440 return jd;
441}
442
443void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
444{
445 struct gfs2_jdesc *jd;
446
447 spin_lock(&sdp->sd_jindex_spin);
448 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
449 if (jd)
450 jd->jd_dirty = 1;
451 spin_unlock(&sdp->sd_jindex_spin);
452}
453
454struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
455{
456 struct gfs2_jdesc *jd;
457 int found = 0;
458
459 spin_lock(&sdp->sd_jindex_spin);
460
461 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
462 if (jd->jd_dirty) {
463 jd->jd_dirty = 0;
464 found = 1;
465 break;
466 }
467 }
468 spin_unlock(&sdp->sd_jindex_spin);
469
470 if (!found)
471 jd = NULL;
472
473 return jd;
474}
475
476int gfs2_jdesc_check(struct gfs2_jdesc *jd)
477{
478 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
479 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
480 int ar;
481 int error;
482
483 if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
484 (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
485 gfs2_consist_inode(ip);
486 return -EIO;
487 }
488 jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
489
490 error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
491 if (!error && ar) {
492 gfs2_consist_inode(ip);
493 error = -EIO;
494 }
495
496 return error;
497}
498
499/**
500 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
501 * @sdp: the filesystem
502 *
503 * Returns: errno
504 */
505
506int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
507{
508 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
509 struct gfs2_glock *j_gl = ip->i_gl;
510 struct gfs2_holder t_gh;
511 struct gfs2_log_header head;
512 int error;
513
514 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
515 GL_LOCAL_EXCL, &t_gh);
516 if (error)
517 return error;
518
519 gfs2_meta_cache_flush(ip);
520 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
521
522 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
523 if (error)
524 goto fail;
525
526 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
527 gfs2_consist(sdp);
528 error = -EIO;
529 goto fail;
530 }
531
532 /* Initialize some head of the log stuff */
533 sdp->sd_log_sequence = head.lh_sequence + 1;
534 gfs2_log_pointers_init(sdp, head.lh_blkno);
535
536 error = gfs2_quota_init(sdp);
537 if (error)
538 goto fail;
539
540 set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
541
542 gfs2_glock_dq_uninit(&t_gh);
543
544 return 0;
545
546fail:
547 t_gh.gh_flags |= GL_NOCACHE;
548 gfs2_glock_dq_uninit(&t_gh);
549
550 return error;
551}
552
553/**
554 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
555 * @sdp: the filesystem
556 *
557 * Returns: errno
558 */
559
560int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
561{
562 struct gfs2_holder t_gh;
563 int error;
564
565 gfs2_quota_sync(sdp);
566 gfs2_statfs_sync(sdp);
567
568 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
569 GL_LOCAL_EXCL | GL_NOCACHE,
570 &t_gh);
571 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
572 return error;
573
574 gfs2_meta_syncfs(sdp);
575 gfs2_log_shutdown(sdp);
576
577 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
578
579 if (t_gh.gh_gl)
580 gfs2_glock_dq_uninit(&t_gh);
581
582 gfs2_quota_cleanup(sdp);
583
584 return error;
585}
586
587int gfs2_statfs_init(struct gfs2_sbd *sdp)
588{
589 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
590 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
591 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
592 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
593 struct buffer_head *m_bh, *l_bh;
594 struct gfs2_holder gh;
595 int error;
596
597 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
598 &gh);
599 if (error)
600 return error;
601
602 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
603 if (error)
604 goto out;
605
606 if (sdp->sd_args.ar_spectator) {
607 spin_lock(&sdp->sd_statfs_spin);
608 gfs2_statfs_change_in(m_sc, m_bh->b_data +
609 sizeof(struct gfs2_dinode));
610 spin_unlock(&sdp->sd_statfs_spin);
611 } else {
612 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
613 if (error)
614 goto out_m_bh;
615
616 spin_lock(&sdp->sd_statfs_spin);
617 gfs2_statfs_change_in(m_sc, m_bh->b_data +
618 sizeof(struct gfs2_dinode));
619 gfs2_statfs_change_in(l_sc, l_bh->b_data +
620 sizeof(struct gfs2_dinode));
621 spin_unlock(&sdp->sd_statfs_spin);
622
623 brelse(l_bh);
624 }
625
626out_m_bh:
627 brelse(m_bh);
628out:
629 gfs2_glock_dq_uninit(&gh);
630 return 0;
631}
632
633void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
634 s64 dinodes)
635{
636 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
637 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
638 struct buffer_head *l_bh;
639 int error;
640
641 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
642 if (error)
643 return;
644
645 mutex_lock(&sdp->sd_statfs_mutex);
646 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
647 mutex_unlock(&sdp->sd_statfs_mutex);
648
649 spin_lock(&sdp->sd_statfs_spin);
650 l_sc->sc_total += total;
651 l_sc->sc_free += free;
652 l_sc->sc_dinodes += dinodes;
653 gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
654 spin_unlock(&sdp->sd_statfs_spin);
655
656 brelse(l_bh);
657}
658
659int gfs2_statfs_sync(struct gfs2_sbd *sdp)
660{
661 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
662 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
663 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
664 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
665 struct gfs2_holder gh;
666 struct buffer_head *m_bh, *l_bh;
667 int error;
668
669 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
670 &gh);
671 if (error)
672 return error;
673
674 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
675 if (error)
676 goto out;
677
678 spin_lock(&sdp->sd_statfs_spin);
679 gfs2_statfs_change_in(m_sc, m_bh->b_data +
680 sizeof(struct gfs2_dinode));
681 if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
682 spin_unlock(&sdp->sd_statfs_spin);
683 goto out_bh;
684 }
685 spin_unlock(&sdp->sd_statfs_spin);
686
687 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
688 if (error)
689 goto out_bh;
690
691 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
692 if (error)
693 goto out_bh2;
694
695 mutex_lock(&sdp->sd_statfs_mutex);
696 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
697 mutex_unlock(&sdp->sd_statfs_mutex);
698
699 spin_lock(&sdp->sd_statfs_spin);
700 m_sc->sc_total += l_sc->sc_total;
701 m_sc->sc_free += l_sc->sc_free;
702 m_sc->sc_dinodes += l_sc->sc_dinodes;
703 memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
704 memset(l_bh->b_data + sizeof(struct gfs2_dinode),
705 0, sizeof(struct gfs2_statfs_change));
706 spin_unlock(&sdp->sd_statfs_spin);
707
708 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
709 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
710
711 gfs2_trans_end(sdp);
712
713out_bh2:
714 brelse(l_bh);
715out_bh:
716 brelse(m_bh);
717out:
718 gfs2_glock_dq_uninit(&gh);
719 return error;
720}
721
722/**
723 * gfs2_statfs_i - Do a statfs
724 * @sdp: the filesystem
725 * @sg: the sg structure
726 *
727 * Returns: errno
728 */
729
730int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
731{
732 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
733 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
734
735 spin_lock(&sdp->sd_statfs_spin);
736
737 *sc = *m_sc;
738 sc->sc_total += l_sc->sc_total;
739 sc->sc_free += l_sc->sc_free;
740 sc->sc_dinodes += l_sc->sc_dinodes;
741
742 spin_unlock(&sdp->sd_statfs_spin);
743
744 if (sc->sc_free < 0)
745 sc->sc_free = 0;
746 if (sc->sc_free > sc->sc_total)
747 sc->sc_free = sc->sc_total;
748 if (sc->sc_dinodes < 0)
749 sc->sc_dinodes = 0;
750
751 return 0;
752}
753
754/**
755 * statfs_fill - fill in the sg for a given RG
756 * @rgd: the RG
757 * @sc: the sc structure
758 *
759 * Returns: 0 on success, -ESTALE if the LVB is invalid
760 */
761
762static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
763 struct gfs2_statfs_change *sc)
764{
765 gfs2_rgrp_verify(rgd);
766 sc->sc_total += rgd->rd_ri.ri_data;
767 sc->sc_free += rgd->rd_rg.rg_free;
768 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
769 return 0;
770}
771
772/**
773 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
774 * @sdp: the filesystem
775 * @sc: the sc info that will be returned
776 *
777 * Any error (other than a signal) will cause this routine to fall back
778 * to the synchronous version.
779 *
780 * FIXME: This really shouldn't busy wait like this.
781 *
782 * Returns: errno
783 */
784
785int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
786{
787 struct gfs2_holder ri_gh;
788 struct gfs2_rgrpd *rgd_next;
789 struct gfs2_holder *gha, *gh;
790 unsigned int slots = 64;
791 unsigned int x;
792 int done;
793 int error = 0, err;
794
795 memset(sc, 0, sizeof(struct gfs2_statfs_change));
796 gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
797 if (!gha)
798 return -ENOMEM;
799
800 error = gfs2_rindex_hold(sdp, &ri_gh);
801 if (error)
802 goto out;
803
804 rgd_next = gfs2_rgrpd_get_first(sdp);
805
806 for (;;) {
807 done = 1;
808
809 for (x = 0; x < slots; x++) {
810 gh = gha + x;
811
812 if (gh->gh_gl && gfs2_glock_poll(gh)) {
813 err = gfs2_glock_wait(gh);
814 if (err) {
815 gfs2_holder_uninit(gh);
816 error = err;
817 } else {
818 if (!error)
819 error = statfs_slow_fill(
820 gh->gh_gl->gl_object, sc);
821 gfs2_glock_dq_uninit(gh);
822 }
823 }
824
825 if (gh->gh_gl)
826 done = 0;
827 else if (rgd_next && !error) {
828 error = gfs2_glock_nq_init(rgd_next->rd_gl,
829 LM_ST_SHARED,
830 GL_ASYNC,
831 gh);
832 rgd_next = gfs2_rgrpd_get_next(rgd_next);
833 done = 0;
834 }
835
836 if (signal_pending(current))
837 error = -ERESTARTSYS;
838 }
839
840 if (done)
841 break;
842
843 yield();
844 }
845
846 gfs2_glock_dq_uninit(&ri_gh);
847
848out:
849 kfree(gha);
850 return error;
851}
852
853struct lfcc {
854 struct list_head list;
855 struct gfs2_holder gh;
856};
857
858/**
859 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
860 * journals are clean
861 * @sdp: the file system
862 * @state: the state to put the transaction lock into
863 * @t_gh: the hold on the transaction lock
864 *
865 * Returns: errno
866 */
867
868static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
869 struct gfs2_holder *t_gh)
870{
871 struct gfs2_inode *ip;
872 struct gfs2_holder ji_gh;
873 struct gfs2_jdesc *jd;
874 struct lfcc *lfcc;
875 LIST_HEAD(list);
876 struct gfs2_log_header lh;
877 int error;
878
879 error = gfs2_jindex_hold(sdp, &ji_gh);
880 if (error)
881 return error;
882
883 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
884 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
885 if (!lfcc) {
886 error = -ENOMEM;
887 goto out;
888 }
889 ip = GFS2_I(jd->jd_inode);
890 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
891 if (error) {
892 kfree(lfcc);
893 goto out;
894 }
895 list_add(&lfcc->list, &list);
896 }
897
898 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
899 LM_FLAG_PRIORITY | GL_NOCACHE,
900 t_gh);
901
902 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
903 error = gfs2_jdesc_check(jd);
904 if (error)
905 break;
906 error = gfs2_find_jhead(jd, &lh);
907 if (error)
908 break;
909 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
910 error = -EBUSY;
911 break;
912 }
913 }
914
915 if (error)
916 gfs2_glock_dq_uninit(t_gh);
917
918out:
919 while (!list_empty(&list)) {
920 lfcc = list_entry(list.next, struct lfcc, list);
921 list_del(&lfcc->list);
922 gfs2_glock_dq_uninit(&lfcc->gh);
923 kfree(lfcc);
924 }
925 gfs2_glock_dq_uninit(&ji_gh);
926 return error;
927}
928
929/**
930 * gfs2_freeze_fs - freezes the file system
931 * @sdp: the file system
932 *
933 * This function flushes data and meta data for all machines by
934 * aquiring the transaction log exclusively. All journals are
935 * ensured to be in a clean state as well.
936 *
937 * Returns: errno
938 */
939
940int gfs2_freeze_fs(struct gfs2_sbd *sdp)
941{
942 int error = 0;
943
944 mutex_lock(&sdp->sd_freeze_lock);
945
946 if (!sdp->sd_freeze_count++) {
947 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
948 if (error)
949 sdp->sd_freeze_count--;
950 }
951
952 mutex_unlock(&sdp->sd_freeze_lock);
953
954 return error;
955}
956
957/**
958 * gfs2_unfreeze_fs - unfreezes the file system
959 * @sdp: the file system
960 *
961 * This function allows the file system to proceed by unlocking
962 * the exclusively held transaction lock. Other GFS2 nodes are
963 * now free to acquire the lock shared and go on with their lives.
964 *
965 */
966
967void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
968{
969 mutex_lock(&sdp->sd_freeze_lock);
970
971 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
972 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
973
974 mutex_unlock(&sdp->sd_freeze_lock);
975}
976
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
new file mode 100644
index 000000000000..5fa5119cfba6
--- /dev/null
+++ b/fs/gfs2/super.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __SUPER_DOT_H__
11#define __SUPER_DOT_H__
12
13#include "incore.h"
14
15void gfs2_tune_init(struct gfs2_tune *gt);
16
17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
19
20static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
21{
22 unsigned int x;
23 spin_lock(&sdp->sd_jindex_spin);
24 x = sdp->sd_journals;
25 spin_unlock(&sdp->sd_jindex_spin);
26 return x;
27}
28
29int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
30void gfs2_jindex_free(struct gfs2_sbd *sdp);
31
32struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
33void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
34struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
35int gfs2_jdesc_check(struct gfs2_jdesc *jd);
36
37int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
38 struct gfs2_inode **ipp);
39
40int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
41int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
42
43int gfs2_statfs_init(struct gfs2_sbd *sdp);
44void gfs2_statfs_change(struct gfs2_sbd *sdp,
45 s64 total, s64 free, s64 dinodes);
46int gfs2_statfs_sync(struct gfs2_sbd *sdp);
47int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
48int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
49
50int gfs2_freeze_fs(struct gfs2_sbd *sdp);
51void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
52
53#endif /* __SUPER_DOT_H__ */
54
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
new file mode 100644
index 000000000000..0e0ec988f731
--- /dev/null
+++ b/fs/gfs2/sys.c
@@ -0,0 +1,583 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/kobject.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19#include <asm/uaccess.h>
20
21#include "gfs2.h"
22#include "incore.h"
23#include "lm.h"
24#include "sys.h"
25#include "super.h"
26#include "glock.h"
27#include "quota.h"
28#include "util.h"
29
30char *gfs2_sys_margs;
31spinlock_t gfs2_sys_margs_lock;
32
33static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
34{
35 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id);
36}
37
38static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
39{
40 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
41}
42
43static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
44{
45 unsigned int count;
46
47 mutex_lock(&sdp->sd_freeze_lock);
48 count = sdp->sd_freeze_count;
49 mutex_unlock(&sdp->sd_freeze_lock);
50
51 return snprintf(buf, PAGE_SIZE, "%u\n", count);
52}
53
54static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
55{
56 ssize_t ret = len;
57 int error = 0;
58 int n = simple_strtol(buf, NULL, 0);
59
60 if (!capable(CAP_SYS_ADMIN))
61 return -EACCES;
62
63 switch (n) {
64 case 0:
65 gfs2_unfreeze_fs(sdp);
66 break;
67 case 1:
68 error = gfs2_freeze_fs(sdp);
69 break;
70 default:
71 ret = -EINVAL;
72 }
73
74 if (error)
75 fs_warn(sdp, "freeze %d error %d", n, error);
76
77 return ret;
78}
79
80static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
81{
82 unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
83 return snprintf(buf, PAGE_SIZE, "%u\n", b);
84}
85
86static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
87{
88 if (!capable(CAP_SYS_ADMIN))
89 return -EACCES;
90
91 if (simple_strtol(buf, NULL, 0) != 1)
92 return -EINVAL;
93
94 gfs2_lm_withdraw(sdp,
95 "GFS2: fsid=%s: withdrawing from cluster at user's request\n",
96 sdp->sd_fsname);
97 return len;
98}
99
100static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
101 size_t len)
102{
103 if (!capable(CAP_SYS_ADMIN))
104 return -EACCES;
105
106 if (simple_strtol(buf, NULL, 0) != 1)
107 return -EINVAL;
108
109 gfs2_statfs_sync(sdp);
110 return len;
111}
112
113static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
114{
115 if (!capable(CAP_SYS_ADMIN))
116 return -EACCES;
117
118 if (simple_strtol(buf, NULL, 0) != 1)
119 return -EINVAL;
120
121 gfs2_gl_hash_clear(sdp, NO_WAIT);
122 return len;
123}
124
125static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
126 size_t len)
127{
128 if (!capable(CAP_SYS_ADMIN))
129 return -EACCES;
130
131 if (simple_strtol(buf, NULL, 0) != 1)
132 return -EINVAL;
133
134 gfs2_quota_sync(sdp);
135 return len;
136}
137
138static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
139 size_t len)
140{
141 u32 id;
142
143 if (!capable(CAP_SYS_ADMIN))
144 return -EACCES;
145
146 id = simple_strtoul(buf, NULL, 0);
147
148 gfs2_quota_refresh(sdp, 1, id);
149 return len;
150}
151
152static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
153 size_t len)
154{
155 u32 id;
156
157 if (!capable(CAP_SYS_ADMIN))
158 return -EACCES;
159
160 id = simple_strtoul(buf, NULL, 0);
161
162 gfs2_quota_refresh(sdp, 0, id);
163 return len;
164}
165
166struct gfs2_attr {
167 struct attribute attr;
168 ssize_t (*show)(struct gfs2_sbd *, char *);
169 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
170};
171
172#define GFS2_ATTR(name, mode, show, store) \
173static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
174
175GFS2_ATTR(id, 0444, id_show, NULL);
176GFS2_ATTR(fsname, 0444, fsname_show, NULL);
177GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
178GFS2_ATTR(shrink, 0200, NULL, shrink_store);
179GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
180GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
181GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
182GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
183GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
184
185static struct attribute *gfs2_attrs[] = {
186 &gfs2_attr_id.attr,
187 &gfs2_attr_fsname.attr,
188 &gfs2_attr_freeze.attr,
189 &gfs2_attr_shrink.attr,
190 &gfs2_attr_withdraw.attr,
191 &gfs2_attr_statfs_sync.attr,
192 &gfs2_attr_quota_sync.attr,
193 &gfs2_attr_quota_refresh_user.attr,
194 &gfs2_attr_quota_refresh_group.attr,
195 NULL,
196};
197
198static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
199 char *buf)
200{
201 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
202 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
203 return a->show ? a->show(sdp, buf) : 0;
204}
205
206static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
207 const char *buf, size_t len)
208{
209 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
210 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
211 return a->store ? a->store(sdp, buf, len) : len;
212}
213
214static struct sysfs_ops gfs2_attr_ops = {
215 .show = gfs2_attr_show,
216 .store = gfs2_attr_store,
217};
218
219static struct kobj_type gfs2_ktype = {
220 .default_attrs = gfs2_attrs,
221 .sysfs_ops = &gfs2_attr_ops,
222};
223
224static struct kset gfs2_kset = {
225 .subsys = &fs_subsys,
226 .kobj = {.name = "gfs2"},
227 .ktype = &gfs2_ktype,
228};
229
230/*
231 * display struct lm_lockstruct fields
232 */
233
234struct lockstruct_attr {
235 struct attribute attr;
236 ssize_t (*show)(struct gfs2_sbd *, char *);
237};
238
239#define LOCKSTRUCT_ATTR(name, fmt) \
240static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
241{ \
242 return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
243} \
244static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
245
246LOCKSTRUCT_ATTR(jid, "%u\n");
247LOCKSTRUCT_ATTR(first, "%u\n");
248LOCKSTRUCT_ATTR(lvb_size, "%u\n");
249LOCKSTRUCT_ATTR(flags, "%d\n");
250
251static struct attribute *lockstruct_attrs[] = {
252 &lockstruct_attr_jid.attr,
253 &lockstruct_attr_first.attr,
254 &lockstruct_attr_lvb_size.attr,
255 &lockstruct_attr_flags.attr,
256 NULL,
257};
258
259/*
260 * display struct gfs2_args fields
261 */
262
263struct args_attr {
264 struct attribute attr;
265 ssize_t (*show)(struct gfs2_sbd *, char *);
266};
267
268#define ARGS_ATTR(name, fmt) \
269static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
270{ \
271 return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \
272} \
273static struct args_attr args_attr_##name = __ATTR_RO(name)
274
275ARGS_ATTR(lockproto, "%s\n");
276ARGS_ATTR(locktable, "%s\n");
277ARGS_ATTR(hostdata, "%s\n");
278ARGS_ATTR(spectator, "%d\n");
279ARGS_ATTR(ignore_local_fs, "%d\n");
280ARGS_ATTR(localcaching, "%d\n");
281ARGS_ATTR(localflocks, "%d\n");
282ARGS_ATTR(debug, "%d\n");
283ARGS_ATTR(upgrade, "%d\n");
284ARGS_ATTR(num_glockd, "%u\n");
285ARGS_ATTR(posix_acl, "%d\n");
286ARGS_ATTR(quota, "%u\n");
287ARGS_ATTR(suiddir, "%d\n");
288ARGS_ATTR(data, "%d\n");
289
290/* one oddball doesn't fit the macro mold */
291static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
292{
293 return snprintf(buf, PAGE_SIZE, "%d\n",
294 !!test_bit(SDF_NOATIME, &sdp->sd_flags));
295}
296static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
297
298static struct attribute *args_attrs[] = {
299 &args_attr_lockproto.attr,
300 &args_attr_locktable.attr,
301 &args_attr_hostdata.attr,
302 &args_attr_spectator.attr,
303 &args_attr_ignore_local_fs.attr,
304 &args_attr_localcaching.attr,
305 &args_attr_localflocks.attr,
306 &args_attr_debug.attr,
307 &args_attr_upgrade.attr,
308 &args_attr_num_glockd.attr,
309 &args_attr_posix_acl.attr,
310 &args_attr_quota.attr,
311 &args_attr_suiddir.attr,
312 &args_attr_data.attr,
313 &args_attr_noatime.attr,
314 NULL,
315};
316
317/*
318 * display counters from superblock
319 */
320
321struct counters_attr {
322 struct attribute attr;
323 ssize_t (*show)(struct gfs2_sbd *, char *);
324};
325
326#define COUNTERS_ATTR(name, fmt) \
327static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
328{ \
329 return snprintf(buf, PAGE_SIZE, fmt, \
330 (unsigned int)atomic_read(&sdp->sd_##name)); \
331} \
332static struct counters_attr counters_attr_##name = __ATTR_RO(name)
333
334COUNTERS_ATTR(glock_count, "%u\n");
335COUNTERS_ATTR(glock_held_count, "%u\n");
336COUNTERS_ATTR(inode_count, "%u\n");
337COUNTERS_ATTR(reclaimed, "%u\n");
338
339static struct attribute *counters_attrs[] = {
340 &counters_attr_glock_count.attr,
341 &counters_attr_glock_held_count.attr,
342 &counters_attr_inode_count.attr,
343 &counters_attr_reclaimed.attr,
344 NULL,
345};
346
347/*
348 * get and set struct gfs2_tune fields
349 */
350
351static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
352{
353 return snprintf(buf, PAGE_SIZE, "%u %u\n",
354 sdp->sd_tune.gt_quota_scale_num,
355 sdp->sd_tune.gt_quota_scale_den);
356}
357
358static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
359 size_t len)
360{
361 struct gfs2_tune *gt = &sdp->sd_tune;
362 unsigned int x, y;
363
364 if (!capable(CAP_SYS_ADMIN))
365 return -EACCES;
366
367 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
368 return -EINVAL;
369
370 spin_lock(&gt->gt_spin);
371 gt->gt_quota_scale_num = x;
372 gt->gt_quota_scale_den = y;
373 spin_unlock(&gt->gt_spin);
374 return len;
375}
376
377static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
378 int check_zero, const char *buf, size_t len)
379{
380 struct gfs2_tune *gt = &sdp->sd_tune;
381 unsigned int x;
382
383 if (!capable(CAP_SYS_ADMIN))
384 return -EACCES;
385
386 x = simple_strtoul(buf, NULL, 0);
387
388 if (check_zero && !x)
389 return -EINVAL;
390
391 spin_lock(&gt->gt_spin);
392 *field = x;
393 spin_unlock(&gt->gt_spin);
394 return len;
395}
396
397struct tune_attr {
398 struct attribute attr;
399 ssize_t (*show)(struct gfs2_sbd *, char *);
400 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
401};
402
403#define TUNE_ATTR_3(name, show, store) \
404static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
405
406#define TUNE_ATTR_2(name, store) \
407static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
408{ \
409 return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \
410} \
411TUNE_ATTR_3(name, name##_show, store)
412
413#define TUNE_ATTR(name, check_zero) \
414static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
415{ \
416 return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
417} \
418TUNE_ATTR_2(name, name##_store)
419
420#define TUNE_ATTR_DAEMON(name, process) \
421static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
422{ \
423 ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
424 wake_up_process(sdp->sd_##process); \
425 return r; \
426} \
427TUNE_ATTR_2(name, name##_store)
428
429TUNE_ATTR(ilimit, 0);
430TUNE_ATTR(ilimit_tries, 0);
431TUNE_ATTR(ilimit_min, 0);
432TUNE_ATTR(demote_secs, 0);
433TUNE_ATTR(incore_log_blocks, 0);
434TUNE_ATTR(log_flush_secs, 0);
435TUNE_ATTR(jindex_refresh_secs, 0);
436TUNE_ATTR(quota_warn_period, 0);
437TUNE_ATTR(quota_quantum, 0);
438TUNE_ATTR(atime_quantum, 0);
439TUNE_ATTR(max_readahead, 0);
440TUNE_ATTR(complain_secs, 0);
441TUNE_ATTR(reclaim_limit, 0);
442TUNE_ATTR(prefetch_secs, 0);
443TUNE_ATTR(statfs_slow, 0);
444TUNE_ATTR(new_files_jdata, 0);
445TUNE_ATTR(new_files_directio, 0);
446TUNE_ATTR(quota_simul_sync, 1);
447TUNE_ATTR(quota_cache_secs, 1);
448TUNE_ATTR(max_atomic_write, 1);
449TUNE_ATTR(stall_secs, 1);
450TUNE_ATTR(entries_per_readdir, 1);
451TUNE_ATTR(greedy_default, 1);
452TUNE_ATTR(greedy_quantum, 1);
453TUNE_ATTR(greedy_max, 1);
454TUNE_ATTR(statfs_quantum, 1);
455TUNE_ATTR_DAEMON(scand_secs, scand_process);
456TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
457TUNE_ATTR_DAEMON(logd_secs, logd_process);
458TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
459TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
460
461static struct attribute *tune_attrs[] = {
462 &tune_attr_ilimit.attr,
463 &tune_attr_ilimit_tries.attr,
464 &tune_attr_ilimit_min.attr,
465 &tune_attr_demote_secs.attr,
466 &tune_attr_incore_log_blocks.attr,
467 &tune_attr_log_flush_secs.attr,
468 &tune_attr_jindex_refresh_secs.attr,
469 &tune_attr_quota_warn_period.attr,
470 &tune_attr_quota_quantum.attr,
471 &tune_attr_atime_quantum.attr,
472 &tune_attr_max_readahead.attr,
473 &tune_attr_complain_secs.attr,
474 &tune_attr_reclaim_limit.attr,
475 &tune_attr_prefetch_secs.attr,
476 &tune_attr_statfs_slow.attr,
477 &tune_attr_quota_simul_sync.attr,
478 &tune_attr_quota_cache_secs.attr,
479 &tune_attr_max_atomic_write.attr,
480 &tune_attr_stall_secs.attr,
481 &tune_attr_entries_per_readdir.attr,
482 &tune_attr_greedy_default.attr,
483 &tune_attr_greedy_quantum.attr,
484 &tune_attr_greedy_max.attr,
485 &tune_attr_statfs_quantum.attr,
486 &tune_attr_scand_secs.attr,
487 &tune_attr_recoverd_secs.attr,
488 &tune_attr_logd_secs.attr,
489 &tune_attr_quotad_secs.attr,
490 &tune_attr_quota_scale.attr,
491 &tune_attr_new_files_jdata.attr,
492 &tune_attr_new_files_directio.attr,
493 NULL,
494};
495
496static struct attribute_group lockstruct_group = {
497 .name = "lockstruct",
498 .attrs = lockstruct_attrs,
499};
500
501static struct attribute_group counters_group = {
502 .name = "counters",
503 .attrs = counters_attrs,
504};
505
506static struct attribute_group args_group = {
507 .name = "args",
508 .attrs = args_attrs,
509};
510
511static struct attribute_group tune_group = {
512 .name = "tune",
513 .attrs = tune_attrs,
514};
515
516int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
517{
518 int error;
519
520 sdp->sd_kobj.kset = &gfs2_kset;
521 sdp->sd_kobj.ktype = &gfs2_ktype;
522
523 error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
524 if (error)
525 goto fail;
526
527 error = kobject_register(&sdp->sd_kobj);
528 if (error)
529 goto fail;
530
531 error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
532 if (error)
533 goto fail_reg;
534
535 error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
536 if (error)
537 goto fail_lockstruct;
538
539 error = sysfs_create_group(&sdp->sd_kobj, &args_group);
540 if (error)
541 goto fail_counters;
542
543 error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
544 if (error)
545 goto fail_args;
546
547 return 0;
548
549fail_args:
550 sysfs_remove_group(&sdp->sd_kobj, &args_group);
551fail_counters:
552 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
553fail_lockstruct:
554 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
555fail_reg:
556 kobject_unregister(&sdp->sd_kobj);
557fail:
558 fs_err(sdp, "error %d adding sysfs files", error);
559 return error;
560}
561
562void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
563{
564 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
565 sysfs_remove_group(&sdp->sd_kobj, &args_group);
566 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
567 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
568 kobject_unregister(&sdp->sd_kobj);
569}
570
571int gfs2_sys_init(void)
572{
573 gfs2_sys_margs = NULL;
574 spin_lock_init(&gfs2_sys_margs_lock);
575 return kset_register(&gfs2_kset);
576}
577
578void gfs2_sys_uninit(void)
579{
580 kfree(gfs2_sys_margs);
581 kset_unregister(&gfs2_kset);
582}
583
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
new file mode 100644
index 000000000000..1ca8cdac5304
--- /dev/null
+++ b/fs/gfs2/sys.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __SYS_DOT_H__
11#define __SYS_DOT_H__
12
13#include <linux/spinlock.h>
14struct gfs2_sbd;
15
16/* Allow args to be passed to GFS2 when using an initial ram disk */
17extern char *gfs2_sys_margs;
18extern spinlock_t gfs2_sys_margs_lock;
19
20int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
21void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
22
23int gfs2_sys_init(void);
24void gfs2_sys_uninit(void);
25
26#endif /* __SYS_DOT_H__ */
27
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
new file mode 100644
index 000000000000..f8dabf8446bb
--- /dev/null
+++ b/fs/gfs2/trans.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/kallsyms.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "glock.h"
22#include "log.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "trans.h"
26#include "util.h"
27
28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
29 unsigned int revokes)
30{
31 struct gfs2_trans *tr;
32 int error;
33
34 BUG_ON(current->journal_info);
35 BUG_ON(blocks == 0 && revokes == 0);
36
37 tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
38 if (!tr)
39 return -ENOMEM;
40
41 tr->tr_ip = (unsigned long)__builtin_return_address(0);
42 tr->tr_blocks = blocks;
43 tr->tr_revokes = revokes;
44 tr->tr_reserved = 1;
45 if (blocks)
46 tr->tr_reserved += 6 + blocks;
47 if (revokes)
48 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
49 sizeof(u64));
50 INIT_LIST_HEAD(&tr->tr_list_buf);
51
52 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
53
54 error = gfs2_glock_nq(&tr->tr_t_gh);
55 if (error)
56 goto fail_holder_uninit;
57
58 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
59 tr->tr_t_gh.gh_flags |= GL_NOCACHE;
60 error = -EROFS;
61 goto fail_gunlock;
62 }
63
64 error = gfs2_log_reserve(sdp, tr->tr_reserved);
65 if (error)
66 goto fail_gunlock;
67
68 current->journal_info = tr;
69
70 return 0;
71
72fail_gunlock:
73 gfs2_glock_dq(&tr->tr_t_gh);
74
75fail_holder_uninit:
76 gfs2_holder_uninit(&tr->tr_t_gh);
77 kfree(tr);
78
79 return error;
80}
81
82void gfs2_trans_end(struct gfs2_sbd *sdp)
83{
84 struct gfs2_trans *tr = current->journal_info;
85
86 BUG_ON(!tr);
87 current->journal_info = NULL;
88
89 if (!tr->tr_touched) {
90 gfs2_log_release(sdp, tr->tr_reserved);
91 gfs2_glock_dq(&tr->tr_t_gh);
92 gfs2_holder_uninit(&tr->tr_t_gh);
93 kfree(tr);
94 return;
95 }
96
97 if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) {
98 fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ",
99 tr->tr_num_buf, tr->tr_blocks);
100 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
101 }
102 if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) {
103 fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ",
104 tr->tr_num_revoke, tr->tr_revokes);
105 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
106 }
107
108 gfs2_log_commit(sdp, tr);
109 gfs2_glock_dq(&tr->tr_t_gh);
110 gfs2_holder_uninit(&tr->tr_t_gh);
111 kfree(tr);
112
113 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
114 gfs2_log_flush(sdp, NULL);
115}
116
117void gfs2_trans_add_gl(struct gfs2_glock *gl)
118{
119 lops_add(gl->gl_sbd, &gl->gl_le);
120}
121
122/**
123 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
124 * @gl: the glock the buffer belongs to
125 * @bh: The buffer to add
126 * @meta: True in the case of adding metadata
127 *
128 */
129
130void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
131{
132 struct gfs2_sbd *sdp = gl->gl_sbd;
133 struct gfs2_bufdata *bd;
134
135 bd = bh->b_private;
136 if (bd)
137 gfs2_assert(sdp, bd->bd_gl == gl);
138 else {
139 gfs2_attach_bufdata(gl, bh, meta);
140 bd = bh->b_private;
141 }
142 lops_add(sdp, &bd->bd_le);
143}
144
145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno)
146{
147 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
148 GFP_NOFS | __GFP_NOFAIL);
149 lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
150 rv->rv_blkno = blkno;
151 lops_add(sdp, &rv->rv_le);
152}
153
154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
155{
156 struct gfs2_revoke *rv;
157 int found = 0;
158
159 gfs2_log_lock(sdp);
160
161 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
162 if (rv->rv_blkno == blkno) {
163 list_del(&rv->rv_le.le_list);
164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
165 sdp->sd_log_num_revoke--;
166 found = 1;
167 break;
168 }
169 }
170
171 gfs2_log_unlock(sdp);
172
173 if (found) {
174 struct gfs2_trans *tr = current->journal_info;
175 kfree(rv);
176 tr->tr_num_revoke_rm++;
177 }
178}
179
180void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
181{
182 lops_add(rgd->rd_sbd, &rgd->rd_le);
183}
184
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
new file mode 100644
index 000000000000..23d4cbe1de5b
--- /dev/null
+++ b/fs/gfs2/trans.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __TRANS_DOT_H__
11#define __TRANS_DOT_H__
12
13#include <linux/buffer_head.h>
14struct gfs2_sbd;
15struct gfs2_rgrpd;
16struct gfs2_glock;
17
18#define RES_DINODE 1
19#define RES_INDIRECT 1
20#define RES_JDATA 1
21#define RES_DATA 1
22#define RES_LEAF 1
23#define RES_RG_BIT 2
24#define RES_EATTR 1
25#define RES_STATFS 1
26#define RES_QUOTA 2
27
28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
29 unsigned int revokes);
30
31void gfs2_trans_end(struct gfs2_sbd *sdp);
32
33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38
39#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
new file mode 100644
index 000000000000..196c604faadc
--- /dev/null
+++ b/fs/gfs2/util.c
@@ -0,0 +1,245 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "glock.h"
23#include "lm.h"
24#include "util.h"
25
26kmem_cache_t *gfs2_glock_cachep __read_mostly;
27kmem_cache_t *gfs2_inode_cachep __read_mostly;
28kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
29
30void gfs2_assert_i(struct gfs2_sbd *sdp)
31{
32 printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
33 sdp->sd_fsname);
34}
35
36/**
37 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
38 * Returns: -1 if this call withdrew the machine,
39 * -2 if it was already withdrawn
40 */
41
42int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
43 const char *function, char *file, unsigned int line)
44{
45 int me;
46 me = gfs2_lm_withdraw(sdp,
47 "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
48 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
49 sdp->sd_fsname, assertion,
50 sdp->sd_fsname, function, file, line);
51 dump_stack();
52 return (me) ? -1 : -2;
53}
54
55/**
56 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
57 * Returns: -1 if we printed something
58 * -2 if we didn't
59 */
60
61int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
62 const char *function, char *file, unsigned int line)
63{
64 if (time_before(jiffies,
65 sdp->sd_last_warning +
66 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
67 return -2;
68
69 printk(KERN_WARNING
70 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
71 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
72 sdp->sd_fsname, assertion,
73 sdp->sd_fsname, function, file, line);
74
75 if (sdp->sd_args.ar_debug)
76 BUG();
77 else
78 dump_stack();
79
80 sdp->sd_last_warning = jiffies;
81
82 return -1;
83}
84
85/**
86 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
87 * Returns: -1 if this call withdrew the machine,
88 * 0 if it was already withdrawn
89 */
90
91int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
92 char *file, unsigned int line)
93{
94 int rv;
95 rv = gfs2_lm_withdraw(sdp,
96 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
97 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
98 sdp->sd_fsname,
99 sdp->sd_fsname, function, file, line);
100 return rv;
101}
102
103/**
104 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
105 * Returns: -1 if this call withdrew the machine,
106 * 0 if it was already withdrawn
107 */
108
109int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
110 const char *function, char *file, unsigned int line)
111{
112 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
113 int rv;
114 rv = gfs2_lm_withdraw(sdp,
115 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
116 "GFS2: fsid=%s: inode = %llu %llu\n"
117 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
118 sdp->sd_fsname,
119 sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
120 (unsigned long long)ip->i_num.no_addr,
121 sdp->sd_fsname, function, file, line);
122 return rv;
123}
124
125/**
126 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
127 * Returns: -1 if this call withdrew the machine,
128 * 0 if it was already withdrawn
129 */
130
131int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
132 const char *function, char *file, unsigned int line)
133{
134 struct gfs2_sbd *sdp = rgd->rd_sbd;
135 int rv;
136 rv = gfs2_lm_withdraw(sdp,
137 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
138 "GFS2: fsid=%s: RG = %llu\n"
139 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
140 sdp->sd_fsname,
141 sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
142 sdp->sd_fsname, function, file, line);
143 return rv;
144}
145
146/**
147 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
148 * Returns: -1 if this call withdrew the machine,
149 * -2 if it was already withdrawn
150 */
151
152int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
153 const char *type, const char *function, char *file,
154 unsigned int line)
155{
156 int me;
157 me = gfs2_lm_withdraw(sdp,
158 "GFS2: fsid=%s: fatal: invalid metadata block\n"
159 "GFS2: fsid=%s: bh = %llu (%s)\n"
160 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
161 sdp->sd_fsname,
162 sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type,
163 sdp->sd_fsname, function, file, line);
164 return (me) ? -1 : -2;
165}
166
167/**
168 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
169 * Returns: -1 if this call withdrew the machine,
170 * -2 if it was already withdrawn
171 */
172
173int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
174 u16 type, u16 t, const char *function,
175 char *file, unsigned int line)
176{
177 int me;
178 me = gfs2_lm_withdraw(sdp,
179 "GFS2: fsid=%s: fatal: invalid metadata block\n"
180 "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
181 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
182 sdp->sd_fsname,
183 sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t,
184 sdp->sd_fsname, function, file, line);
185 return (me) ? -1 : -2;
186}
187
188/**
189 * gfs2_io_error_i - Flag an I/O error and withdraw
190 * Returns: -1 if this call withdrew the machine,
191 * 0 if it was already withdrawn
192 */
193
194int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
195 unsigned int line)
196{
197 int rv;
198 rv = gfs2_lm_withdraw(sdp,
199 "GFS2: fsid=%s: fatal: I/O error\n"
200 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
201 sdp->sd_fsname,
202 sdp->sd_fsname, function, file, line);
203 return rv;
204}
205
206/**
207 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
208 * Returns: -1 if this call withdrew the machine,
209 * 0 if it was already withdrawn
210 */
211
212int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
213 const char *function, char *file, unsigned int line)
214{
215 int rv;
216 rv = gfs2_lm_withdraw(sdp,
217 "GFS2: fsid=%s: fatal: I/O error\n"
218 "GFS2: fsid=%s: block = %llu\n"
219 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
220 sdp->sd_fsname,
221 sdp->sd_fsname, (unsigned long long)bh->b_blocknr,
222 sdp->sd_fsname, function, file, line);
223 return rv;
224}
225
226void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
227 unsigned int bit, int new_value)
228{
229 unsigned int c, o, b = bit;
230 int old_value;
231
232 c = b / (8 * PAGE_SIZE);
233 b %= 8 * PAGE_SIZE;
234 o = b / 8;
235 b %= 8;
236
237 old_value = (bitmap[c][o] & (1 << b));
238 gfs2_assert_withdraw(sdp, !old_value != !new_value);
239
240 if (new_value)
241 bitmap[c][o] |= 1 << b;
242 else
243 bitmap[c][o] &= ~(1 << b);
244}
245
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
new file mode 100644
index 000000000000..76a50899fe9e
--- /dev/null
+++ b/fs/gfs2/util.h
@@ -0,0 +1,170 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__
12
13#include "incore.h"
14
15#define fs_printk(level, fs, fmt, arg...) \
16 printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg)
17
18#define fs_info(fs, fmt, arg...) \
19 fs_printk(KERN_INFO , fs , fmt , ## arg)
20
21#define fs_warn(fs, fmt, arg...) \
22 fs_printk(KERN_WARNING , fs , fmt , ## arg)
23
24#define fs_err(fs, fmt, arg...) \
25 fs_printk(KERN_ERR, fs , fmt , ## arg)
26
27
28void gfs2_assert_i(struct gfs2_sbd *sdp);
29
30#define gfs2_assert(sdp, assertion) \
31do { \
32 if (unlikely(!(assertion))) { \
33 gfs2_assert_i(sdp); \
34 BUG(); \
35 } \
36} while (0)
37
38
39int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
40 const char *function, char *file, unsigned int line);
41
42#define gfs2_assert_withdraw(sdp, assertion) \
43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
44 __FUNCTION__, __FILE__, __LINE__))
45
46
47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
48 const char *function, char *file, unsigned int line);
49
50#define gfs2_assert_warn(sdp, assertion) \
51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
52 __FUNCTION__, __FILE__, __LINE__))
53
54
55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
56 const char *function, char *file, unsigned int line);
57
58#define gfs2_consist(sdp) \
59gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)
60
61
62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
63 const char *function, char *file, unsigned int line);
64
65#define gfs2_consist_inode(ip) \
66gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)
67
68
69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
70 const char *function, char *file, unsigned int line);
71
72#define gfs2_consist_rgrpd(rgd) \
73gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
74
75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
77 const char *type, const char *function,
78 char *file, unsigned int line);
79
80static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
81 struct buffer_head *bh,
82 const char *function,
83 char *file, unsigned int line)
84{
85 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
86 u32 magic = mh->mh_magic;
87 magic = be32_to_cpu(magic);
88 if (unlikely(magic != GFS2_MAGIC))
89 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
90 file, line);
91 return 0;
92}
93
94#define gfs2_meta_check(sdp, bh) \
95gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
96
97
98int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
99 u16 type, u16 t,
100 const char *function,
101 char *file, unsigned int line);
102
103static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
104 struct buffer_head *bh,
105 u16 type,
106 const char *function,
107 char *file, unsigned int line)
108{
109 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
110 u32 magic = mh->mh_magic;
111 u16 t = be32_to_cpu(mh->mh_type);
112 magic = be32_to_cpu(magic);
113 if (unlikely(magic != GFS2_MAGIC))
114 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
115 file, line);
116 if (unlikely(t != type))
117 return gfs2_metatype_check_ii(sdp, bh, type, t, function,
118 file, line);
119 return 0;
120}
121
122#define gfs2_metatype_check(sdp, bh, type) \
123gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
124
125static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
126 u16 format)
127{
128 struct gfs2_meta_header *mh;
129 mh = (struct gfs2_meta_header *)bh->b_data;
130 mh->mh_type = cpu_to_be32(type);
131 mh->mh_format = cpu_to_be32(format);
132}
133
134
135int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
136 char *file, unsigned int line);
137
138#define gfs2_io_error(sdp) \
139gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__);
140
141
142int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
143 const char *function, char *file, unsigned int line);
144
145#define gfs2_io_error_bh(sdp, bh) \
146gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
147
148
149extern kmem_cache_t *gfs2_glock_cachep;
150extern kmem_cache_t *gfs2_inode_cachep;
151extern kmem_cache_t *gfs2_bufdata_cachep;
152
153static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
154 unsigned int *p)
155{
156 unsigned int x;
157 spin_lock(&gt->gt_spin);
158 x = *p;
159 spin_unlock(&gt->gt_spin);
160 return x;
161}
162
163#define gfs2_tune_get(sdp, field) \
164gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
165
166void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
167 unsigned int bit, int new_value);
168
169#endif /* __UTIL_DOT_H__ */
170