aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2')
-rw-r--r--fs/gfs2/Kconfig44
-rw-r--r--fs/gfs2/Makefile10
-rw-r--r--fs/gfs2/acl.c309
-rw-r--r--fs/gfs2/acl.h39
-rw-r--r--fs/gfs2/bmap.c1221
-rw-r--r--fs/gfs2/bmap.h31
-rw-r--r--fs/gfs2/daemon.c196
-rw-r--r--fs/gfs2/daemon.h19
-rw-r--r--fs/gfs2/dir.c1961
-rw-r--r--fs/gfs2/dir.h79
-rw-r--r--fs/gfs2/eaops.c230
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/eattr.c1501
-rw-r--r--fs/gfs2/eattr.h100
-rw-r--r--fs/gfs2/gfs2.h31
-rw-r--r--fs/gfs2/glock.c2231
-rw-r--r--fs/gfs2/glock.h153
-rw-r--r--fs/gfs2/glops.c615
-rw-r--r--fs/gfs2/glops.h25
-rw-r--r--fs/gfs2/incore.h634
-rw-r--r--fs/gfs2/inode.c1379
-rw-r--r--fs/gfs2/inode.h56
-rw-r--r--fs/gfs2/lm.c217
-rw-r--r--fs/gfs2/lm.h42
-rw-r--r--fs/gfs2/locking.c184
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c524
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h187
-rw-r--r--fs/gfs2/locking/dlm/main.c64
-rw-r--r--fs/gfs2/locking/dlm/mount.c255
-rw-r--r--fs/gfs2/locking/dlm/plock.c301
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c226
-rw-r--r--fs/gfs2/locking/dlm/thread.c359
-rw-r--r--fs/gfs2/locking/nolock/Makefile3
-rw-r--r--fs/gfs2/locking/nolock/main.c246
-rw-r--r--fs/gfs2/log.c687
-rw-r--r--fs/gfs2/log.h65
-rw-r--r--fs/gfs2/lops.c809
-rw-r--r--fs/gfs2/lops.h99
-rw-r--r--fs/gfs2/main.c150
-rw-r--r--fs/gfs2/meta_io.c590
-rw-r--r--fs/gfs2/meta_io.h78
-rw-r--r--fs/gfs2/mount.c214
-rw-r--r--fs/gfs2/mount.h17
-rw-r--r--fs/gfs2/ondisk.c308
-rw-r--r--fs/gfs2/ops_address.c790
-rw-r--r--fs/gfs2/ops_address.h22
-rw-r--r--fs/gfs2/ops_dentry.c119
-rw-r--r--fs/gfs2/ops_dentry.h17
-rw-r--r--fs/gfs2/ops_export.c298
-rw-r--r--fs/gfs2/ops_export.h22
-rw-r--r--fs/gfs2/ops_file.c661
-rw-r--r--fs/gfs2/ops_file.h24
-rw-r--r--fs/gfs2/ops_fstype.c928
-rw-r--r--fs/gfs2/ops_fstype.h18
-rw-r--r--fs/gfs2/ops_inode.c1151
-rw-r--r--fs/gfs2/ops_inode.h20
-rw-r--r--fs/gfs2/ops_super.c468
-rw-r--r--fs/gfs2/ops_super.h17
-rw-r--r--fs/gfs2/ops_vm.c184
-rw-r--r--fs/gfs2/ops_vm.h18
-rw-r--r--fs/gfs2/quota.c1227
-rw-r--r--fs/gfs2/quota.h35
-rw-r--r--fs/gfs2/recovery.c570
-rw-r--r--fs/gfs2/recovery.h34
-rw-r--r--fs/gfs2/rgrp.c1513
-rw-r--r--fs/gfs2/rgrp.h69
-rw-r--r--fs/gfs2/super.c976
-rw-r--r--fs/gfs2/super.h55
-rw-r--r--fs/gfs2/sys.c583
-rw-r--r--fs/gfs2/sys.h27
-rw-r--r--fs/gfs2/trans.c184
-rw-r--r--fs/gfs2/trans.h39
-rw-r--r--fs/gfs2/util.c245
-rw-r--r--fs/gfs2/util.h170
75 files changed, 27006 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
new file mode 100644
index 000000000000..8c27de8b9568
--- /dev/null
+++ b/fs/gfs2/Kconfig
@@ -0,0 +1,44 @@
1config GFS2_FS
2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL
4 select FS_POSIX_ACL
5 help
6 A cluster filesystem.
7
8 Allows a cluster of computers to simultaneously use a block device
9 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
10 and writes to the block device like a local filesystem, but also uses
11 a lock module to allow the computers to coordinate their I/O so
12 filesystem consistency is maintained. One of the nifty features of
13 GFS is perfect consistency -- changes made to the filesystem on one
14 machine show up immediately on all other machines in the cluster.
15
16 To use the GFS2 filesystem, you will need to enable one or more of
17 the below locking modules. Documentation and utilities for GFS2 can
18 be found here: http://sources.redhat.com/cluster
19
20config GFS2_FS_LOCKING_NOLOCK
21 tristate "GFS2 \"nolock\" locking module"
22 depends on GFS2_FS
23 help
24 Single node locking module for GFS2.
25
26 Use this module if you want to use GFS2 on a single node without
27 its clustering features. You can still take advantage of the
28 large file support, and upgrade to running a full cluster later on
29 if required.
30
31 If you will only be using GFS2 in cluster mode, you do not need this
32 module.
33
34config GFS2_FS_LOCKING_DLM
35 tristate "GFS2 DLM locking module"
36 depends on GFS2_FS
37 select DLM
38 help
39 Multiple node locking module for GFS2
40
41 Most users of GFS2 will require this module. It provides the locking
42 interface between GFS2 and the DLM, which is required to use GFS2
43 in a cluster environment.
44
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
new file mode 100644
index 000000000000..e3f1ada643ac
--- /dev/null
+++ b/fs/gfs2/Makefile
@@ -0,0 +1,10 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
4 mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o
7
8obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
9obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
10
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
new file mode 100644
index 000000000000..5f959b8ce406
--- /dev/null
+++ b/fs/gfs2/acl.c
@@ -0,0 +1,309 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "glock.h"
26#include "inode.h"
27#include "meta_io.h"
28#include "trans.h"
29#include "util.h"
30
31#define ACL_ACCESS 1
32#define ACL_DEFAULT 0
33
34int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
35 struct gfs2_ea_request *er,
36 int *remove, mode_t *mode)
37{
38 struct posix_acl *acl;
39 int error;
40
41 error = gfs2_acl_validate_remove(ip, access);
42 if (error)
43 return error;
44
45 if (!er->er_data)
46 return -EINVAL;
47
48 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
49 if (IS_ERR(acl))
50 return PTR_ERR(acl);
51 if (!acl) {
52 *remove = 1;
53 return 0;
54 }
55
56 error = posix_acl_valid(acl);
57 if (error)
58 goto out;
59
60 if (access) {
61 error = posix_acl_equiv_mode(acl, mode);
62 if (!error)
63 *remove = 1;
64 else if (error > 0)
65 error = 0;
66 }
67
68out:
69 posix_acl_release(acl);
70 return error;
71}
72
73int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
74{
75 if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
76 return -EOPNOTSUPP;
77 if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
78 return -EPERM;
79 if (S_ISLNK(ip->i_di.di_mode))
80 return -EOPNOTSUPP;
81 if (!access && !S_ISDIR(ip->i_di.di_mode))
82 return -EACCES;
83
84 return 0;
85}
86
87static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
88 struct gfs2_ea_location *el, char **data, unsigned int *len)
89{
90 struct gfs2_ea_request er;
91 struct gfs2_ea_location el_this;
92 int error;
93
94 if (!ip->i_di.di_eattr)
95 return 0;
96
97 memset(&er, 0, sizeof(struct gfs2_ea_request));
98 if (access) {
99 er.er_name = GFS2_POSIX_ACL_ACCESS;
100 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
101 } else {
102 er.er_name = GFS2_POSIX_ACL_DEFAULT;
103 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
104 }
105 er.er_type = GFS2_EATYPE_SYS;
106
107 if (!el)
108 el = &el_this;
109
110 error = gfs2_ea_find(ip, &er, el);
111 if (error)
112 return error;
113 if (!el->el_ea)
114 return 0;
115 if (!GFS2_EA_DATA_LEN(el->el_ea))
116 goto out;
117
118 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
119 er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
120 error = -ENOMEM;
121 if (!er.er_data)
122 goto out;
123
124 error = gfs2_ea_get_copy(ip, el, er.er_data);
125 if (error)
126 goto out_kfree;
127
128 if (acl) {
129 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
130 if (IS_ERR(*acl))
131 error = PTR_ERR(*acl);
132 }
133
134out_kfree:
135 if (error || !data)
136 kfree(er.er_data);
137 else {
138 *data = er.er_data;
139 *len = er.er_data_len;
140 }
141out:
142 if (error || el == &el_this)
143 brelse(el->el_bh);
144 return error;
145}
146
147/**
148 * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
149 * @inode: the file we want to do something to
150 * @mask: what we want to do
151 *
152 * Returns: errno
153 */
154
155int gfs2_check_acl_locked(struct inode *inode, int mask)
156{
157 struct posix_acl *acl = NULL;
158 int error;
159
160 error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
161 if (error)
162 return error;
163
164 if (acl) {
165 error = posix_acl_permission(inode, acl, mask);
166 posix_acl_release(acl);
167 return error;
168 }
169
170 return -EAGAIN;
171}
172
173int gfs2_check_acl(struct inode *inode, int mask)
174{
175 struct gfs2_inode *ip = GFS2_I(inode);
176 struct gfs2_holder i_gh;
177 int error;
178
179 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
180 if (!error) {
181 error = gfs2_check_acl_locked(inode, mask);
182 gfs2_glock_dq_uninit(&i_gh);
183 }
184
185 return error;
186}
187
188static int munge_mode(struct gfs2_inode *ip, mode_t mode)
189{
190 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
191 struct buffer_head *dibh;
192 int error;
193
194 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
195 if (error)
196 return error;
197
198 error = gfs2_meta_inode_buffer(ip, &dibh);
199 if (!error) {
200 gfs2_assert_withdraw(sdp,
201 (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
202 ip->i_di.di_mode = mode;
203 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
204 gfs2_dinode_out(&ip->i_di, dibh->b_data);
205 brelse(dibh);
206 }
207
208 gfs2_trans_end(sdp);
209
210 return 0;
211}
212
213int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
214{
215 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
216 struct posix_acl *acl = NULL, *clone;
217 struct gfs2_ea_request er;
218 mode_t mode = ip->i_di.di_mode;
219 int error;
220
221 if (!sdp->sd_args.ar_posix_acl)
222 return 0;
223 if (S_ISLNK(ip->i_di.di_mode))
224 return 0;
225
226 memset(&er, 0, sizeof(struct gfs2_ea_request));
227 er.er_type = GFS2_EATYPE_SYS;
228
229 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
230 &er.er_data, &er.er_data_len);
231 if (error)
232 return error;
233 if (!acl) {
234 mode &= ~current->fs->umask;
235 if (mode != ip->i_di.di_mode)
236 error = munge_mode(ip, mode);
237 return error;
238 }
239
240 clone = posix_acl_clone(acl, GFP_KERNEL);
241 error = -ENOMEM;
242 if (!clone)
243 goto out;
244 posix_acl_release(acl);
245 acl = clone;
246
247 if (S_ISDIR(ip->i_di.di_mode)) {
248 er.er_name = GFS2_POSIX_ACL_DEFAULT;
249 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
250 error = gfs2_system_eaops.eo_set(ip, &er);
251 if (error)
252 goto out;
253 }
254
255 error = posix_acl_create_masq(acl, &mode);
256 if (error < 0)
257 goto out;
258 if (error > 0) {
259 er.er_name = GFS2_POSIX_ACL_ACCESS;
260 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
261 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
262 er.er_mode = mode;
263 er.er_flags = GFS2_ERF_MODE;
264 error = gfs2_system_eaops.eo_set(ip, &er);
265 if (error)
266 goto out;
267 } else
268 munge_mode(ip, mode);
269
270out:
271 posix_acl_release(acl);
272 kfree(er.er_data);
273 return error;
274}
275
276int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
277{
278 struct posix_acl *acl = NULL, *clone;
279 struct gfs2_ea_location el;
280 char *data;
281 unsigned int len;
282 int error;
283
284 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
285 if (error)
286 return error;
287 if (!acl)
288 return gfs2_setattr_simple(ip, attr);
289
290 clone = posix_acl_clone(acl, GFP_KERNEL);
291 error = -ENOMEM;
292 if (!clone)
293 goto out;
294 posix_acl_release(acl);
295 acl = clone;
296
297 error = posix_acl_chmod_masq(acl, attr->ia_mode);
298 if (!error) {
299 posix_acl_to_xattr(acl, data, len);
300 error = gfs2_ea_acl_chmod(ip, &el, attr, data);
301 }
302
303out:
304 posix_acl_release(acl);
305 brelse(el.el_bh);
306 kfree(data);
307 return error;
308}
309
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
new file mode 100644
index 000000000000..05c294fe0d78
--- /dev/null
+++ b/fs/gfs2/acl.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __ACL_DOT_H__
11#define __ACL_DOT_H__
12
13#include "incore.h"
14
/* Names (and their lengths, without the NUL) of the two ACL attributes */
#define GFS2_POSIX_ACL_ACCESS		"posix_acl_access"
#define GFS2_POSIX_ACL_ACCESS_LEN	16
#define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
#define GFS2_POSIX_ACL_DEFAULT_LEN	17

/* True when (name, len) is exactly the access ACL attribute name */
#define GFS2_ACL_IS_ACCESS(name, len) \
	((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
	 memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)) == 0)

/* True when (name, len) is exactly the default ACL attribute name */
#define GFS2_ACL_IS_DEFAULT(name, len) \
	((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
	 memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)) == 0)
27
28struct gfs2_ea_request;
29
30int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
31 struct gfs2_ea_request *er,
32 int *remove, mode_t *mode);
33int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
34int gfs2_check_acl_locked(struct inode *inode, int mask);
35int gfs2_check_acl(struct inode *inode, int mask);
36int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
37int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
38
39#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
new file mode 100644
index 000000000000..cc57f2ecd219
--- /dev/null
+++ b/fs/gfs2/bmap.c
@@ -0,0 +1,1221 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "inode.h"
24#include "meta_io.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "trans.h"
28#include "dir.h"
29#include "util.h"
30#include "ops_address.h"
31
32/* This doesn't need to be that large as max 64 bit pointers in a 4k
33 * block is 512, so __u16 is fine for that. It saves stack space to
34 * keep it small.
35 */
36struct metapath {
37 __u16 mp_list[GFS2_MAX_META_HEIGHT];
38};
39
40typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
41 struct buffer_head *bh, u64 *top,
42 u64 *bottom, unsigned int height,
43 void *data);
44
/* State handed to do_strip() through recursive_scan()'s @data argument */
struct strip_mine {
	int sm_first;		/* while set, do_strip() skips the first pointer */
	unsigned int sm_height;	/* the only height do_strip() acts on */
};
49
50/**
51 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
52 * @ip: the inode
53 * @dibh: the dinode buffer
54 * @block: the block number that was allocated
55 * @private: any locked page held by the caller process
56 *
57 * Returns: errno
58 */
59
60static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
61 u64 block, struct page *page)
62{
63 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
64 struct inode *inode = &ip->i_inode;
65 struct buffer_head *bh;
66 int release = 0;
67
68 if (!page || page->index) {
69 page = grab_cache_page(inode->i_mapping, 0);
70 if (!page)
71 return -ENOMEM;
72 release = 1;
73 }
74
75 if (!PageUptodate(page)) {
76 void *kaddr = kmap(page);
77
78 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
79 ip->i_di.di_size);
80 memset(kaddr + ip->i_di.di_size, 0,
81 PAGE_CACHE_SIZE - ip->i_di.di_size);
82 kunmap(page);
83
84 SetPageUptodate(page);
85 }
86
87 if (!page_has_buffers(page))
88 create_empty_buffers(page, 1 << inode->i_blkbits,
89 (1 << BH_Uptodate));
90
91 bh = page_buffers(page);
92
93 if (!buffer_mapped(bh))
94 map_bh(bh, inode->i_sb, block);
95
96 set_buffer_uptodate(bh);
97 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
98 gfs2_trans_add_bh(ip->i_gl, bh, 0);
99 mark_buffer_dirty(bh);
100
101 if (release) {
102 unlock_page(page);
103 page_cache_release(page);
104 }
105
106 return 0;
107}
108
109/**
110 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
111 * @ip: The GFS2 inode to unstuff
112 * @unstuffer: the routine that handles unstuffing a non-zero length file
113 * @private: private data for the unstuffer
114 *
115 * This routine unstuffs a dinode and returns it to a "normal" state such
116 * that the height can be grown in the traditional way.
117 *
118 * Returns: errno
119 */
120
121int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
122{
123 struct buffer_head *bh, *dibh;
124 struct gfs2_dinode *di;
125 u64 block = 0;
126 int isdir = gfs2_is_dir(ip);
127 int error;
128
129 down_write(&ip->i_rw_mutex);
130
131 error = gfs2_meta_inode_buffer(ip, &dibh);
132 if (error)
133 goto out;
134
135 if (ip->i_di.di_size) {
136 /* Get a free block, fill it with the stuffed data,
137 and write it out to disk */
138
139 if (isdir) {
140 block = gfs2_alloc_meta(ip);
141
142 error = gfs2_dir_get_new_buffer(ip, block, &bh);
143 if (error)
144 goto out_brelse;
145 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
146 dibh, sizeof(struct gfs2_dinode));
147 brelse(bh);
148 } else {
149 block = gfs2_alloc_data(ip);
150
151 error = gfs2_unstuffer_page(ip, dibh, block, page);
152 if (error)
153 goto out_brelse;
154 }
155 }
156
157 /* Set up the pointer to the new block */
158
159 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
160 di = (struct gfs2_dinode *)dibh->b_data;
161 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
162
163 if (ip->i_di.di_size) {
164 *(__be64 *)(di + 1) = cpu_to_be64(block);
165 ip->i_di.di_blocks++;
166 di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
167 }
168
169 ip->i_di.di_height = 1;
170 di->di_height = cpu_to_be16(1);
171
172out_brelse:
173 brelse(dibh);
174out:
175 up_write(&ip->i_rw_mutex);
176 return error;
177}
178
179/**
180 * calc_tree_height - Calculate the height of a metadata tree
181 * @ip: The GFS2 inode
182 * @size: The proposed size of the file
183 *
184 * Work out how tall a metadata tree needs to be in order to accommodate a
185 * file of a particular size. If size is less than the current size of
186 * the inode, then the current size of the inode is used instead of the
187 * supplied one.
188 *
189 * Returns: the height the tree should be
190 */
191
192static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size)
193{
194 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
195 u64 *arr;
196 unsigned int max, height;
197
198 if (ip->i_di.di_size > size)
199 size = ip->i_di.di_size;
200
201 if (gfs2_is_dir(ip)) {
202 arr = sdp->sd_jheightsize;
203 max = sdp->sd_max_jheight;
204 } else {
205 arr = sdp->sd_heightsize;
206 max = sdp->sd_max_height;
207 }
208
209 for (height = 0; height < max; height++)
210 if (arr[height] >= size)
211 break;
212
213 return height;
214}
215
216/**
217 * build_height - Build a metadata tree of the requested height
218 * @ip: The GFS2 inode
219 * @height: The height to build to
220 *
221 *
222 * Returns: errno
223 */
224
225static int build_height(struct inode *inode, unsigned height)
226{
227 struct gfs2_inode *ip = GFS2_I(inode);
228 unsigned new_height = height - ip->i_di.di_height;
229 struct buffer_head *dibh;
230 struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
231 struct gfs2_dinode *di;
232 int error;
233 u64 *bp;
234 u64 bn;
235 unsigned n;
236
237 if (height <= ip->i_di.di_height)
238 return 0;
239
240 error = gfs2_meta_inode_buffer(ip, &dibh);
241 if (error)
242 return error;
243
244 for(n = 0; n < new_height; n++) {
245 bn = gfs2_alloc_meta(ip);
246 blocks[n] = gfs2_meta_new(ip->i_gl, bn);
247 gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
248 }
249
250 n = 0;
251 bn = blocks[0]->b_blocknr;
252 if (new_height > 1) {
253 for(; n < new_height-1; n++) {
254 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
255 GFS2_FORMAT_IN);
256 gfs2_buffer_clear_tail(blocks[n],
257 sizeof(struct gfs2_meta_header));
258 bp = (u64 *)(blocks[n]->b_data +
259 sizeof(struct gfs2_meta_header));
260 *bp = cpu_to_be64(blocks[n+1]->b_blocknr);
261 brelse(blocks[n]);
262 blocks[n] = NULL;
263 }
264 }
265 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
266 gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
267 dibh, sizeof(struct gfs2_dinode));
268 brelse(blocks[n]);
269 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
270 di = (struct gfs2_dinode *)dibh->b_data;
271 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
272 *(__be64 *)(di + 1) = cpu_to_be64(bn);
273 ip->i_di.di_height += new_height;
274 ip->i_di.di_blocks += new_height;
275 di->di_height = cpu_to_be16(ip->i_di.di_height);
276 di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
277 brelse(dibh);
278 return error;
279}
280
281/**
282 * find_metapath - Find path through the metadata tree
283 * @ip: The inode pointer
284 * @mp: The metapath to return the result in
285 * @block: The disk block to look up
286 *
287 * This routine returns a struct metapath structure that defines a path
288 * through the metadata of inode "ip" to get to block "block".
289 *
290 * Example:
291 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
292 * filesystem with a blocksize of 4096.
293 *
294 * find_metapath() would fill in the struct metapath (which holds only
295 * mp_list; offset 101342453 and height 3 are inputs) with mp_list[0] = 0,
296 * mp_list[1] = 48, and mp_list[2] = 165.
297 *
298 * That means that in order to get to the block containing the byte at
299 * offset 101342453, we would load the indirect block pointed to by pointer
300 * 0 in the dinode. We would then load the indirect block pointed to by
301 * pointer 48 in that indirect block. We would then load the data block
302 * pointed to by pointer 165 in that indirect block.
303 *
304 * ----------------------------------------
305 * | Dinode | |
306 * | | 4|
307 * | |0 1 2 3 4 5 9|
308 * | | 6|
309 * ----------------------------------------
310 * |
311 * |
312 * V
313 * ----------------------------------------
314 * | Indirect Block |
315 * | 5|
316 * | 4 4 4 4 4 5 5 1|
317 * |0 5 6 7 8 9 0 1 2|
318 * ----------------------------------------
319 * |
320 * |
321 * V
322 * ----------------------------------------
323 * | Indirect Block |
324 * | 1 1 1 1 1 5|
325 * | 6 6 6 6 6 1|
326 * |0 3 4 5 6 7 2|
327 * ----------------------------------------
328 * |
329 * |
330 * V
331 * ----------------------------------------
332 * | Data block containing offset |
333 * | 101342453 |
334 * | |
335 * | |
336 * ----------------------------------------
337 *
338 */
339
340static void find_metapath(struct gfs2_inode *ip, u64 block,
341 struct metapath *mp)
342{
343 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
344 u64 b = block;
345 unsigned int i;
346
347 for (i = ip->i_di.di_height; i--;)
348 mp->mp_list[i] = do_div(b, sdp->sd_inptrs);
349
350}
351
352/**
353 * metapointer - Return pointer to start of metadata in a buffer
354 * @bh: The buffer
355 * @height: The metadata height (0 = dinode)
356 * @mp: The metapath
357 *
358 * Return a pointer to the block number of the next height of the metadata
359 * tree given a buffer containing the pointer to the current height of the
360 * metadata tree.
361 */
362
363static inline u64 *metapointer(struct buffer_head *bh, int *boundary,
364 unsigned int height, const struct metapath *mp)
365{
366 unsigned int head_size = (height > 0) ?
367 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
368 u64 *ptr;
369 *boundary = 0;
370 ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height];
371 if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size))
372 *boundary = 1;
373 return ptr;
374}
375
376/**
377 * lookup_block - Get the next metadata block in metadata tree
378 * @ip: The GFS2 inode
379 * @bh: Buffer containing the pointers to metadata blocks
380 * @height: The height of the tree (0 = dinode)
381 * @mp: The metapath
382 * @create: Non-zero if we may create a new meatdata block
383 * @new: Used to indicate if we did create a new metadata block
384 * @block: the returned disk block number
385 *
386 * Given a metatree, complete to a particular height, checks to see if the next
387 * height of the tree exists. If not the next height of the tree is created.
388 * The block number of the next height of the metadata tree is returned.
389 *
390 */
391
392static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
393 unsigned int height, struct metapath *mp, int create,
394 int *new, u64 *block)
395{
396 int boundary;
397 u64 *ptr = metapointer(bh, &boundary, height, mp);
398
399 if (*ptr) {
400 *block = be64_to_cpu(*ptr);
401 return boundary;
402 }
403
404 *block = 0;
405
406 if (!create)
407 return 0;
408
409 if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip))
410 *block = gfs2_alloc_data(ip);
411 else
412 *block = gfs2_alloc_meta(ip);
413
414 gfs2_trans_add_bh(ip->i_gl, bh, 1);
415
416 *ptr = cpu_to_be64(*block);
417 ip->i_di.di_blocks++;
418
419 *new = 1;
420 return 0;
421}
422
423/**
424 * gfs2_block_pointers - Map a block from an inode to a disk block
425 * @inode: The inode
426 * @lblock: The logical block number
427 * @map_bh: The bh to be mapped
428 * @mp: metapath to use
429 *
430 * Find the block number on the current device which corresponds to an
431 * inode's block. If the block had to be created, "new" will be set.
432 *
433 * Returns: errno
434 */
435
436static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
437 struct buffer_head *bh_map, struct metapath *mp,
438 unsigned int maxlen)
439{
440 struct gfs2_inode *ip = GFS2_I(inode);
441 struct gfs2_sbd *sdp = GFS2_SB(inode);
442 struct buffer_head *bh;
443 unsigned int bsize;
444 unsigned int height;
445 unsigned int end_of_metadata;
446 unsigned int x;
447 int error = 0;
448 int new = 0;
449 u64 dblock = 0;
450 int boundary;
451
452 BUG_ON(maxlen == 0);
453
454 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
455 return 0;
456
457 bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
458
459 height = calc_tree_height(ip, (lblock + 1) * bsize);
460 if (ip->i_di.di_height < height) {
461 if (!create)
462 return 0;
463
464 error = build_height(inode, height);
465 if (error)
466 return error;
467 }
468
469 find_metapath(ip, lblock, mp);
470 end_of_metadata = ip->i_di.di_height - 1;
471
472 error = gfs2_meta_inode_buffer(ip, &bh);
473 if (error)
474 return error;
475
476 for (x = 0; x < end_of_metadata; x++) {
477 lookup_block(ip, bh, x, mp, create, &new, &dblock);
478 brelse(bh);
479 if (!dblock)
480 return 0;
481
482 error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
483 if (error)
484 return error;
485 }
486
487 boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock);
488 clear_buffer_mapped(bh_map);
489 clear_buffer_new(bh_map);
490 clear_buffer_boundary(bh_map);
491
492 if (dblock) {
493 map_bh(bh_map, inode->i_sb, dblock);
494 if (boundary)
495 set_buffer_boundary(bh);
496 if (new) {
497 struct buffer_head *dibh;
498 error = gfs2_meta_inode_buffer(ip, &dibh);
499 if (!error) {
500 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
501 gfs2_dinode_out(&ip->i_di, dibh->b_data);
502 brelse(dibh);
503 }
504 set_buffer_new(bh_map);
505 goto out_brelse;
506 }
507 while(--maxlen && !buffer_boundary(bh_map)) {
508 u64 eblock;
509
510 mp->mp_list[end_of_metadata]++;
511 boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock);
512 if (eblock != ++dblock)
513 break;
514 bh_map->b_size += (1 << inode->i_blkbits);
515 if (boundary)
516 set_buffer_boundary(bh_map);
517 }
518 }
519out_brelse:
520 brelse(bh);
521 return 0;
522}
523
524
525static inline void bmap_lock(struct inode *inode, int create)
526{
527 struct gfs2_inode *ip = GFS2_I(inode);
528 if (create)
529 down_write(&ip->i_rw_mutex);
530 else
531 down_read(&ip->i_rw_mutex);
532}
533
534static inline void bmap_unlock(struct inode *inode, int create)
535{
536 struct gfs2_inode *ip = GFS2_I(inode);
537 if (create)
538 up_write(&ip->i_rw_mutex);
539 else
540 up_read(&ip->i_rw_mutex);
541}
542
543int gfs2_block_map(struct inode *inode, u64 lblock, int create,
544 struct buffer_head *bh, unsigned int maxlen)
545{
546 struct metapath mp;
547 int ret;
548
549 bmap_lock(inode, create);
550 ret = gfs2_block_pointers(inode, lblock, create, bh, &mp, maxlen);
551 bmap_unlock(inode, create);
552 return ret;
553}
554
555int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
556{
557 struct metapath mp;
558 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0, .b_size = 0 };
559 int ret;
560 int create = *new;
561
562 BUG_ON(!extlen);
563 BUG_ON(!dblock);
564 BUG_ON(!new);
565
566 bmap_lock(inode, create);
567 ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp, 32);
568 bmap_unlock(inode, create);
569 *extlen = bh.b_size >> inode->i_blkbits;
570 *dblock = bh.b_blocknr;
571 if (buffer_new(&bh))
572 *new = 1;
573 else
574 *new = 0;
575 return ret;
576}
577
578/**
579 * recursive_scan - recursively scan through the end of a file
580 * @ip: the inode
581 * @dibh: the dinode buffer
582 * @mp: the path through the metadata to the point to start
583 * @height: the height the recursion is at
584 * @block: the indirect block to look at
585 * @first: 1 if this is the first block
586 * @bc: the call to make for each piece of metadata
587 * @data: data opaque to this function to pass to @bc
588 *
589 * When this is first called @height and @block should be zero and
590 * @first should be 1.
591 *
592 * Returns: errno
593 */
594
595static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
596 struct metapath *mp, unsigned int height,
597 u64 block, int first, block_call_t bc,
598 void *data)
599{
600 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
601 struct buffer_head *bh = NULL;
602 u64 *top, *bottom;
603 u64 bn;
604 int error;
605 int mh_size = sizeof(struct gfs2_meta_header);
606
607 if (!height) {
608 error = gfs2_meta_inode_buffer(ip, &bh);
609 if (error)
610 return error;
611 dibh = bh;
612
613 top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
614 bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
615 } else {
616 error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
617 if (error)
618 return error;
619
620 top = (u64 *)(bh->b_data + mh_size) +
621 (first ? mp->mp_list[height] : 0);
622
623 bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
624 }
625
626 error = bc(ip, dibh, bh, top, bottom, height, data);
627 if (error)
628 goto out;
629
630 if (height < ip->i_di.di_height - 1)
631 for (; top < bottom; top++, first = 0) {
632 if (!*top)
633 continue;
634
635 bn = be64_to_cpu(*top);
636
637 error = recursive_scan(ip, dibh, mp, height + 1, bn,
638 first, bc, data);
639 if (error)
640 break;
641 }
642
643out:
644 brelse(bh);
645 return error;
646}
647
648/**
649 * do_strip - Look for a layer a particular layer of the file and strip it off
650 * @ip: the inode
651 * @dibh: the dinode buffer
652 * @bh: A buffer of pointers
653 * @top: The first pointer in the buffer
654 * @bottom: One more than the last pointer
655 * @height: the height this buffer is at
656 * @data: a pointer to a struct strip_mine
657 *
658 * Returns: errno
659 */
660
661static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
662 struct buffer_head *bh, u64 *top, u64 *bottom,
663 unsigned int height, void *data)
664{
665 struct strip_mine *sm = data;
666 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
667 struct gfs2_rgrp_list rlist;
668 u64 bn, bstart;
669 u32 blen;
670 u64 *p;
671 unsigned int rg_blocks = 0;
672 int metadata;
673 unsigned int revokes = 0;
674 int x;
675 int error;
676
677 if (!*top)
678 sm->sm_first = 0;
679
680 if (height != sm->sm_height)
681 return 0;
682
683 if (sm->sm_first) {
684 top++;
685 sm->sm_first = 0;
686 }
687
688 metadata = (height != ip->i_di.di_height - 1);
689 if (metadata)
690 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
691
692 error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
693 if (error)
694 return error;
695
696 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
697 bstart = 0;
698 blen = 0;
699
700 for (p = top; p < bottom; p++) {
701 if (!*p)
702 continue;
703
704 bn = be64_to_cpu(*p);
705
706 if (bstart + blen == bn)
707 blen++;
708 else {
709 if (bstart)
710 gfs2_rlist_add(sdp, &rlist, bstart);
711
712 bstart = bn;
713 blen = 1;
714 }
715 }
716
717 if (bstart)
718 gfs2_rlist_add(sdp, &rlist, bstart);
719 else
720 goto out; /* Nothing to do */
721
722 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
723
724 for (x = 0; x < rlist.rl_rgrps; x++) {
725 struct gfs2_rgrpd *rgd;
726 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
727 rg_blocks += rgd->rd_ri.ri_length;
728 }
729
730 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
731 if (error)
732 goto out_rlist;
733
734 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
735 RES_INDIRECT + RES_STATFS + RES_QUOTA,
736 revokes);
737 if (error)
738 goto out_rg_gunlock;
739
740 down_write(&ip->i_rw_mutex);
741
742 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
743 gfs2_trans_add_bh(ip->i_gl, bh, 1);
744
745 bstart = 0;
746 blen = 0;
747
748 for (p = top; p < bottom; p++) {
749 if (!*p)
750 continue;
751
752 bn = be64_to_cpu(*p);
753
754 if (bstart + blen == bn)
755 blen++;
756 else {
757 if (bstart) {
758 if (metadata)
759 gfs2_free_meta(ip, bstart, blen);
760 else
761 gfs2_free_data(ip, bstart, blen);
762 }
763
764 bstart = bn;
765 blen = 1;
766 }
767
768 *p = 0;
769 if (!ip->i_di.di_blocks)
770 gfs2_consist_inode(ip);
771 ip->i_di.di_blocks--;
772 }
773 if (bstart) {
774 if (metadata)
775 gfs2_free_meta(ip, bstart, blen);
776 else
777 gfs2_free_data(ip, bstart, blen);
778 }
779
780 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
781
782 gfs2_dinode_out(&ip->i_di, dibh->b_data);
783
784 up_write(&ip->i_rw_mutex);
785
786 gfs2_trans_end(sdp);
787
788out_rg_gunlock:
789 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
790out_rlist:
791 gfs2_rlist_free(&rlist);
792out:
793 gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
794 return error;
795}
796
797/**
798 * do_grow - Make a file look bigger than it is
799 * @ip: the inode
800 * @size: the size to set the file to
801 *
802 * Called with an exclusive lock on @ip.
803 *
804 * Returns: errno
805 */
806
807static int do_grow(struct gfs2_inode *ip, u64 size)
808{
809 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
810 struct gfs2_alloc *al;
811 struct buffer_head *dibh;
812 unsigned int h;
813 int error;
814
815 al = gfs2_alloc_get(ip);
816
817 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
818 if (error)
819 goto out;
820
821 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
822 if (error)
823 goto out_gunlock_q;
824
825 al->al_requested = sdp->sd_max_height + RES_DATA;
826
827 error = gfs2_inplace_reserve(ip);
828 if (error)
829 goto out_gunlock_q;
830
831 error = gfs2_trans_begin(sdp,
832 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
833 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
834 if (error)
835 goto out_ipres;
836
837 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
838 if (gfs2_is_stuffed(ip)) {
839 error = gfs2_unstuff_dinode(ip, NULL);
840 if (error)
841 goto out_end_trans;
842 }
843
844 h = calc_tree_height(ip, size);
845 if (ip->i_di.di_height < h) {
846 down_write(&ip->i_rw_mutex);
847 error = build_height(&ip->i_inode, h);
848 up_write(&ip->i_rw_mutex);
849 if (error)
850 goto out_end_trans;
851 }
852 }
853
854 ip->i_di.di_size = size;
855 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
856
857 error = gfs2_meta_inode_buffer(ip, &dibh);
858 if (error)
859 goto out_end_trans;
860
861 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
862 gfs2_dinode_out(&ip->i_di, dibh->b_data);
863 brelse(dibh);
864
865out_end_trans:
866 gfs2_trans_end(sdp);
867out_ipres:
868 gfs2_inplace_release(ip);
869out_gunlock_q:
870 gfs2_quota_unlock(ip);
871out:
872 gfs2_alloc_put(ip);
873 return error;
874}
875
876
877/**
878 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
879 *
880 * This is partly borrowed from ext3.
881 */
882static int gfs2_block_truncate_page(struct address_space *mapping)
883{
884 struct inode *inode = mapping->host;
885 struct gfs2_inode *ip = GFS2_I(inode);
886 struct gfs2_sbd *sdp = GFS2_SB(inode);
887 loff_t from = inode->i_size;
888 unsigned long index = from >> PAGE_CACHE_SHIFT;
889 unsigned offset = from & (PAGE_CACHE_SIZE-1);
890 unsigned blocksize, iblock, length, pos;
891 struct buffer_head *bh;
892 struct page *page;
893 void *kaddr;
894 int err;
895
896 page = grab_cache_page(mapping, index);
897 if (!page)
898 return 0;
899
900 blocksize = inode->i_sb->s_blocksize;
901 length = blocksize - (offset & (blocksize - 1));
902 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
903
904 if (!page_has_buffers(page))
905 create_empty_buffers(page, blocksize, 0);
906
907 /* Find the buffer that contains "offset" */
908 bh = page_buffers(page);
909 pos = blocksize;
910 while (offset >= pos) {
911 bh = bh->b_this_page;
912 iblock++;
913 pos += blocksize;
914 }
915
916 err = 0;
917
918 if (!buffer_mapped(bh)) {
919 gfs2_get_block(inode, iblock, bh, 0);
920 /* unmapped? It's a hole - nothing to do */
921 if (!buffer_mapped(bh))
922 goto unlock;
923 }
924
925 /* Ok, it's mapped. Make sure it's up-to-date */
926 if (PageUptodate(page))
927 set_buffer_uptodate(bh);
928
929 if (!buffer_uptodate(bh)) {
930 err = -EIO;
931 ll_rw_block(READ, 1, &bh);
932 wait_on_buffer(bh);
933 /* Uhhuh. Read error. Complain and punt. */
934 if (!buffer_uptodate(bh))
935 goto unlock;
936 }
937
938 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
939 gfs2_trans_add_bh(ip->i_gl, bh, 0);
940
941 kaddr = kmap_atomic(page, KM_USER0);
942 memset(kaddr + offset, 0, length);
943 flush_dcache_page(page);
944 kunmap_atomic(kaddr, KM_USER0);
945
946unlock:
947 unlock_page(page);
948 page_cache_release(page);
949 return err;
950}
951
952static int trunc_start(struct gfs2_inode *ip, u64 size)
953{
954 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
955 struct buffer_head *dibh;
956 int journaled = gfs2_is_jdata(ip);
957 int error;
958
959 error = gfs2_trans_begin(sdp,
960 RES_DINODE + (journaled ? RES_JDATA : 0), 0);
961 if (error)
962 return error;
963
964 error = gfs2_meta_inode_buffer(ip, &dibh);
965 if (error)
966 goto out;
967
968 if (gfs2_is_stuffed(ip)) {
969 ip->i_di.di_size = size;
970 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
971 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
972 gfs2_dinode_out(&ip->i_di, dibh->b_data);
973 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
974 error = 1;
975
976 } else {
977 if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
978 error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
979
980 if (!error) {
981 ip->i_di.di_size = size;
982 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
983 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
984 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
985 gfs2_dinode_out(&ip->i_di, dibh->b_data);
986 }
987 }
988
989 brelse(dibh);
990
991out:
992 gfs2_trans_end(sdp);
993 return error;
994}
995
996static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
997{
998 unsigned int height = ip->i_di.di_height;
999 u64 lblock;
1000 struct metapath mp;
1001 int error;
1002
1003 if (!size)
1004 lblock = 0;
1005 else
1006 lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift;
1007
1008 find_metapath(ip, lblock, &mp);
1009 gfs2_alloc_get(ip);
1010
1011 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1012 if (error)
1013 goto out;
1014
1015 while (height--) {
1016 struct strip_mine sm;
1017 sm.sm_first = !!size;
1018 sm.sm_height = height;
1019
1020 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
1021 if (error)
1022 break;
1023 }
1024
1025 gfs2_quota_unhold(ip);
1026
1027out:
1028 gfs2_alloc_put(ip);
1029 return error;
1030}
1031
1032static int trunc_end(struct gfs2_inode *ip)
1033{
1034 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1035 struct buffer_head *dibh;
1036 int error;
1037
1038 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1039 if (error)
1040 return error;
1041
1042 down_write(&ip->i_rw_mutex);
1043
1044 error = gfs2_meta_inode_buffer(ip, &dibh);
1045 if (error)
1046 goto out;
1047
1048 if (!ip->i_di.di_size) {
1049 ip->i_di.di_height = 0;
1050 ip->i_di.di_goal_meta =
1051 ip->i_di.di_goal_data =
1052 ip->i_num.no_addr;
1053 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1054 }
1055 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1056 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1057
1058 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1059 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1060 brelse(dibh);
1061
1062out:
1063 up_write(&ip->i_rw_mutex);
1064 gfs2_trans_end(sdp);
1065 return error;
1066}
1067
1068/**
1069 * do_shrink - make a file smaller
1070 * @ip: the inode
1071 * @size: the size to make the file
1072 * @truncator: function to truncate the last partial block
1073 *
1074 * Called with an exclusive lock on @ip.
1075 *
1076 * Returns: errno
1077 */
1078
1079static int do_shrink(struct gfs2_inode *ip, u64 size)
1080{
1081 int error;
1082
1083 error = trunc_start(ip, size);
1084 if (error < 0)
1085 return error;
1086 if (error > 0)
1087 return 0;
1088
1089 error = trunc_dealloc(ip, size);
1090 if (!error)
1091 error = trunc_end(ip);
1092
1093 return error;
1094}
1095
1096/**
1097 * gfs2_truncatei - make a file a given size
1098 * @ip: the inode
1099 * @size: the size to make the file
1100 * @truncator: function to truncate the last partial block
1101 *
1102 * The file size can grow, shrink, or stay the same size.
1103 *
1104 * Returns: errno
1105 */
1106
1107int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
1108{
1109 int error;
1110
1111 if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode)))
1112 return -EINVAL;
1113
1114 if (size > ip->i_di.di_size)
1115 error = do_grow(ip, size);
1116 else
1117 error = do_shrink(ip, size);
1118
1119 return error;
1120}
1121
1122int gfs2_truncatei_resume(struct gfs2_inode *ip)
1123{
1124 int error;
1125 error = trunc_dealloc(ip, ip->i_di.di_size);
1126 if (!error)
1127 error = trunc_end(ip);
1128 return error;
1129}
1130
1131int gfs2_file_dealloc(struct gfs2_inode *ip)
1132{
1133 return trunc_dealloc(ip, 0);
1134}
1135
1136/**
1137 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1138 * @ip: the file
1139 * @len: the number of bytes to be written to the file
1140 * @data_blocks: returns the number of data blocks required
1141 * @ind_blocks: returns the number of indirect blocks required
1142 *
1143 */
1144
1145void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1146 unsigned int *data_blocks, unsigned int *ind_blocks)
1147{
1148 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1149 unsigned int tmp;
1150
1151 if (gfs2_is_dir(ip)) {
1152 *data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
1153 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1154 } else {
1155 *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1156 *ind_blocks = 3 * (sdp->sd_max_height - 1);
1157 }
1158
1159 for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1160 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
1161 *ind_blocks += tmp;
1162 }
1163}
1164
1165/**
1166 * gfs2_write_alloc_required - figure out if a write will require an allocation
1167 * @ip: the file being written to
1168 * @offset: the offset to write to
1169 * @len: the number of bytes being written
1170 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1171 *
1172 * Returns: errno
1173 */
1174
1175int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1176 unsigned int len, int *alloc_required)
1177{
1178 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1179 u64 lblock, lblock_stop, dblock;
1180 u32 extlen;
1181 int new = 0;
1182 int error = 0;
1183
1184 *alloc_required = 0;
1185
1186 if (!len)
1187 return 0;
1188
1189 if (gfs2_is_stuffed(ip)) {
1190 if (offset + len >
1191 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1192 *alloc_required = 1;
1193 return 0;
1194 }
1195
1196 if (gfs2_is_dir(ip)) {
1197 unsigned int bsize = sdp->sd_jbsize;
1198 lblock = offset;
1199 do_div(lblock, bsize);
1200 lblock_stop = offset + len + bsize - 1;
1201 do_div(lblock_stop, bsize);
1202 } else {
1203 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1204 lblock = offset >> shift;
1205 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1206 }
1207
1208 for (; lblock < lblock_stop; lblock += extlen) {
1209 error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
1210 if (error)
1211 return error;
1212
1213 if (!dblock) {
1214 *alloc_required = 1;
1215 return 0;
1216 }
1217 }
1218
1219 return 0;
1220}
1221
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
new file mode 100644
index 000000000000..0fd379b4cd9e
--- /dev/null
+++ b/fs/gfs2/bmap.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __BMAP_DOT_H__
11#define __BMAP_DOT_H__
12
13struct inode;
14struct gfs2_inode;
15struct page;
16
17int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
18int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh, unsigned int maxlen);
19int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
20
21int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
22int gfs2_truncatei_resume(struct gfs2_inode *ip);
23int gfs2_file_dealloc(struct gfs2_inode *ip);
24
25void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
26 unsigned int *data_blocks,
27 unsigned int *ind_blocks);
28int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
29 unsigned int len, int *alloc_required);
30
31#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
new file mode 100644
index 000000000000..cab1f68d4685
--- /dev/null
+++ b/fs/gfs2/daemon.c
@@ -0,0 +1,196 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/delay.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "daemon.h"
23#include "glock.h"
24#include "log.h"
25#include "quota.h"
26#include "recovery.h"
27#include "super.h"
28#include "util.h"
29
30/* This uses schedule_timeout() instead of msleep() because it's good for
31 the daemons to wake up more often than the timeout when unmounting so
32 the user's unmount doesn't sit there forever.
33
34 The kthread functions used to start these daemons block and flush signals. */
35
36/**
37 * gfs2_scand - Look for cached glocks and inodes to toss from memory
38 * @sdp: Pointer to GFS2 superblock
39 *
40 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
41 * See gfs2_glockd()
42 */
43
44int gfs2_scand(void *data)
45{
46 struct gfs2_sbd *sdp = data;
47 unsigned long t;
48
49 while (!kthread_should_stop()) {
50 gfs2_scand_internal(sdp);
51 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
52 schedule_timeout_interruptible(t);
53 }
54
55 return 0;
56}
57
58/**
59 * gfs2_glockd - Reclaim unused glock structures
60 * @sdp: Pointer to GFS2 superblock
61 *
62 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
63 * Number of daemons can be set by user, with num_glockd mount option.
64 */
65
66int gfs2_glockd(void *data)
67{
68 struct gfs2_sbd *sdp = data;
69
70 while (!kthread_should_stop()) {
71 while (atomic_read(&sdp->sd_reclaim_count))
72 gfs2_reclaim_glock(sdp);
73
74 wait_event_interruptible(sdp->sd_reclaim_wq,
75 (atomic_read(&sdp->sd_reclaim_count) ||
76 kthread_should_stop()));
77 }
78
79 return 0;
80}
81
82/**
83 * gfs2_recoverd - Recover dead machine's journals
84 * @sdp: Pointer to GFS2 superblock
85 *
86 */
87
88int gfs2_recoverd(void *data)
89{
90 struct gfs2_sbd *sdp = data;
91 unsigned long t;
92
93 while (!kthread_should_stop()) {
94 gfs2_check_journals(sdp);
95 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
96 schedule_timeout_interruptible(t);
97 }
98
99 return 0;
100}
101
102/**
103 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
104 * @sdp: Pointer to GFS2 superblock
105 *
106 * Also, periodically check to make sure that we're using the most recent
107 * journal index.
108 */
109
110int gfs2_logd(void *data)
111{
112 struct gfs2_sbd *sdp = data;
113 struct gfs2_holder ji_gh;
114 unsigned long t;
115
116 while (!kthread_should_stop()) {
117 /* Advance the log tail */
118
119 t = sdp->sd_log_flush_time +
120 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
121
122 gfs2_ail1_empty(sdp, DIO_ALL);
123
124 if (time_after_eq(jiffies, t)) {
125 gfs2_log_flush(sdp, NULL);
126 sdp->sd_log_flush_time = jiffies;
127 }
128
129 /* Check for latest journal index */
130
131 t = sdp->sd_jindex_refresh_time +
132 gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
133
134 if (time_after_eq(jiffies, t)) {
135 if (!gfs2_jindex_hold(sdp, &ji_gh))
136 gfs2_glock_dq_uninit(&ji_gh);
137 sdp->sd_jindex_refresh_time = jiffies;
138 }
139
140 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
141 schedule_timeout_interruptible(t);
142 }
143
144 return 0;
145}
146
147/**
148 * gfs2_quotad - Write cached quota changes into the quota file
149 * @sdp: Pointer to GFS2 superblock
150 *
151 */
152
153int gfs2_quotad(void *data)
154{
155 struct gfs2_sbd *sdp = data;
156 unsigned long t;
157 int error;
158
159 while (!kthread_should_stop()) {
160 /* Update the master statfs file */
161
162 t = sdp->sd_statfs_sync_time +
163 gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
164
165 if (time_after_eq(jiffies, t)) {
166 error = gfs2_statfs_sync(sdp);
167 if (error &&
168 error != -EROFS &&
169 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
170 fs_err(sdp, "quotad: (1) error=%d\n", error);
171 sdp->sd_statfs_sync_time = jiffies;
172 }
173
174 /* Update quota file */
175
176 t = sdp->sd_quota_sync_time +
177 gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
178
179 if (time_after_eq(jiffies, t)) {
180 error = gfs2_quota_sync(sdp);
181 if (error &&
182 error != -EROFS &&
183 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
184 fs_err(sdp, "quotad: (2) error=%d\n", error);
185 sdp->sd_quota_sync_time = jiffies;
186 }
187
188 gfs2_quota_scan(sdp);
189
190 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
191 schedule_timeout_interruptible(t);
192 }
193
194 return 0;
195}
196
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
new file mode 100644
index 000000000000..801007120fb2
--- /dev/null
+++ b/fs/gfs2/daemon.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __DAEMON_DOT_H__
11#define __DAEMON_DOT_H__
12
/*
 * Main loops of the GFS2 daemons, defined in daemon.c.  Each one casts
 * @data to a struct gfs2_sbd pointer, loops until kthread_should_stop()
 * is true and then returns 0.
 */
13int gfs2_scand(void *data);
14int gfs2_glockd(void *data);
15int gfs2_recoverd(void *data);
16int gfs2_logd(void *data);
17int gfs2_quotad(void *data);
18
19#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
new file mode 100644
index 000000000000..459498cac93b
--- /dev/null
+++ b/fs/gfs2/dir.c
@@ -0,0 +1,1961 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10/*
11 * Implements Extendible Hashing as described in:
12 * "Extendible Hashing" by Fagin, et al in
13 * __ACM Trans. on Database Systems__, Sept 1979.
14 *
15 *
16 * Here's the layout of dirents which is essentially the same as that of ext2
17 * within a single block. The field de_name_len is the number of bytes
18 * actually required for the name (no null terminator). The field de_rec_len
19 * is the number of bytes allocated to the dirent. The offset of the next
20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21 * deleted, the preceding dirent inherits its allocated space, ie
22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23 * by adding de_rec_len to the current dirent, this essentially causes the
24 * deleted dirent to get jumped over when iterating through all the dirents.
25 *
26 * When deleting the first dirent in a block, there is no previous dirent so
27 * the field de_ino is set to zero to designate it as deleted. When allocating
28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30 * dirent is allocated. Otherwise it must go through all the 'used' dirents
31 * searching for one in which the amount of total space minus the amount of
32 * used space will provide enough space for the new dirent.
33 *
34 * There are two types of blocks in which dirents reside. In a stuffed dinode,
35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37 * beginning of the leaf block. The dirents reside in leaves when
38 *
39 * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40 *
41 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
42 *
43 * When the dirents are in leaves, the actual contents of the directory file are
44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
45 * dirents are NOT in the directory file itself. There can be more than one
46 * block pointer in the array that points to the same leaf. In fact, when a
47 * directory is first converted from linear to exhash, all of the pointers
48 * point to the same leaf.
49 *
50 * When a leaf is completely full, the size of the hash table can be
51 * doubled unless it is already at the maximum size which is hard coded into
52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53 * but never before the maximum hash table size has been reached.
54 */
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/buffer_head.h>
60#include <linux/sort.h>
61#include <linux/gfs2_ondisk.h>
62#include <linux/crc32.h>
63#include <linux/vmalloc.h>
64#include <linux/lm_interface.h>
65
66#include "gfs2.h"
67#include "incore.h"
68#include "dir.h"
69#include "glock.h"
70#include "inode.h"
71#include "meta_io.h"
72#include "quota.h"
73#include "rgrp.h"
74#include "trans.h"
75#include "bmap.h"
76#include "util.h"
77
/* Kinds of block in which dirents reside. */
78#define IS_LEAF 1 /* Hashed (leaf) directory */
79#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
80
/*
 * Convert between a 32-bit hash and an offset value: the offset is the hash
 * shifted right by one bit, and the inverse shifts left by one bit and
 * truncates to 32 bits.
 */
81#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
82#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
83
/* Callback type taking a leaf's index, length and block number. */
84typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
85 u64 leaf_no, void *data);
/*
 * Callback type for examining one dirent against @name; @opaque is private
 * to the callback.
 */
86typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
87 const struct qstr *name, void *opaque);
88
89
90int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
91 struct buffer_head **bhp)
92{
93 struct buffer_head *bh;
94
95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh;
100 return 0;
101}
102
103static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
104 struct buffer_head **bhp)
105{
106 struct buffer_head *bh;
107 int error;
108
109 error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh);
110 if (error)
111 return error;
112 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
113 brelse(bh);
114 return -EIO;
115 }
116 *bhp = bh;
117 return 0;
118}
119
120static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
121 unsigned int offset, unsigned int size)
122{
123 struct buffer_head *dibh;
124 int error;
125
126 error = gfs2_meta_inode_buffer(ip, &dibh);
127 if (error)
128 return error;
129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_di.di_size < offset + size)
133 ip->i_di.di_size = offset + size;
134 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
135 gfs2_dinode_out(&ip->i_di, dibh->b_data);
136
137 brelse(dibh);
138
139 return size;
140}
141
142
143
144/**
145 * gfs2_dir_write_data - Write directory information to the inode
146 * @ip: The GFS2 inode
147 * @buf: The buffer containing information to be written
148 * @offset: The file offset to start writing at
149 * @size: The amount of data to write
150 *
151 * Returns: The number of bytes correctly written or error code
152 */
153static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
154 u64 offset, unsigned int size)
155{
156 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
157 struct buffer_head *dibh;
158 u64 lblock, dblock;
159 u32 extlen = 0;
160 unsigned int o;
161 int copied = 0;
162 int error = 0;
163
164 if (!size)
165 return 0;
166
167 if (gfs2_is_stuffed(ip) &&
168 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
169 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
170 size);
171
172 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
173 return -EINVAL;
174
175 if (gfs2_is_stuffed(ip)) {
176 error = gfs2_unstuff_dinode(ip, NULL);
177 if (error)
178 return error;
179 }
180
181 lblock = offset;
182 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
183
184 while (copied < size) {
185 unsigned int amount;
186 struct buffer_head *bh;
187 int new;
188
189 amount = size - copied;
190 if (amount > sdp->sd_sb.sb_bsize - o)
191 amount = sdp->sd_sb.sb_bsize - o;
192
193 if (!extlen) {
194 new = 1;
195 error = gfs2_extent_map(&ip->i_inode, lblock, &new,
196 &dblock, &extlen);
197 if (error)
198 goto fail;
199 error = -EIO;
200 if (gfs2_assert_withdraw(sdp, dblock))
201 goto fail;
202 }
203
204 if (amount == sdp->sd_jbsize || new)
205 error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
206 else
207 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
208
209 if (error)
210 goto fail;
211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1);
213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh);
215 if (error)
216 goto fail;
217
218 buf += amount;
219 copied += amount;
220 lblock++;
221 dblock++;
222 extlen--;
223
224 o = sizeof(struct gfs2_meta_header);
225 }
226
227out:
228 error = gfs2_meta_inode_buffer(ip, &dibh);
229 if (error)
230 return error;
231
232 if (ip->i_di.di_size < offset + copied)
233 ip->i_di.di_size = offset + copied;
234 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
235
236 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
237 gfs2_dinode_out(&ip->i_di, dibh->b_data);
238 brelse(dibh);
239
240 return copied;
241fail:
242 if (copied)
243 goto out;
244 return error;
245}
246
247static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
248 u64 offset, unsigned int size)
249{
250 struct buffer_head *dibh;
251 int error;
252
253 error = gfs2_meta_inode_buffer(ip, &dibh);
254 if (!error) {
255 offset += sizeof(struct gfs2_dinode);
256 memcpy(buf, dibh->b_data + offset, size);
257 brelse(dibh);
258 }
259
260 return (error) ? error : size;
261}
262
263
264/**
265 * gfs2_dir_read_data - Read a data from a directory inode
266 * @ip: The GFS2 Inode
267 * @buf: The buffer to place result into
268 * @offset: File offset to begin jdata_readng from
269 * @size: Amount of data to transfer
270 *
271 * Returns: The amount of data actually copied or the error
272 */
273static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
274 unsigned int size, unsigned ra)
275{
276 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
277 u64 lblock, dblock;
278 u32 extlen = 0;
279 unsigned int o;
280 int copied = 0;
281 int error = 0;
282
283 if (offset >= ip->i_di.di_size)
284 return 0;
285
286 if (offset + size > ip->i_di.di_size)
287 size = ip->i_di.di_size - offset;
288
289 if (!size)
290 return 0;
291
292 if (gfs2_is_stuffed(ip))
293 return gfs2_dir_read_stuffed(ip, buf, offset, size);
294
295 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
296 return -EINVAL;
297
298 lblock = offset;
299 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
300
301 while (copied < size) {
302 unsigned int amount;
303 struct buffer_head *bh;
304 int new;
305
306 amount = size - copied;
307 if (amount > sdp->sd_sb.sb_bsize - o)
308 amount = sdp->sd_sb.sb_bsize - o;
309
310 if (!extlen) {
311 new = 0;
312 error = gfs2_extent_map(&ip->i_inode, lblock, &new,
313 &dblock, &extlen);
314 if (error || !dblock)
315 goto fail;
316 BUG_ON(extlen < 1);
317 if (!ra)
318 extlen = 1;
319 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
320 }
321 if (!bh) {
322 error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
323 if (error)
324 goto fail;
325 }
326 error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
327 if (error) {
328 brelse(bh);
329 goto fail;
330 }
331 dblock++;
332 extlen--;
333 memcpy(buf, bh->b_data + o, amount);
334 brelse(bh);
335 bh = NULL;
336 buf += amount;
337 copied += amount;
338 lblock++;
339 o = sizeof(struct gfs2_meta_header);
340 }
341
342 return copied;
343fail:
344 return (copied) ? copied : error;
345}
346
347static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
348 const struct qstr *name, int ret)
349{
350 if (dent->de_inum.no_addr != 0 &&
351 be32_to_cpu(dent->de_hash) == name->hash &&
352 be16_to_cpu(dent->de_name_len) == name->len &&
353 memcmp(dent+1, name->name, name->len) == 0)
354 return ret;
355 return 0;
356}
357
/* Scan callback: returns 1 when @dent matches @name, else 0; @opaque unused. */
static int gfs2_dirent_find(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	const int on_match = 1;

	return __gfs2_dirent_find(dent, name, on_match);
}
364
/* Scan callback: returns 2 when @dent matches @name, else 0; @opaque unused. */
static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	const int on_match = 2;

	return __gfs2_dirent_find(dent, name, on_match);
}
371
372/*
373 * name->name holds ptr to start of block.
374 * name->len holds size of block.
375 */
376static int gfs2_dirent_last(const struct gfs2_dirent *dent,
377 const struct qstr *name,
378 void *opaque)
379{
380 const char *start = name->name;
381 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
382 if (name->len == (end - start))
383 return 1;
384 return 0;
385}
386
387static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
388 const struct qstr *name,
389 void *opaque)
390{
391 unsigned required = GFS2_DIRENT_SIZE(name->len);
392 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
393 unsigned totlen = be16_to_cpu(dent->de_rec_len);
394
395 if (!dent->de_inum.no_addr)
396 actual = GFS2_DIRENT_SIZE(0);
397 if (totlen - actual >= required)
398 return 1;
399 return 0;
400}
401
/*
 * State handed to gfs2_dirent_gather() through the scan callback's opaque
 * pointer while collecting the in-use entries of one or more blocks.
 */
struct dirent_gather {
	/* Caller-provided array that receives pointers to in-use entries */
	const struct gfs2_dirent **pdent;
	/* Index of the next free slot in pdent; also the number gathered */
	unsigned offset;
};
406
407static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
408 const struct qstr *name,
409 void *opaque)
410{
411 struct dirent_gather *g = opaque;
412 if (dent->de_inum.no_addr) {
413 g->pdent[g->offset++] = dent;
414 }
415 return 0;
416}
417
418/*
419 * Other possible things to check:
420 * - Inode located within filesystem size (and on valid block)
421 * - Valid directory entry type
422 * Not sure how heavy-weight we want to make this... could also check
423 * hash is correct for example, but that would take a lot of extra time.
424 * For now the most important thing is to check that the various sizes
425 * are correct.
426 */
427static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
428 unsigned int size, unsigned int len, int first)
429{
430 const char *msg = "gfs2_dirent too small";
431 if (unlikely(size < sizeof(struct gfs2_dirent)))
432 goto error;
433 msg = "gfs2_dirent misaligned";
434 if (unlikely(offset & 0x7))
435 goto error;
436 msg = "gfs2_dirent points beyond end of block";
437 if (unlikely(offset + size > len))
438 goto error;
439 msg = "zero inode number";
440 if (unlikely(!first && !dent->de_inum.no_addr))
441 goto error;
442 msg = "name length is greater than space in dirent";
443 if (dent->de_inum.no_addr &&
444 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
445 size))
446 goto error;
447 return 0;
448error:
449 printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg,
450 first ? "first in block" : "not first in block");
451 return -EIO;
452}
453
454static int gfs2_dirent_offset(const void *buf)
455{
456 const struct gfs2_meta_header *h = buf;
457 int offset;
458
459 BUG_ON(buf == NULL);
460
461 switch(be32_to_cpu(h->mh_type)) {
462 case GFS2_METATYPE_LF:
463 offset = sizeof(struct gfs2_leaf);
464 break;
465 case GFS2_METATYPE_DI:
466 offset = sizeof(struct gfs2_dinode);
467 break;
468 default:
469 goto wrong_type;
470 }
471 return offset;
472wrong_type:
473 printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
474 be32_to_cpu(h->mh_type));
475 return -1;
476}
477
478static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
479 unsigned int len, gfs2_dscan_t scan,
480 const struct qstr *name,
481 void *opaque)
482{
483 struct gfs2_dirent *dent, *prev;
484 unsigned offset;
485 unsigned size;
486 int ret = 0;
487
488 ret = gfs2_dirent_offset(buf);
489 if (ret < 0)
490 goto consist_inode;
491
492 offset = ret;
493 prev = NULL;
494 dent = buf + offset;
495 size = be16_to_cpu(dent->de_rec_len);
496 if (gfs2_check_dirent(dent, offset, size, len, 1))
497 goto consist_inode;
498 do {
499 ret = scan(dent, name, opaque);
500 if (ret)
501 break;
502 offset += size;
503 if (offset == len)
504 break;
505 prev = dent;
506 dent = buf + offset;
507 size = be16_to_cpu(dent->de_rec_len);
508 if (gfs2_check_dirent(dent, offset, size, len, 0))
509 goto consist_inode;
510 } while(1);
511
512 switch(ret) {
513 case 0:
514 return NULL;
515 case 1:
516 return dent;
517 case 2:
518 return prev ? prev : dent;
519 default:
520 BUG_ON(ret > 0);
521 return ERR_PTR(ret);
522 }
523
524consist_inode:
525 gfs2_consist_inode(GFS2_I(inode));
526 return ERR_PTR(-EIO);
527}
528
529
530/**
531 * dirent_first - Return the first dirent
532 * @dip: the directory
533 * @bh: The buffer
534 * @dent: Pointer to list of dirents
535 *
536 * return first dirent whether bh points to leaf or stuffed dinode
537 *
538 * Returns: IS_LEAF, IS_DINODE, or -errno
539 */
540
541static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
542 struct gfs2_dirent **dent)
543{
544 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
545
546 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
547 if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh))
548 return -EIO;
549 *dent = (struct gfs2_dirent *)(bh->b_data +
550 sizeof(struct gfs2_leaf));
551 return IS_LEAF;
552 } else {
553 if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI))
554 return -EIO;
555 *dent = (struct gfs2_dirent *)(bh->b_data +
556 sizeof(struct gfs2_dinode));
557 return IS_DINODE;
558 }
559}
560
561static int dirent_check_reclen(struct gfs2_inode *dip,
562 const struct gfs2_dirent *d, const void *end_p)
563{
564 const void *ptr = d;
565 u16 rec_len = be16_to_cpu(d->de_rec_len);
566
567 if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
568 goto broken;
569 ptr += rec_len;
570 if (ptr < end_p)
571 return rec_len;
572 if (ptr == end_p)
573 return -ENOENT;
574broken:
575 gfs2_consist_inode(dip);
576 return -EIO;
577}
578
579/**
580 * dirent_next - Next dirent
581 * @dip: the directory
582 * @bh: The buffer
583 * @dent: Pointer to list of dirents
584 *
585 * Returns: 0 on success, error code otherwise
586 */
587
588static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
589 struct gfs2_dirent **dent)
590{
591 struct gfs2_dirent *cur = *dent, *tmp;
592 char *bh_end = bh->b_data + bh->b_size;
593 int ret;
594
595 ret = dirent_check_reclen(dip, cur, bh_end);
596 if (ret < 0)
597 return ret;
598
599 tmp = (void *)cur + ret;
600 ret = dirent_check_reclen(dip, tmp, bh_end);
601 if (ret == -EIO)
602 return ret;
603
604 /* Only the first dent could ever have de_inum.no_addr == 0 */
605 if (!tmp->de_inum.no_addr) {
606 gfs2_consist_inode(dip);
607 return -EIO;
608 }
609
610 *dent = tmp;
611 return 0;
612}
613
614/**
615 * dirent_del - Delete a dirent
616 * @dip: The GFS2 inode
617 * @bh: The buffer
618 * @prev: The previous dirent
619 * @cur: The current dirent
620 *
621 */
622
623static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
624 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
625{
626 u16 cur_rec_len, prev_rec_len;
627
628 if (!cur->de_inum.no_addr) {
629 gfs2_consist_inode(dip);
630 return;
631 }
632
633 gfs2_trans_add_bh(dip->i_gl, bh, 1);
634
635 /* If there is no prev entry, this is the first entry in the block.
636 The de_rec_len is already as big as it needs to be. Just zero
637 out the inode number and return. */
638
639 if (!prev) {
640 cur->de_inum.no_addr = 0; /* No endianess worries */
641 return;
642 }
643
644 /* Combine this dentry with the previous one. */
645
646 prev_rec_len = be16_to_cpu(prev->de_rec_len);
647 cur_rec_len = be16_to_cpu(cur->de_rec_len);
648
649 if ((char *)prev + prev_rec_len != (char *)cur)
650 gfs2_consist_inode(dip);
651 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
652 gfs2_consist_inode(dip);
653
654 prev_rec_len += cur_rec_len;
655 prev->de_rec_len = cpu_to_be16(prev_rec_len);
656}
657
658/*
659 * Takes a dent from which to grab space as an argument. Returns the
660 * newly created dent.
661 */
662static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
663 struct gfs2_dirent *dent,
664 const struct qstr *name,
665 struct buffer_head *bh)
666{
667 struct gfs2_inode *ip = GFS2_I(inode);
668 struct gfs2_dirent *ndent;
669 unsigned offset = 0, totlen;
670
671 if (dent->de_inum.no_addr)
672 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
673 totlen = be16_to_cpu(dent->de_rec_len);
674 BUG_ON(offset + name->len > totlen);
675 gfs2_trans_add_bh(ip->i_gl, bh, 1);
676 ndent = (struct gfs2_dirent *)((char *)dent + offset);
677 dent->de_rec_len = cpu_to_be16(offset);
678 gfs2_qstr2dirent(name, totlen - offset, ndent);
679 return ndent;
680}
681
682static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
683 struct buffer_head *bh,
684 const struct qstr *name)
685{
686 struct gfs2_dirent *dent;
687 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
688 gfs2_dirent_find_space, name, NULL);
689 if (!dent || IS_ERR(dent))
690 return dent;
691 return gfs2_init_dirent(inode, dent, name, bh);
692}
693
694static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
695 struct buffer_head **bhp)
696{
697 int error;
698
699 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp);
700 if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) {
701 /* printk(KERN_INFO "block num=%llu\n", leaf_no); */
702 error = -EIO;
703 }
704
705 return error;
706}
707
708/**
709 * get_leaf_nr - Get a leaf number associated with the index
710 * @dip: The GFS2 inode
711 * @index:
712 * @leaf_out:
713 *
714 * Returns: 0 on success, error code otherwise
715 */
716
717static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
718 u64 *leaf_out)
719{
720 u64 leaf_no;
721 int error;
722
723 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
724 index * sizeof(u64),
725 sizeof(u64), 0);
726 if (error != sizeof(u64))
727 return (error < 0) ? error : -EIO;
728
729 *leaf_out = be64_to_cpu(leaf_no);
730
731 return 0;
732}
733
734static int get_first_leaf(struct gfs2_inode *dip, u32 index,
735 struct buffer_head **bh_out)
736{
737 u64 leaf_no;
738 int error;
739
740 error = get_leaf_nr(dip, index, &leaf_no);
741 if (!error)
742 error = get_leaf(dip, leaf_no, bh_out);
743
744 return error;
745}
746
747static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
748 const struct qstr *name,
749 gfs2_dscan_t scan,
750 struct buffer_head **pbh)
751{
752 struct buffer_head *bh;
753 struct gfs2_dirent *dent;
754 struct gfs2_inode *ip = GFS2_I(inode);
755 int error;
756
757 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
758 struct gfs2_leaf *leaf;
759 unsigned hsize = 1 << ip->i_di.di_depth;
760 unsigned index;
761 u64 ln;
762 if (hsize * sizeof(u64) != ip->i_di.di_size) {
763 gfs2_consist_inode(ip);
764 return ERR_PTR(-EIO);
765 }
766
767 index = name->hash >> (32 - ip->i_di.di_depth);
768 error = get_first_leaf(ip, index, &bh);
769 if (error)
770 return ERR_PTR(error);
771 do {
772 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
773 scan, name, NULL);
774 if (dent)
775 goto got_dent;
776 leaf = (struct gfs2_leaf *)bh->b_data;
777 ln = be64_to_cpu(leaf->lf_next);
778 brelse(bh);
779 if (!ln)
780 break;
781
782 error = get_leaf(ip, ln, &bh);
783 } while(!error);
784
785 return error ? ERR_PTR(error) : NULL;
786 }
787
788
789 error = gfs2_meta_inode_buffer(ip, &bh);
790 if (error)
791 return ERR_PTR(error);
792 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
793got_dent:
794 if (unlikely(dent == NULL || IS_ERR(dent))) {
795 brelse(bh);
796 bh = NULL;
797 }
798 *pbh = bh;
799 return dent;
800}
801
802static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
803{
804 struct gfs2_inode *ip = GFS2_I(inode);
805 u64 bn = gfs2_alloc_meta(ip);
806 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
807 struct gfs2_leaf *leaf;
808 struct gfs2_dirent *dent;
809 struct qstr name = { .name = "", .len = 0, .hash = 0 };
810 if (!bh)
811 return NULL;
812
813 gfs2_trans_add_bh(ip->i_gl, bh, 1);
814 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
815 leaf = (struct gfs2_leaf *)bh->b_data;
816 leaf->lf_depth = cpu_to_be16(depth);
817 leaf->lf_entries = 0;
818 leaf->lf_dirent_format = cpu_to_be16(GFS2_FORMAT_DE);
819 leaf->lf_next = 0;
820 memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
821 dent = (struct gfs2_dirent *)(leaf+1);
822 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
823 *pbh = bh;
824 return leaf;
825}
826
827/**
828 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
829 * @dip: The GFS2 inode
830 *
831 * Returns: 0 on success, error code otherwise
832 */
833
834static int dir_make_exhash(struct inode *inode)
835{
836 struct gfs2_inode *dip = GFS2_I(inode);
837 struct gfs2_sbd *sdp = GFS2_SB(inode);
838 struct gfs2_dirent *dent;
839 struct qstr args;
840 struct buffer_head *bh, *dibh;
841 struct gfs2_leaf *leaf;
842 int y;
843 u32 x;
844 u64 *lp, bn;
845 int error;
846
847 error = gfs2_meta_inode_buffer(dip, &dibh);
848 if (error)
849 return error;
850
851 /* Turn over a new leaf */
852
853 leaf = new_leaf(inode, &bh, 0);
854 if (!leaf)
855 return -ENOSPC;
856 bn = bh->b_blocknr;
857
858 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
859 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
860
861 /* Copy dirents */
862
863 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
864 sizeof(struct gfs2_dinode));
865
866 /* Find last entry */
867
868 x = 0;
869 args.len = bh->b_size - sizeof(struct gfs2_dinode) +
870 sizeof(struct gfs2_leaf);
871 args.name = bh->b_data;
872 dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size,
873 gfs2_dirent_last, &args, NULL);
874 if (!dent) {
875 brelse(bh);
876 brelse(dibh);
877 return -EIO;
878 }
879 if (IS_ERR(dent)) {
880 brelse(bh);
881 brelse(dibh);
882 return PTR_ERR(dent);
883 }
884
885 /* Adjust the last dirent's record length
886 (Remember that dent still points to the last entry.) */
887
888 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
889 sizeof(struct gfs2_dinode) -
890 sizeof(struct gfs2_leaf));
891
892 brelse(bh);
893
894 /* We're done with the new leaf block, now setup the new
895 hash table. */
896
897 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
898 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
899
900 lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
901
902 for (x = sdp->sd_hash_ptrs; x--; lp++)
903 *lp = cpu_to_be64(bn);
904
905 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
906 dip->i_di.di_blocks++;
907 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
908 dip->i_di.di_payload_format = 0;
909
910 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
911 dip->i_di.di_depth = y;
912
913 gfs2_dinode_out(&dip->i_di, dibh->b_data);
914
915 brelse(dibh);
916
917 return 0;
918}
919
920/**
921 * dir_split_leaf - Split a leaf block into two
922 * @dip: The GFS2 inode
923 * @index:
924 * @leaf_no:
925 *
926 * Returns: 0 on success, error code on failure
927 */
928
929static int dir_split_leaf(struct inode *inode, const struct qstr *name)
930{
931 struct gfs2_inode *dip = GFS2_I(inode);
932 struct buffer_head *nbh, *obh, *dibh;
933 struct gfs2_leaf *nleaf, *oleaf;
934 struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
935 u32 start, len, half_len, divider;
936 u64 bn, *lp, leaf_no;
937 u32 index;
938 int x, moved = 0;
939 int error;
940
941 index = name->hash >> (32 - dip->i_di.di_depth);
942 error = get_leaf_nr(dip, index, &leaf_no);
943 if (error)
944 return error;
945
946 /* Get the old leaf block */
947 error = get_leaf(dip, leaf_no, &obh);
948 if (error)
949 return error;
950
951 oleaf = (struct gfs2_leaf *)obh->b_data;
952 if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) {
953 brelse(obh);
954 return 1; /* can't split */
955 }
956
957 gfs2_trans_add_bh(dip->i_gl, obh, 1);
958
959 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
960 if (!nleaf) {
961 brelse(obh);
962 return -ENOSPC;
963 }
964 bn = nbh->b_blocknr;
965
966 /* Compute the start and len of leaf pointers in the hash table. */
967 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
968 half_len = len >> 1;
969 if (!half_len) {
970 printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index);
971 gfs2_consist_inode(dip);
972 error = -EIO;
973 goto fail_brelse;
974 }
975
976 start = (index & ~(len - 1));
977
978 /* Change the pointers.
979 Don't bother distinguishing stuffed from non-stuffed.
980 This code is complicated enough already. */
981 lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL);
982 /* Change the pointers */
983 for (x = 0; x < half_len; x++)
984 lp[x] = cpu_to_be64(bn);
985
986 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
987 half_len * sizeof(u64));
988 if (error != half_len * sizeof(u64)) {
989 if (error >= 0)
990 error = -EIO;
991 goto fail_lpfree;
992 }
993
994 kfree(lp);
995
996 /* Compute the divider */
997 divider = (start + half_len) << (32 - dip->i_di.di_depth);
998
999 /* Copy the entries */
1000 dirent_first(dip, obh, &dent);
1001
1002 do {
1003 next = dent;
1004 if (dirent_next(dip, obh, &next))
1005 next = NULL;
1006
1007 if (dent->de_inum.no_addr &&
1008 be32_to_cpu(dent->de_hash) < divider) {
1009 struct qstr str;
1010 str.name = (char*)(dent+1);
1011 str.len = be16_to_cpu(dent->de_name_len);
1012 str.hash = be32_to_cpu(dent->de_hash);
1013 new = gfs2_dirent_alloc(inode, nbh, &str);
1014 if (IS_ERR(new)) {
1015 error = PTR_ERR(new);
1016 break;
1017 }
1018
1019 new->de_inum = dent->de_inum; /* No endian worries */
1020 new->de_type = dent->de_type; /* No endian worries */
1021 nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1);
1022
1023 dirent_del(dip, obh, prev, dent);
1024
1025 if (!oleaf->lf_entries)
1026 gfs2_consist_inode(dip);
1027 oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1);
1028
1029 if (!prev)
1030 prev = dent;
1031
1032 moved = 1;
1033 } else {
1034 prev = dent;
1035 }
1036 dent = next;
1037 } while (dent);
1038
1039 oleaf->lf_depth = nleaf->lf_depth;
1040
1041 error = gfs2_meta_inode_buffer(dip, &dibh);
1042 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1043 dip->i_di.di_blocks++;
1044 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1045 brelse(dibh);
1046 }
1047
1048 brelse(obh);
1049 brelse(nbh);
1050
1051 return error;
1052
1053fail_lpfree:
1054 kfree(lp);
1055
1056fail_brelse:
1057 brelse(obh);
1058 brelse(nbh);
1059 return error;
1060}
1061
1062/**
1063 * dir_double_exhash - Double size of ExHash table
1064 * @dip: The GFS2 dinode
1065 *
1066 * Returns: 0 on success, error code on failure
1067 */
1068
1069static int dir_double_exhash(struct gfs2_inode *dip)
1070{
1071 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1072 struct buffer_head *dibh;
1073 u32 hsize;
1074 u64 *buf;
1075 u64 *from, *to;
1076 u64 block;
1077 int x;
1078 int error = 0;
1079
1080 hsize = 1 << dip->i_di.di_depth;
1081 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1082 gfs2_consist_inode(dip);
1083 return -EIO;
1084 }
1085
1086 /* Allocate both the "from" and "to" buffers in one big chunk */
1087
1088 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
1089
1090 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1091 error = gfs2_dir_read_data(dip, (char *)buf,
1092 block * sdp->sd_hash_bsize,
1093 sdp->sd_hash_bsize, 1);
1094 if (error != sdp->sd_hash_bsize) {
1095 if (error >= 0)
1096 error = -EIO;
1097 goto fail;
1098 }
1099
1100 from = buf;
1101 to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
1102
1103 for (x = sdp->sd_hash_ptrs; x--; from++) {
1104 *to++ = *from; /* No endianess worries */
1105 *to++ = *from;
1106 }
1107
1108 error = gfs2_dir_write_data(dip,
1109 (char *)buf + sdp->sd_hash_bsize,
1110 block * sdp->sd_sb.sb_bsize,
1111 sdp->sd_sb.sb_bsize);
1112 if (error != sdp->sd_sb.sb_bsize) {
1113 if (error >= 0)
1114 error = -EIO;
1115 goto fail;
1116 }
1117 }
1118
1119 kfree(buf);
1120
1121 error = gfs2_meta_inode_buffer(dip, &dibh);
1122 if (!gfs2_assert_withdraw(sdp, !error)) {
1123 dip->i_di.di_depth++;
1124 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1125 brelse(dibh);
1126 }
1127
1128 return error;
1129
1130fail:
1131 kfree(buf);
1132 return error;
1133}
1134
1135/**
1136 * compare_dents - compare directory entries by hash value
1137 * @a: first dent
1138 * @b: second dent
1139 *
1140 * When comparing the hash entries of @a to @b:
1141 * gt: returns 1
1142 * lt: returns -1
1143 * eq: returns 0
1144 */
1145
1146static int compare_dents(const void *a, const void *b)
1147{
1148 const struct gfs2_dirent *dent_a, *dent_b;
1149 u32 hash_a, hash_b;
1150 int ret = 0;
1151
1152 dent_a = *(const struct gfs2_dirent **)a;
1153 hash_a = be32_to_cpu(dent_a->de_hash);
1154
1155 dent_b = *(const struct gfs2_dirent **)b;
1156 hash_b = be32_to_cpu(dent_b->de_hash);
1157
1158 if (hash_a > hash_b)
1159 ret = 1;
1160 else if (hash_a < hash_b)
1161 ret = -1;
1162 else {
1163 unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1164 unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1165
1166 if (len_a > len_b)
1167 ret = 1;
1168 else if (len_a < len_b)
1169 ret = -1;
1170 else
1171 ret = memcmp(dent_a + 1, dent_b + 1, len_a);
1172 }
1173
1174 return ret;
1175}
1176
1177/**
1178 * do_filldir_main - read out directory entries
1179 * @dip: The GFS2 inode
1180 * @offset: The offset in the file to read from
1181 * @opaque: opaque data to pass to filldir
1182 * @filldir: The function to pass entries to
1183 * @darr: an array of struct gfs2_dirent pointers to read
1184 * @entries: the number of entries in darr
1185 * @copied: pointer to int that's non-zero if a entry has been copied out
1186 *
1187 * Jump through some hoops to make sure that if there are hash collsions,
1188 * they are read out at the beginning of a buffer. We want to minimize
1189 * the possibility that they will fall into different readdir buffers or
1190 * that someone will want to seek to that location.
1191 *
1192 * Returns: errno, >0 on exception from filldir
1193 */
1194
1195static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1196 void *opaque, gfs2_filldir_t filldir,
1197 const struct gfs2_dirent **darr, u32 entries,
1198 int *copied)
1199{
1200 const struct gfs2_dirent *dent, *dent_next;
1201 struct gfs2_inum inum;
1202 u64 off, off_next;
1203 unsigned int x, y;
1204 int run = 0;
1205 int error = 0;
1206
1207 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1208
1209 dent_next = darr[0];
1210 off_next = be32_to_cpu(dent_next->de_hash);
1211 off_next = gfs2_disk_hash2offset(off_next);
1212
1213 for (x = 0, y = 1; x < entries; x++, y++) {
1214 dent = dent_next;
1215 off = off_next;
1216
1217 if (y < entries) {
1218 dent_next = darr[y];
1219 off_next = be32_to_cpu(dent_next->de_hash);
1220 off_next = gfs2_disk_hash2offset(off_next);
1221
1222 if (off < *offset)
1223 continue;
1224 *offset = off;
1225
1226 if (off_next == off) {
1227 if (*copied && !run)
1228 return 1;
1229 run = 1;
1230 } else
1231 run = 0;
1232 } else {
1233 if (off < *offset)
1234 continue;
1235 *offset = off;
1236 }
1237
1238 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1239
1240 error = filldir(opaque, (const char *)(dent + 1),
1241 be16_to_cpu(dent->de_name_len),
1242 off, &inum,
1243 be16_to_cpu(dent->de_type));
1244 if (error)
1245 return 1;
1246
1247 *copied = 1;
1248 }
1249
1250 /* Increment the *offset by one, so the next time we come into the
1251 do_filldir fxn, we get the next entry instead of the last one in the
1252 current leaf */
1253
1254 (*offset)++;
1255
1256 return 0;
1257}
1258
1259static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1260 gfs2_filldir_t filldir, int *copied,
1261 unsigned *depth, u64 leaf_no)
1262{
1263 struct gfs2_inode *ip = GFS2_I(inode);
1264 struct buffer_head *bh;
1265 struct gfs2_leaf *lf;
1266 unsigned entries = 0;
1267 unsigned leaves = 0;
1268 const struct gfs2_dirent **darr, *dent;
1269 struct dirent_gather g;
1270 struct buffer_head **larr;
1271 int leaf = 0;
1272 int error, i;
1273 u64 lfn = leaf_no;
1274
1275 do {
1276 error = get_leaf(ip, lfn, &bh);
1277 if (error)
1278 goto out;
1279 lf = (struct gfs2_leaf *)bh->b_data;
1280 if (leaves == 0)
1281 *depth = be16_to_cpu(lf->lf_depth);
1282 entries += be16_to_cpu(lf->lf_entries);
1283 leaves++;
1284 lfn = be64_to_cpu(lf->lf_next);
1285 brelse(bh);
1286 } while(lfn);
1287
1288 if (!entries)
1289 return 0;
1290
1291 error = -ENOMEM;
1292 larr = vmalloc((leaves + entries) * sizeof(void *));
1293 if (!larr)
1294 goto out;
1295 darr = (const struct gfs2_dirent **)(larr + leaves);
1296 g.pdent = darr;
1297 g.offset = 0;
1298 lfn = leaf_no;
1299
1300 do {
1301 error = get_leaf(ip, lfn, &bh);
1302 if (error)
1303 goto out_kfree;
1304 lf = (struct gfs2_leaf *)bh->b_data;
1305 lfn = be64_to_cpu(lf->lf_next);
1306 if (lf->lf_entries) {
1307 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1308 gfs2_dirent_gather, NULL, &g);
1309 error = PTR_ERR(dent);
1310 if (IS_ERR(dent)) {
1311 goto out_kfree;
1312 }
1313 error = 0;
1314 larr[leaf++] = bh;
1315 } else {
1316 brelse(bh);
1317 }
1318 } while(lfn);
1319
1320 error = do_filldir_main(ip, offset, opaque, filldir, darr,
1321 entries, copied);
1322out_kfree:
1323 for(i = 0; i < leaf; i++)
1324 brelse(larr[i]);
1325 vfree(larr);
1326out:
1327 return error;
1328}
1329
1330/**
1331 * dir_e_read - Reads the entries from a directory into a filldir buffer
1332 * @dip: dinode pointer
1333 * @offset: the hash of the last entry read shifted to the right once
1334 * @opaque: buffer for the filldir function to fill
1335 * @filldir: points to the filldir function to use
1336 *
1337 * Returns: errno
1338 */
1339
1340static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1341 gfs2_filldir_t filldir)
1342{
1343 struct gfs2_inode *dip = GFS2_I(inode);
1344 struct gfs2_sbd *sdp = GFS2_SB(inode);
1345 u32 hsize, len = 0;
1346 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1347 u32 hash, index;
1348 u64 *lp;
1349 int copied = 0;
1350 int error = 0;
1351 unsigned depth = 0;
1352
1353 hsize = 1 << dip->i_di.di_depth;
1354 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1355 gfs2_consist_inode(dip);
1356 return -EIO;
1357 }
1358
1359 hash = gfs2_dir_offset2hash(*offset);
1360 index = hash >> (32 - dip->i_di.di_depth);
1361
1362 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1363 if (!lp)
1364 return -ENOMEM;
1365
1366 while (index < hsize) {
1367 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1368 ht_offset = index - lp_offset;
1369
1370 if (ht_offset_cur != ht_offset) {
1371 error = gfs2_dir_read_data(dip, (char *)lp,
1372 ht_offset * sizeof(u64),
1373 sdp->sd_hash_bsize, 1);
1374 if (error != sdp->sd_hash_bsize) {
1375 if (error >= 0)
1376 error = -EIO;
1377 goto out;
1378 }
1379 ht_offset_cur = ht_offset;
1380 }
1381
1382 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1383 &copied, &depth,
1384 be64_to_cpu(lp[lp_offset]));
1385 if (error)
1386 break;
1387
1388 len = 1 << (dip->i_di.di_depth - depth);
1389 index = (index & ~(len - 1)) + len;
1390 }
1391
1392out:
1393 kfree(lp);
1394 if (error > 0)
1395 error = 0;
1396 return error;
1397}
1398
1399int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1400 gfs2_filldir_t filldir)
1401{
1402 struct gfs2_inode *dip = GFS2_I(inode);
1403 struct dirent_gather g;
1404 const struct gfs2_dirent **darr, *dent;
1405 struct buffer_head *dibh;
1406 int copied = 0;
1407 int error;
1408
1409 if (!dip->i_di.di_entries)
1410 return 0;
1411
1412 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1413 return dir_e_read(inode, offset, opaque, filldir);
1414
1415 if (!gfs2_is_stuffed(dip)) {
1416 gfs2_consist_inode(dip);
1417 return -EIO;
1418 }
1419
1420 error = gfs2_meta_inode_buffer(dip, &dibh);
1421 if (error)
1422 return error;
1423
1424 error = -ENOMEM;
1425 darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *),
1426 GFP_KERNEL);
1427 if (darr) {
1428 g.pdent = darr;
1429 g.offset = 0;
1430 dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
1431 gfs2_dirent_gather, NULL, &g);
1432 if (IS_ERR(dent)) {
1433 error = PTR_ERR(dent);
1434 goto out;
1435 }
1436 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1437 dip->i_di.di_entries, &copied);
1438out:
1439 kfree(darr);
1440 }
1441
1442 if (error > 0)
1443 error = 0;
1444
1445 brelse(dibh);
1446
1447 return error;
1448}
1449
1450/**
1451 * gfs2_dir_search - Search a directory
1452 * @dip: The GFS2 inode
1453 * @filename:
1454 * @inode:
1455 *
1456 * This routine searches a directory for a file or another directory.
1457 * Assumes a glock is held on dip.
1458 *
1459 * Returns: errno
1460 */
1461
1462int gfs2_dir_search(struct inode *dir, const struct qstr *name,
1463 struct gfs2_inum *inum, unsigned int *type)
1464{
1465 struct buffer_head *bh;
1466 struct gfs2_dirent *dent;
1467
1468 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1469 if (dent) {
1470 if (IS_ERR(dent))
1471 return PTR_ERR(dent);
1472 if (inum)
1473 gfs2_inum_in(inum, (char *)&dent->de_inum);
1474 if (type)
1475 *type = be16_to_cpu(dent->de_type);
1476 brelse(bh);
1477 return 0;
1478 }
1479 return -ENOENT;
1480}
1481
1482static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1483{
1484 struct buffer_head *bh, *obh;
1485 struct gfs2_inode *ip = GFS2_I(inode);
1486 struct gfs2_leaf *leaf, *oleaf;
1487 int error;
1488 u32 index;
1489 u64 bn;
1490
1491 index = name->hash >> (32 - ip->i_di.di_depth);
1492 error = get_first_leaf(ip, index, &obh);
1493 if (error)
1494 return error;
1495 do {
1496 oleaf = (struct gfs2_leaf *)obh->b_data;
1497 bn = be64_to_cpu(oleaf->lf_next);
1498 if (!bn)
1499 break;
1500 brelse(obh);
1501 error = get_leaf(ip, bn, &obh);
1502 if (error)
1503 return error;
1504 } while(1);
1505
1506 gfs2_trans_add_bh(ip->i_gl, obh, 1);
1507
1508 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1509 if (!leaf) {
1510 brelse(obh);
1511 return -ENOSPC;
1512 }
1513 oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
1514 brelse(bh);
1515 brelse(obh);
1516
1517 error = gfs2_meta_inode_buffer(ip, &bh);
1518 if (error)
1519 return error;
1520 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1521 ip->i_di.di_blocks++;
1522 gfs2_dinode_out(&ip->i_di, bh->b_data);
1523 brelse(bh);
1524 return 0;
1525}
1526
1527/**
1528 * gfs2_dir_add - Add new filename into directory
1529 * @dip: The GFS2 inode
1530 * @filename: The new name
1531 * @inode: The inode number of the entry
1532 * @type: The type of the entry
1533 *
1534 * Returns: 0 on success, error code on failure
1535 */
1536
1537int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1538 const struct gfs2_inum *inum, unsigned type)
1539{
1540 struct gfs2_inode *ip = GFS2_I(inode);
1541 struct buffer_head *bh;
1542 struct gfs2_dirent *dent;
1543 struct gfs2_leaf *leaf;
1544 int error;
1545
1546 while(1) {
1547 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
1548 &bh);
1549 if (dent) {
1550 if (IS_ERR(dent))
1551 return PTR_ERR(dent);
1552 dent = gfs2_init_dirent(inode, dent, name, bh);
1553 gfs2_inum_out(inum, (char *)&dent->de_inum);
1554 dent->de_type = cpu_to_be16(type);
1555 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1556 leaf = (struct gfs2_leaf *)bh->b_data;
1557 leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1);
1558 }
1559 brelse(bh);
1560 error = gfs2_meta_inode_buffer(ip, &bh);
1561 if (error)
1562 break;
1563 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1564 ip->i_di.di_entries++;
1565 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1566 gfs2_dinode_out(&ip->i_di, bh->b_data);
1567 brelse(bh);
1568 error = 0;
1569 break;
1570 }
1571 if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
1572 error = dir_make_exhash(inode);
1573 if (error)
1574 break;
1575 continue;
1576 }
1577 error = dir_split_leaf(inode, name);
1578 if (error == 0)
1579 continue;
1580 if (error < 0)
1581 break;
1582 if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1583 error = dir_double_exhash(ip);
1584 if (error)
1585 break;
1586 error = dir_split_leaf(inode, name);
1587 if (error < 0)
1588 break;
1589 if (error == 0)
1590 continue;
1591 }
1592 error = dir_new_leaf(inode, name);
1593 if (!error)
1594 continue;
1595 error = -ENOSPC;
1596 break;
1597 }
1598 return error;
1599}
1600
1601
1602/**
1603 * gfs2_dir_del - Delete a directory entry
1604 * @dip: The GFS2 inode
1605 * @filename: The filename
1606 *
1607 * Returns: 0 on success, error code on failure
1608 */
1609
1610int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1611{
1612 struct gfs2_dirent *dent, *prev = NULL;
1613 struct buffer_head *bh;
1614 int error;
1615
1616 /* Returns _either_ the entry (if its first in block) or the
1617 previous entry otherwise */
1618 dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
1619 if (!dent) {
1620 gfs2_consist_inode(dip);
1621 return -EIO;
1622 }
1623 if (IS_ERR(dent)) {
1624 gfs2_consist_inode(dip);
1625 return PTR_ERR(dent);
1626 }
1627 /* If not first in block, adjust pointers accordingly */
1628 if (gfs2_dirent_find(dent, name, NULL) == 0) {
1629 prev = dent;
1630 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1631 }
1632
1633 dirent_del(dip, bh, prev, dent);
1634 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1635 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1636 u16 entries = be16_to_cpu(leaf->lf_entries);
1637 if (!entries)
1638 gfs2_consist_inode(dip);
1639 leaf->lf_entries = cpu_to_be16(--entries);
1640 }
1641 brelse(bh);
1642
1643 error = gfs2_meta_inode_buffer(dip, &bh);
1644 if (error)
1645 return error;
1646
1647 if (!dip->i_di.di_entries)
1648 gfs2_consist_inode(dip);
1649 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1650 dip->i_di.di_entries--;
1651 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1652 gfs2_dinode_out(&dip->i_di, bh->b_data);
1653 brelse(bh);
1654 mark_inode_dirty(&dip->i_inode);
1655
1656 return error;
1657}
1658
1659/**
1660 * gfs2_dir_mvino - Change inode number of directory entry
1661 * @dip: The GFS2 inode
1662 * @filename:
1663 * @new_inode:
1664 *
1665 * This routine changes the inode number of a directory entry. It's used
1666 * by rename to change ".." when a directory is moved.
1667 * Assumes a glock is held on dvp.
1668 *
1669 * Returns: errno
1670 */
1671
1672int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1673 struct gfs2_inum *inum, unsigned int new_type)
1674{
1675 struct buffer_head *bh;
1676 struct gfs2_dirent *dent;
1677 int error;
1678
1679 dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
1680 if (!dent) {
1681 gfs2_consist_inode(dip);
1682 return -EIO;
1683 }
1684 if (IS_ERR(dent))
1685 return PTR_ERR(dent);
1686
1687 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1688 gfs2_inum_out(inum, (char *)&dent->de_inum);
1689 dent->de_type = cpu_to_be16(new_type);
1690
1691 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1692 brelse(bh);
1693 error = gfs2_meta_inode_buffer(dip, &bh);
1694 if (error)
1695 return error;
1696 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1697 }
1698
1699 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1700 gfs2_dinode_out(&dip->i_di, bh->b_data);
1701 brelse(bh);
1702 return 0;
1703}
1704
1705/**
1706 * foreach_leaf - call a function for each leaf in a directory
1707 * @dip: the directory
1708 * @lc: the function to call for each each
1709 * @data: private data to pass to it
1710 *
1711 * Returns: errno
1712 */
1713
1714static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1715{
1716 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1717 struct buffer_head *bh;
1718 struct gfs2_leaf *leaf;
1719 u32 hsize, len;
1720 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1721 u32 index = 0;
1722 u64 *lp;
1723 u64 leaf_no;
1724 int error = 0;
1725
1726 hsize = 1 << dip->i_di.di_depth;
1727 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1728 gfs2_consist_inode(dip);
1729 return -EIO;
1730 }
1731
1732 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1733 if (!lp)
1734 return -ENOMEM;
1735
1736 while (index < hsize) {
1737 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1738 ht_offset = index - lp_offset;
1739
1740 if (ht_offset_cur != ht_offset) {
1741 error = gfs2_dir_read_data(dip, (char *)lp,
1742 ht_offset * sizeof(u64),
1743 sdp->sd_hash_bsize, 1);
1744 if (error != sdp->sd_hash_bsize) {
1745 if (error >= 0)
1746 error = -EIO;
1747 goto out;
1748 }
1749 ht_offset_cur = ht_offset;
1750 }
1751
1752 leaf_no = be64_to_cpu(lp[lp_offset]);
1753 if (leaf_no) {
1754 error = get_leaf(dip, leaf_no, &bh);
1755 if (error)
1756 goto out;
1757 leaf = (struct gfs2_leaf *)bh->b_data;
1758 len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth));
1759 brelse(bh);
1760
1761 error = lc(dip, index, len, leaf_no, data);
1762 if (error)
1763 goto out;
1764
1765 index = (index & ~(len - 1)) + len;
1766 } else
1767 index++;
1768 }
1769
1770 if (index != hsize) {
1771 gfs2_consist_inode(dip);
1772 error = -EIO;
1773 }
1774
1775out:
1776 kfree(lp);
1777
1778 return error;
1779}
1780
1781/**
1782 * leaf_dealloc - Deallocate a directory leaf
1783 * @dip: the directory
1784 * @index: the hash table offset in the directory
1785 * @len: the number of pointers to this leaf
1786 * @leaf_no: the leaf number
1787 * @data: not used
1788 *
1789 * Returns: errno
1790 */
1791
1792static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1793 u64 leaf_no, void *data)
1794{
1795 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1796 struct gfs2_leaf *tmp_leaf;
1797 struct gfs2_rgrp_list rlist;
1798 struct buffer_head *bh, *dibh;
1799 u64 blk, nblk;
1800 unsigned int rg_blocks = 0, l_blocks = 0;
1801 char *ht;
1802 unsigned int x, size = len * sizeof(u64);
1803 int error;
1804
1805 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1806
1807 ht = kzalloc(size, GFP_KERNEL);
1808 if (!ht)
1809 return -ENOMEM;
1810
1811 gfs2_alloc_get(dip);
1812
1813 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1814 if (error)
1815 goto out;
1816
1817 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
1818 if (error)
1819 goto out_qs;
1820
1821 /* Count the number of leaves */
1822
1823 for (blk = leaf_no; blk; blk = nblk) {
1824 error = get_leaf(dip, blk, &bh);
1825 if (error)
1826 goto out_rlist;
1827 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1828 nblk = be64_to_cpu(tmp_leaf->lf_next);
1829 brelse(bh);
1830
1831 gfs2_rlist_add(sdp, &rlist, blk);
1832 l_blocks++;
1833 }
1834
1835 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1836
1837 for (x = 0; x < rlist.rl_rgrps; x++) {
1838 struct gfs2_rgrpd *rgd;
1839 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1840 rg_blocks += rgd->rd_ri.ri_length;
1841 }
1842
1843 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1844 if (error)
1845 goto out_rlist;
1846
1847 error = gfs2_trans_begin(sdp,
1848 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
1849 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1850 if (error)
1851 goto out_rg_gunlock;
1852
1853 for (blk = leaf_no; blk; blk = nblk) {
1854 error = get_leaf(dip, blk, &bh);
1855 if (error)
1856 goto out_end_trans;
1857 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1858 nblk = be64_to_cpu(tmp_leaf->lf_next);
1859 brelse(bh);
1860
1861 gfs2_free_meta(dip, blk, 1);
1862
1863 if (!dip->i_di.di_blocks)
1864 gfs2_consist_inode(dip);
1865 dip->i_di.di_blocks--;
1866 }
1867
1868 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
1869 if (error != size) {
1870 if (error >= 0)
1871 error = -EIO;
1872 goto out_end_trans;
1873 }
1874
1875 error = gfs2_meta_inode_buffer(dip, &dibh);
1876 if (error)
1877 goto out_end_trans;
1878
1879 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1880 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1881 brelse(dibh);
1882
1883out_end_trans:
1884 gfs2_trans_end(sdp);
1885out_rg_gunlock:
1886 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1887out_rlist:
1888 gfs2_rlist_free(&rlist);
1889 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
1890out_qs:
1891 gfs2_quota_unhold(dip);
1892out:
1893 gfs2_alloc_put(dip);
1894 kfree(ht);
1895 return error;
1896}
1897
1898/**
1899 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
1900 * @dip: the directory
1901 *
1902 * Dealloc all on-disk directory leaves to FREEMETA state
1903 * Change on-disk inode type to "regular file"
1904 *
1905 * Returns: errno
1906 */
1907
1908int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1909{
1910 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1911 struct buffer_head *bh;
1912 int error;
1913
1914 /* Dealloc on-disk leaves to FREEMETA state */
1915 error = foreach_leaf(dip, leaf_dealloc, NULL);
1916 if (error)
1917 return error;
1918
1919 /* Make this a regular file in case we crash.
1920 (We don't want to free these blocks a second time.) */
1921
1922 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1923 if (error)
1924 return error;
1925
1926 error = gfs2_meta_inode_buffer(dip, &bh);
1927 if (!error) {
1928 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1929 ((struct gfs2_dinode *)bh->b_data)->di_mode =
1930 cpu_to_be32(S_IFREG);
1931 brelse(bh);
1932 }
1933
1934 gfs2_trans_end(sdp);
1935
1936 return error;
1937}
1938
1939/**
1940 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
1941 * @ip: the file being written to
1942 * @filname: the filename that's going to be added
1943 *
1944 * Returns: 1 if alloc required, 0 if not, -ve on error
1945 */
1946
1947int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name)
1948{
1949 struct gfs2_dirent *dent;
1950 struct buffer_head *bh;
1951
1952 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
1953 if (!dent) {
1954 return 1;
1955 }
1956 if (IS_ERR(dent))
1957 return PTR_ERR(dent);
1958 brelse(bh);
1959 return 0;
1960}
1961
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
new file mode 100644
index 000000000000..371233419b07
--- /dev/null
+++ b/fs/gfs2/dir.h
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __DIR_DOT_H__
11#define __DIR_DOT_H__
12
13#include <linux/dcache.h>
14
15struct inode;
16struct gfs2_inode;
17struct gfs2_inum;
18
19/**
20 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
21 * @opaque: opaque data used by the function
22 * @name: the name of the directory entry
23 * @length: the length of the name
24 * @offset: the entry's offset in the directory
25 * @inum: the inode number the entry points to
26 * @type: the type of inode the entry points to
27 *
28 * Returns: 0 on success, 1 if buffer full
29 */
30
31typedef int (*gfs2_filldir_t) (void *opaque,
32 const char *name, unsigned int length,
33 u64 offset,
34 struct gfs2_inum *inum, unsigned int type);
35
36int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
37 struct gfs2_inum *inum, unsigned int *type);
38int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
39 const struct gfs2_inum *inum, unsigned int type);
40int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
41int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
42 gfs2_filldir_t filldir);
43int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
44 struct gfs2_inum *new_inum, unsigned int new_type);
45
46int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
47
48int gfs2_diradd_alloc_required(struct inode *dir,
49 const struct qstr *filename);
50int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
51 struct buffer_head **bhp);
52
53static inline u32 gfs2_disk_hash(const char *data, int len)
54{
55 return crc32_le((u32)~0, data, len) ^ (u32)~0;
56}
57
58
59static inline void gfs2_str2qstr(struct qstr *name, const char *fname)
60{
61 name->name = fname;
62 name->len = strlen(fname);
63 name->hash = gfs2_disk_hash(name->name, name->len);
64}
65
66/* N.B. This probably ought to take inum & type as args as well */
67static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent)
68{
69 dent->de_inum.no_addr = cpu_to_be64(0);
70 dent->de_inum.no_formal_ino = cpu_to_be64(0);
71 dent->de_hash = cpu_to_be32(name->hash);
72 dent->de_rec_len = cpu_to_be16(reclen);
73 dent->de_name_len = cpu_to_be16(name->len);
74 dent->de_type = cpu_to_be16(0);
75 memset(dent->__pad, 0, sizeof(dent->__pad));
76 memcpy(dent + 1, name->name, name->len);
77}
78
79#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
new file mode 100644
index 000000000000..92c54e9b0dc3
--- /dev/null
+++ b/fs/gfs2/eaops.c
@@ -0,0 +1,230 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "util.h"
26
27/**
28 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
29 * @namep: ea name, possibly with type appended
30 *
31 * Returns: GFS2_EATYPE_XXX
32 */
33
34unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
35{
36 unsigned int type;
37
38 if (strncmp(name, "system.", 7) == 0) {
39 type = GFS2_EATYPE_SYS;
40 if (truncated_name)
41 *truncated_name = name + sizeof("system.") - 1;
42 } else if (strncmp(name, "user.", 5) == 0) {
43 type = GFS2_EATYPE_USR;
44 if (truncated_name)
45 *truncated_name = name + sizeof("user.") - 1;
46 } else if (strncmp(name, "security.", 9) == 0) {
47 type = GFS2_EATYPE_SECURITY;
48 if (truncated_name)
49 *truncated_name = name + sizeof("security.") - 1;
50 } else {
51 type = GFS2_EATYPE_UNUSED;
52 if (truncated_name)
53 *truncated_name = NULL;
54 }
55
56 return type;
57}
58
59static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
60{
61 struct inode *inode = &ip->i_inode;
62 int error = permission(inode, MAY_READ, NULL);
63 if (error)
64 return error;
65
66 return gfs2_ea_get_i(ip, er);
67}
68
69static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
70{
71 struct inode *inode = &ip->i_inode;
72
73 if (S_ISREG(inode->i_mode) ||
74 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
75 int error = permission(inode, MAY_WRITE, NULL);
76 if (error)
77 return error;
78 } else
79 return -EPERM;
80
81 return gfs2_ea_set_i(ip, er);
82}
83
84static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
85{
86 struct inode *inode = &ip->i_inode;
87
88 if (S_ISREG(inode->i_mode) ||
89 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
90 int error = permission(inode, MAY_WRITE, NULL);
91 if (error)
92 return error;
93 } else
94 return -EPERM;
95
96 return gfs2_ea_remove_i(ip, er);
97}
98
99static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
100{
101 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
102 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
103 !capable(CAP_SYS_ADMIN))
104 return -EPERM;
105
106 if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 &&
107 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
108 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
109 return -EOPNOTSUPP;
110
111
112
113 return gfs2_ea_get_i(ip, er);
114}
115
116static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
117{
118 int remove = 0;
119 int error;
120
121 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
122 if (!(er->er_flags & GFS2_ERF_MODE)) {
123 er->er_mode = ip->i_di.di_mode;
124 er->er_flags |= GFS2_ERF_MODE;
125 }
126 error = gfs2_acl_validate_set(ip, 1, er,
127 &remove, &er->er_mode);
128 if (error)
129 return error;
130 error = gfs2_ea_set_i(ip, er);
131 if (error)
132 return error;
133 if (remove)
134 gfs2_ea_remove_i(ip, er);
135 return 0;
136
137 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
138 error = gfs2_acl_validate_set(ip, 0, er,
139 &remove, NULL);
140 if (error)
141 return error;
142 if (!remove)
143 error = gfs2_ea_set_i(ip, er);
144 else {
145 error = gfs2_ea_remove_i(ip, er);
146 if (error == -ENODATA)
147 error = 0;
148 }
149 return error;
150 }
151
152 return -EPERM;
153}
154
155static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
156{
157 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
158 int error = gfs2_acl_validate_remove(ip, 1);
159 if (error)
160 return error;
161
162 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
163 int error = gfs2_acl_validate_remove(ip, 0);
164 if (error)
165 return error;
166
167 } else
168 return -EPERM;
169
170 return gfs2_ea_remove_i(ip, er);
171}
172
173static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
174{
175 struct inode *inode = &ip->i_inode;
176 int error = permission(inode, MAY_READ, NULL);
177 if (error)
178 return error;
179
180 return gfs2_ea_get_i(ip, er);
181}
182
183static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
184{
185 struct inode *inode = &ip->i_inode;
186 int error = permission(inode, MAY_WRITE, NULL);
187 if (error)
188 return error;
189
190 return gfs2_ea_set_i(ip, er);
191}
192
193static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
194{
195 struct inode *inode = &ip->i_inode;
196 int error = permission(inode, MAY_WRITE, NULL);
197 if (error)
198 return error;
199
200 return gfs2_ea_remove_i(ip, er);
201}
202
203static struct gfs2_eattr_operations gfs2_user_eaops = {
204 .eo_get = user_eo_get,
205 .eo_set = user_eo_set,
206 .eo_remove = user_eo_remove,
207 .eo_name = "user",
208};
209
210struct gfs2_eattr_operations gfs2_system_eaops = {
211 .eo_get = system_eo_get,
212 .eo_set = system_eo_set,
213 .eo_remove = system_eo_remove,
214 .eo_name = "system",
215};
216
217static struct gfs2_eattr_operations gfs2_security_eaops = {
218 .eo_get = security_eo_get,
219 .eo_set = security_eo_set,
220 .eo_remove = security_eo_remove,
221 .eo_name = "security",
222};
223
224struct gfs2_eattr_operations *gfs2_ea_ops[] = {
225 NULL,
226 &gfs2_user_eaops,
227 &gfs2_system_eaops,
228 &gfs2_security_eaops,
229};
230
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
new file mode 100644
index 000000000000..508b4f7a2449
--- /dev/null
+++ b/fs/gfs2/eaops.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14struct gfs2_inode;
15
/* Set of handlers implementing one extended attribute namespace */
struct gfs2_eattr_operations {
	/* Fetch the attribute described by @er */
	int (*eo_get)(struct gfs2_inode *ip, struct gfs2_ea_request *er);
	/* Create or replace the attribute described by @er */
	int (*eo_set)(struct gfs2_inode *ip, struct gfs2_ea_request *er);
	/* Delete the attribute described by @er */
	int (*eo_remove)(struct gfs2_inode *ip, struct gfs2_ea_request *er);
	/* Namespace name without the trailing dot, e.g. "user" */
	char *eo_name;
};
22
/* Map a prefixed attribute name to its GFS2_EATYPE_XXX value */
unsigned int gfs2_ea_name2type(const char *name,
			       const char **truncated_name);

/* Handlers for the "system" namespace */
extern struct gfs2_eattr_operations gfs2_system_eaops;

/* Table of all namespace handlers */
extern struct gfs2_eattr_operations *gfs2_ea_ops[];
28
29#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
new file mode 100644
index 000000000000..a65a4ccfd4dd
--- /dev/null
+++ b/fs/gfs2/eattr.c
@@ -0,0 +1,1501 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "glock.h"
26#include "inode.h"
27#include "meta_io.h"
28#include "quota.h"
29#include "rgrp.h"
30#include "trans.h"
31#include "util.h"
32
33/**
34 * ea_calc_size - returns the acutal number of bytes the request will take up
35 * (not counting any unstuffed data blocks)
36 * @sdp:
37 * @er:
38 * @size:
39 *
40 * Returns: 1 if the EA should be stuffed
41 */
42
43static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
44 unsigned int *size)
45{
46 *size = GFS2_EAREQ_SIZE_STUFFED(er);
47 if (*size <= sdp->sd_jbsize)
48 return 1;
49
50 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
51
52 return 0;
53}
54
55static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
56{
57 unsigned int size;
58
59 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
60 return -ERANGE;
61
62 ea_calc_size(sdp, er, &size);
63
64 /* This can only happen with 512 byte blocks */
65 if (size > sdp->sd_jbsize)
66 return -ERANGE;
67
68 return 0;
69}
70
/*
 * Per-EA callback used by ea_foreach(): @ea is the current header, @prev
 * the one before it in the same block (NULL for the first). A non-zero
 * return stops the walk and is handed back to the caller.
 */
typedef int (*ea_call_t)(struct gfs2_inode *ip, struct buffer_head *bh,
			 struct gfs2_ea_header *ea,
			 struct gfs2_ea_header *prev, void *private);
74
75static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
76 ea_call_t ea_call, void *data)
77{
78 struct gfs2_ea_header *ea, *prev = NULL;
79 int error = 0;
80
81 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_EA))
82 return -EIO;
83
84 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
85 if (!GFS2_EA_REC_LEN(ea))
86 goto fail;
87 if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
88 bh->b_data + bh->b_size))
89 goto fail;
90 if (!GFS2_EATYPE_VALID(ea->ea_type))
91 goto fail;
92
93 error = ea_call(ip, bh, ea, prev, data);
94 if (error)
95 return error;
96
97 if (GFS2_EA_IS_LAST(ea)) {
98 if ((char *)GFS2_EA2NEXT(ea) !=
99 bh->b_data + bh->b_size)
100 goto fail;
101 break;
102 }
103 }
104
105 return error;
106
107fail:
108 gfs2_consist_inode(ip);
109 return -EIO;
110}
111
112static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
113{
114 struct buffer_head *bh, *eabh;
115 u64 *eablk, *end;
116 int error;
117
118 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
119 if (error)
120 return error;
121
122 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
123 error = ea_foreach_i(ip, bh, ea_call, data);
124 goto out;
125 }
126
127 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_IN)) {
128 error = -EIO;
129 goto out;
130 }
131
132 eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
133 end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;
134
135 for (; eablk < end; eablk++) {
136 u64 bn;
137
138 if (!*eablk)
139 break;
140 bn = be64_to_cpu(*eablk);
141
142 error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh);
143 if (error)
144 break;
145 error = ea_foreach_i(ip, eabh, ea_call, data);
146 brelse(eabh);
147 if (error)
148 break;
149 }
150out:
151 brelse(bh);
152 return error;
153}
154
/* Private data handed to ea_find_i() by gfs2_ea_find() */
struct ea_find {
	struct gfs2_ea_request *ef_er;	/* what to look for */
	struct gfs2_ea_location *ef_el;	/* where the match is recorded */
};
159
160static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
161 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
162 void *private)
163{
164 struct ea_find *ef = private;
165 struct gfs2_ea_request *er = ef->ef_er;
166
167 if (ea->ea_type == GFS2_EATYPE_UNUSED)
168 return 0;
169
170 if (ea->ea_type == er->er_type) {
171 if (ea->ea_name_len == er->er_name_len &&
172 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
173 struct gfs2_ea_location *el = ef->ef_el;
174 get_bh(bh);
175 el->el_bh = bh;
176 el->el_ea = ea;
177 el->el_prev = prev;
178 return 1;
179 }
180 }
181
182 return 0;
183}
184
185int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
186 struct gfs2_ea_location *el)
187{
188 struct ea_find ef;
189 int error;
190
191 ef.ef_er = er;
192 ef.ef_el = el;
193
194 memset(el, 0, sizeof(struct gfs2_ea_location));
195
196 error = ea_foreach(ip, ea_find_i, &ef);
197 if (error > 0)
198 return 0;
199
200 return error;
201}
202
203/**
204 * ea_dealloc_unstuffed -
205 * @ip:
206 * @bh:
207 * @ea:
208 * @prev:
209 * @private:
210 *
211 * Take advantage of the fact that all unstuffed blocks are
212 * allocated from the same RG. But watch, this may not always
213 * be true.
214 *
215 * Returns: errno
216 */
217
218static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
219 struct gfs2_ea_header *ea,
220 struct gfs2_ea_header *prev, void *private)
221{
222 int *leave = private;
223 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
224 struct gfs2_rgrpd *rgd;
225 struct gfs2_holder rg_gh;
226 struct buffer_head *dibh;
227 u64 *dataptrs, bn = 0;
228 u64 bstart = 0;
229 unsigned int blen = 0;
230 unsigned int blks = 0;
231 unsigned int x;
232 int error;
233
234 if (GFS2_EA_IS_STUFFED(ea))
235 return 0;
236
237 dataptrs = GFS2_EA2DATAPTRS(ea);
238 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
239 if (*dataptrs) {
240 blks++;
241 bn = be64_to_cpu(*dataptrs);
242 }
243 }
244 if (!blks)
245 return 0;
246
247 rgd = gfs2_blk2rgrpd(sdp, bn);
248 if (!rgd) {
249 gfs2_consist_inode(ip);
250 return -EIO;
251 }
252
253 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
254 if (error)
255 return error;
256
257 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
258 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
259 if (error)
260 goto out_gunlock;
261
262 gfs2_trans_add_bh(ip->i_gl, bh, 1);
263
264 dataptrs = GFS2_EA2DATAPTRS(ea);
265 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
266 if (!*dataptrs)
267 break;
268 bn = be64_to_cpu(*dataptrs);
269
270 if (bstart + blen == bn)
271 blen++;
272 else {
273 if (bstart)
274 gfs2_free_meta(ip, bstart, blen);
275 bstart = bn;
276 blen = 1;
277 }
278
279 *dataptrs = 0;
280 if (!ip->i_di.di_blocks)
281 gfs2_consist_inode(ip);
282 ip->i_di.di_blocks--;
283 }
284 if (bstart)
285 gfs2_free_meta(ip, bstart, blen);
286
287 if (prev && !leave) {
288 u32 len;
289
290 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
291 prev->ea_rec_len = cpu_to_be32(len);
292
293 if (GFS2_EA_IS_LAST(ea))
294 prev->ea_flags |= GFS2_EAFLAG_LAST;
295 } else {
296 ea->ea_type = GFS2_EATYPE_UNUSED;
297 ea->ea_num_ptrs = 0;
298 }
299
300 error = gfs2_meta_inode_buffer(ip, &dibh);
301 if (!error) {
302 ip->i_di.di_ctime = get_seconds();
303 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
304 gfs2_dinode_out(&ip->i_di, dibh->b_data);
305 brelse(dibh);
306 }
307
308 gfs2_trans_end(sdp);
309
310out_gunlock:
311 gfs2_glock_dq_uninit(&rg_gh);
312 return error;
313}
314
315static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
316 struct gfs2_ea_header *ea,
317 struct gfs2_ea_header *prev, int leave)
318{
319 struct gfs2_alloc *al;
320 int error;
321
322 al = gfs2_alloc_get(ip);
323
324 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
325 if (error)
326 goto out_alloc;
327
328 error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
329 if (error)
330 goto out_quota;
331
332 error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL);
333
334 gfs2_glock_dq_uninit(&al->al_ri_gh);
335
336out_quota:
337 gfs2_quota_unhold(ip);
338out_alloc:
339 gfs2_alloc_put(ip);
340 return error;
341}
342
/* Private data handed to ea_list_i() by gfs2_ea_list() */
struct ea_list {
	struct gfs2_ea_request *ei_er;	/* request carrying the output buffer */
	unsigned int ei_size;		/* bytes of name list produced so far */
};
347
348static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
349 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
350 void *private)
351{
352 struct ea_list *ei = private;
353 struct gfs2_ea_request *er = ei->ei_er;
354 unsigned int ea_size = gfs2_ea_strlen(ea);
355
356 if (ea->ea_type == GFS2_EATYPE_UNUSED)
357 return 0;
358
359 if (er->er_data_len) {
360 char *prefix = NULL;
361 unsigned int l = 0;
362 char c = 0;
363
364 if (ei->ei_size + ea_size > er->er_data_len)
365 return -ERANGE;
366
367 switch (ea->ea_type) {
368 case GFS2_EATYPE_USR:
369 prefix = "user.";
370 l = 5;
371 break;
372 case GFS2_EATYPE_SYS:
373 prefix = "system.";
374 l = 7;
375 break;
376 case GFS2_EATYPE_SECURITY:
377 prefix = "security.";
378 l = 9;
379 break;
380 }
381
382 BUG_ON(l == 0);
383
384 memcpy(er->er_data + ei->ei_size, prefix, l);
385 memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
386 ea->ea_name_len);
387 memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
388 }
389
390 ei->ei_size += ea_size;
391
392 return 0;
393}
394
395/**
396 * gfs2_ea_list -
397 * @ip:
398 * @er:
399 *
400 * Returns: actual size of data on success, -errno on error
401 */
402
403int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
404{
405 struct gfs2_holder i_gh;
406 int error;
407
408 if (!er->er_data || !er->er_data_len) {
409 er->er_data = NULL;
410 er->er_data_len = 0;
411 }
412
413 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
414 if (error)
415 return error;
416
417 if (ip->i_di.di_eattr) {
418 struct ea_list ei = { .ei_er = er, .ei_size = 0 };
419
420 error = ea_foreach(ip, ea_list_i, &ei);
421 if (!error)
422 error = ei.ei_size;
423 }
424
425 gfs2_glock_dq_uninit(&i_gh);
426
427 return error;
428}
429
430/**
431 * ea_get_unstuffed - actually copies the unstuffed data into the
432 * request buffer
433 * @ip: The GFS2 inode
434 * @ea: The extended attribute header structure
435 * @data: The data to be copied
436 *
437 * Returns: errno
438 */
439
440static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
441 char *data)
442{
443 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
444 struct buffer_head **bh;
445 unsigned int amount = GFS2_EA_DATA_LEN(ea);
446 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
447 u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
448 unsigned int x;
449 int error = 0;
450
451 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
452 if (!bh)
453 return -ENOMEM;
454
455 for (x = 0; x < nptrs; x++) {
456 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
457 bh + x);
458 if (error) {
459 while (x--)
460 brelse(bh[x]);
461 goto out;
462 }
463 dataptrs++;
464 }
465
466 for (x = 0; x < nptrs; x++) {
467 error = gfs2_meta_wait(sdp, bh[x]);
468 if (error) {
469 for (; x < nptrs; x++)
470 brelse(bh[x]);
471 goto out;
472 }
473 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
474 for (; x < nptrs; x++)
475 brelse(bh[x]);
476 error = -EIO;
477 goto out;
478 }
479
480 memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header),
481 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
482
483 amount -= sdp->sd_jbsize;
484 data += sdp->sd_jbsize;
485
486 brelse(bh[x]);
487 }
488
489out:
490 kfree(bh);
491 return error;
492}
493
494int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
495 char *data)
496{
497 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
498 memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea));
499 return 0;
500 } else
501 return ea_get_unstuffed(ip, el->el_ea, data);
502}
503
504/**
505 * gfs2_ea_get_i -
506 * @ip: The GFS2 inode
507 * @er: The request structure
508 *
509 * Returns: actual size of data on success, -errno on error
510 */
511
512int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
513{
514 struct gfs2_ea_location el;
515 int error;
516
517 if (!ip->i_di.di_eattr)
518 return -ENODATA;
519
520 error = gfs2_ea_find(ip, er, &el);
521 if (error)
522 return error;
523 if (!el.el_ea)
524 return -ENODATA;
525
526 if (er->er_data_len) {
527 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
528 error = -ERANGE;
529 else
530 error = gfs2_ea_get_copy(ip, &el, er->er_data);
531 }
532 if (!error)
533 error = GFS2_EA_DATA_LEN(el.el_ea);
534
535 brelse(el.el_bh);
536
537 return error;
538}
539
540/**
541 * gfs2_ea_get -
542 * @ip: The GFS2 inode
543 * @er: The request structure
544 *
545 * Returns: actual size of data on success, -errno on error
546 */
547
548int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
549{
550 struct gfs2_holder i_gh;
551 int error;
552
553 if (!er->er_name_len ||
554 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
555 return -EINVAL;
556 if (!er->er_data || !er->er_data_len) {
557 er->er_data = NULL;
558 er->er_data_len = 0;
559 }
560
561 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
562 if (error)
563 return error;
564
565 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
566
567 gfs2_glock_dq_uninit(&i_gh);
568
569 return error;
570}
571
572/**
573 * ea_alloc_blk - allocates a new block for extended attributes.
574 * @ip: A pointer to the inode that's getting extended attributes
575 * @bhp: Pointer to pointer to a struct buffer_head
576 *
577 * Returns: errno
578 */
579
580static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
581{
582 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
583 struct gfs2_ea_header *ea;
584 u64 block;
585
586 block = gfs2_alloc_meta(ip);
587
588 *bhp = gfs2_meta_new(ip->i_gl, block);
589 gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
590 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
591 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
592
593 ea = GFS2_EA_BH2FIRST(*bhp);
594 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
595 ea->ea_type = GFS2_EATYPE_UNUSED;
596 ea->ea_flags = GFS2_EAFLAG_LAST;
597 ea->ea_num_ptrs = 0;
598
599 ip->i_di.di_blocks++;
600
601 return 0;
602}
603
604/**
605 * ea_write - writes the request info to an ea, creating new blocks if
606 * necessary
607 * @ip: inode that is being modified
608 * @ea: the location of the new ea in a block
609 * @er: the write request
610 *
611 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
612 *
613 * returns : errno
614 */
615
616static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
617 struct gfs2_ea_request *er)
618{
619 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
620
621 ea->ea_data_len = cpu_to_be32(er->er_data_len);
622 ea->ea_name_len = er->er_name_len;
623 ea->ea_type = er->er_type;
624 ea->__pad = 0;
625
626 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
627
628 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
629 ea->ea_num_ptrs = 0;
630 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
631 } else {
632 u64 *dataptr = GFS2_EA2DATAPTRS(ea);
633 const char *data = er->er_data;
634 unsigned int data_len = er->er_data_len;
635 unsigned int copy;
636 unsigned int x;
637
638 ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize);
639 for (x = 0; x < ea->ea_num_ptrs; x++) {
640 struct buffer_head *bh;
641 u64 block;
642 int mh_size = sizeof(struct gfs2_meta_header);
643
644 block = gfs2_alloc_meta(ip);
645
646 bh = gfs2_meta_new(ip->i_gl, block);
647 gfs2_trans_add_bh(ip->i_gl, bh, 1);
648 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
649
650 ip->i_di.di_blocks++;
651
652 copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
653 data_len;
654 memcpy(bh->b_data + mh_size, data, copy);
655 if (copy < sdp->sd_jbsize)
656 memset(bh->b_data + mh_size + copy, 0,
657 sdp->sd_jbsize - copy);
658
659 *dataptr++ = cpu_to_be64(bh->b_blocknr);
660 data += copy;
661 data_len -= copy;
662
663 brelse(bh);
664 }
665
666 gfs2_assert_withdraw(sdp, !data_len);
667 }
668
669 return 0;
670}
671
672typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
673 struct gfs2_ea_request *er, void *private);
674
675static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
676 unsigned int blks,
677 ea_skeleton_call_t skeleton_call, void *private)
678{
679 struct gfs2_alloc *al;
680 struct buffer_head *dibh;
681 int error;
682
683 al = gfs2_alloc_get(ip);
684
685 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
686 if (error)
687 goto out;
688
689 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
690 if (error)
691 goto out_gunlock_q;
692
693 al->al_requested = blks;
694
695 error = gfs2_inplace_reserve(ip);
696 if (error)
697 goto out_gunlock_q;
698
699 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
700 blks + al->al_rgd->rd_ri.ri_length +
701 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
702 if (error)
703 goto out_ipres;
704
705 error = skeleton_call(ip, er, private);
706 if (error)
707 goto out_end_trans;
708
709 error = gfs2_meta_inode_buffer(ip, &dibh);
710 if (!error) {
711 if (er->er_flags & GFS2_ERF_MODE) {
712 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
713 (ip->i_di.di_mode & S_IFMT) ==
714 (er->er_mode & S_IFMT));
715 ip->i_di.di_mode = er->er_mode;
716 }
717 ip->i_di.di_ctime = get_seconds();
718 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
719 gfs2_dinode_out(&ip->i_di, dibh->b_data);
720 brelse(dibh);
721 }
722
723out_end_trans:
724 gfs2_trans_end(GFS2_SB(&ip->i_inode));
725out_ipres:
726 gfs2_inplace_release(ip);
727out_gunlock_q:
728 gfs2_quota_unlock(ip);
729out:
730 gfs2_alloc_put(ip);
731 return error;
732}
733
734static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
735 void *private)
736{
737 struct buffer_head *bh;
738 int error;
739
740 error = ea_alloc_blk(ip, &bh);
741 if (error)
742 return error;
743
744 ip->i_di.di_eattr = bh->b_blocknr;
745 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
746
747 brelse(bh);
748
749 return error;
750}
751
752/**
753 * ea_init - initializes a new eattr block
754 * @ip:
755 * @er:
756 *
757 * Returns: errno
758 */
759
760static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
761{
762 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize;
763 unsigned int blks = 1;
764
765 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
766 blks += DIV_ROUND_UP(er->er_data_len, jbsize);
767
768 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
769}
770
771static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
772{
773 u32 ea_size = GFS2_EA_SIZE(ea);
774 struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea +
775 ea_size);
776 u32 new_size = GFS2_EA_REC_LEN(ea) - ea_size;
777 int last = ea->ea_flags & GFS2_EAFLAG_LAST;
778
779 ea->ea_rec_len = cpu_to_be32(ea_size);
780 ea->ea_flags ^= last;
781
782 new->ea_rec_len = cpu_to_be32(new_size);
783 new->ea_flags = last;
784
785 return new;
786}
787
788static void ea_set_remove_stuffed(struct gfs2_inode *ip,
789 struct gfs2_ea_location *el)
790{
791 struct gfs2_ea_header *ea = el->el_ea;
792 struct gfs2_ea_header *prev = el->el_prev;
793 u32 len;
794
795 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
796
797 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
798 ea->ea_type = GFS2_EATYPE_UNUSED;
799 return;
800 } else if (GFS2_EA2NEXT(prev) != ea) {
801 prev = GFS2_EA2NEXT(prev);
802 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), GFS2_EA2NEXT(prev) == ea);
803 }
804
805 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
806 prev->ea_rec_len = cpu_to_be32(len);
807
808 if (GFS2_EA_IS_LAST(ea))
809 prev->ea_flags |= GFS2_EAFLAG_LAST;
810}
811
/*
 * struct ea_set - state shared between ea_set_i(), the ea_set_simple()
 * record walker and the ea_set_simple_alloc()/ea_set_simple_noalloc()
 * helpers.  ea_set_i() zeroes it and fills in es_er and es_el.
 */
812struct ea_set {
813 int ea_split; /* set by ea_set_simple(): 1 = split the record first, 0 = reuse it whole */
814
815 struct gfs2_ea_request *es_er; /* the set request being carried out */
816 struct gfs2_ea_location *es_el; /* existing copy to remove afterwards, or NULL */
817
818 struct buffer_head *es_bh; /* block holding es_ea; set by ea_set_simple() on the allocating path */
819 struct gfs2_ea_header *es_ea; /* record chosen to receive the new value (allocating path) */
820};
821
822static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
823 struct gfs2_ea_header *ea, struct ea_set *es)
824{
825 struct gfs2_ea_request *er = es->es_er;
826 struct buffer_head *dibh;
827 int error;
828
829 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + 2 * RES_EATTR, 0);
830 if (error)
831 return error;
832
833 gfs2_trans_add_bh(ip->i_gl, bh, 1);
834
835 if (es->ea_split)
836 ea = ea_split_ea(ea);
837
838 ea_write(ip, ea, er);
839
840 if (es->es_el)
841 ea_set_remove_stuffed(ip, es->es_el);
842
843 error = gfs2_meta_inode_buffer(ip, &dibh);
844 if (error)
845 goto out;
846
847 if (er->er_flags & GFS2_ERF_MODE) {
848 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
849 (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
850 ip->i_di.di_mode = er->er_mode;
851 }
852 ip->i_di.di_ctime = get_seconds();
853 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
854 gfs2_dinode_out(&ip->i_di, dibh->b_data);
855 brelse(dibh);
856out:
857 gfs2_trans_end(GFS2_SB(&ip->i_inode));
858 return error;
859}
860
861static int ea_set_simple_alloc(struct gfs2_inode *ip,
862 struct gfs2_ea_request *er, void *private)
863{
864 struct ea_set *es = private;
865 struct gfs2_ea_header *ea = es->es_ea;
866 int error;
867
868 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);
869
870 if (es->ea_split)
871 ea = ea_split_ea(ea);
872
873 error = ea_write(ip, ea, er);
874 if (error)
875 return error;
876
877 if (es->es_el)
878 ea_set_remove_stuffed(ip, es->es_el);
879
880 return 0;
881}
882
883static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
884 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
885 void *private)
886{
887 struct ea_set *es = private;
888 unsigned int size;
889 int stuffed;
890 int error;
891
892 stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size);
893
894 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
895 if (GFS2_EA_REC_LEN(ea) < size)
896 return 0;
897 if (!GFS2_EA_IS_STUFFED(ea)) {
898 error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
899 if (error)
900 return error;
901 }
902 es->ea_split = 0;
903 } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
904 es->ea_split = 1;
905 else
906 return 0;
907
908 if (stuffed) {
909 error = ea_set_simple_noalloc(ip, bh, ea, es);
910 if (error)
911 return error;
912 } else {
913 unsigned int blks;
914
915 es->es_bh = bh;
916 es->es_ea = ea;
917 blks = 2 + DIV_ROUND_UP(es->es_er->er_data_len,
918 GFS2_SB(&ip->i_inode)->sd_jbsize);
919
920 error = ea_alloc_skeleton(ip, es->es_er, blks,
921 ea_set_simple_alloc, es);
922 if (error)
923 return error;
924 }
925
926 return 1;
927}
928
929static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
930 void *private)
931{
932 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
933 struct buffer_head *indbh, *newbh;
934 u64 *eablk;
935 int error;
936 int mh_size = sizeof(struct gfs2_meta_header);
937
938 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
939 u64 *end;
940
941 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
942 &indbh);
943 if (error)
944 return error;
945
946 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
947 error = -EIO;
948 goto out;
949 }
950
951 eablk = (u64 *)(indbh->b_data + mh_size);
952 end = eablk + sdp->sd_inptrs;
953
954 for (; eablk < end; eablk++)
955 if (!*eablk)
956 break;
957
958 if (eablk == end) {
959 error = -ENOSPC;
960 goto out;
961 }
962
963 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
964 } else {
965 u64 blk;
966
967 blk = gfs2_alloc_meta(ip);
968
969 indbh = gfs2_meta_new(ip->i_gl, blk);
970 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
971 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
972 gfs2_buffer_clear_tail(indbh, mh_size);
973
974 eablk = (u64 *)(indbh->b_data + mh_size);
975 *eablk = cpu_to_be64(ip->i_di.di_eattr);
976 ip->i_di.di_eattr = blk;
977 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
978 ip->i_di.di_blocks++;
979
980 eablk++;
981 }
982
983 error = ea_alloc_blk(ip, &newbh);
984 if (error)
985 goto out;
986
987 *eablk = cpu_to_be64((u64)newbh->b_blocknr);
988 error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
989 brelse(newbh);
990 if (error)
991 goto out;
992
993 if (private)
994 ea_set_remove_stuffed(ip, private);
995
996out:
997 brelse(indbh);
998 return error;
999}
1000
1001static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1002 struct gfs2_ea_location *el)
1003{
1004 struct ea_set es;
1005 unsigned int blks = 2;
1006 int error;
1007
1008 memset(&es, 0, sizeof(struct ea_set));
1009 es.es_er = er;
1010 es.es_el = el;
1011
1012 error = ea_foreach(ip, ea_set_simple, &es);
1013 if (error > 0)
1014 return 0;
1015 if (error)
1016 return error;
1017
1018 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
1019 blks++;
1020 if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
1021 blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
1022
1023 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
1024}
1025
1026static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1027 struct gfs2_ea_location *el)
1028{
1029 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1030 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1031 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
1032 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1033 }
1034
1035 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
1036}
1037
1038int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1039{
1040 struct gfs2_ea_location el;
1041 int error;
1042
1043 if (!ip->i_di.di_eattr) {
1044 if (er->er_flags & XATTR_REPLACE)
1045 return -ENODATA;
1046 return ea_init(ip, er);
1047 }
1048
1049 error = gfs2_ea_find(ip, er, &el);
1050 if (error)
1051 return error;
1052
1053 if (el.el_ea) {
1054 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
1055 brelse(el.el_bh);
1056 return -EPERM;
1057 }
1058
1059 error = -EEXIST;
1060 if (!(er->er_flags & XATTR_CREATE)) {
1061 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1062 error = ea_set_i(ip, er, &el);
1063 if (!error && unstuffed)
1064 ea_set_remove_unstuffed(ip, &el);
1065 }
1066
1067 brelse(el.el_bh);
1068 } else {
1069 error = -ENODATA;
1070 if (!(er->er_flags & XATTR_REPLACE))
1071 error = ea_set_i(ip, er, NULL);
1072 }
1073
1074 return error;
1075}
1076
1077int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1078{
1079 struct gfs2_holder i_gh;
1080 int error;
1081
1082 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1083 return -EINVAL;
1084 if (!er->er_data || !er->er_data_len) {
1085 er->er_data = NULL;
1086 er->er_data_len = 0;
1087 }
1088 error = ea_check_size(GFS2_SB(&ip->i_inode), er);
1089 if (error)
1090 return error;
1091
1092 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1093 if (error)
1094 return error;
1095
1096 if (IS_IMMUTABLE(&ip->i_inode))
1097 error = -EPERM;
1098 else
1099 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1100
1101 gfs2_glock_dq_uninit(&i_gh);
1102
1103 return error;
1104}
1105
1106static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1107{
1108 struct gfs2_ea_header *ea = el->el_ea;
1109 struct gfs2_ea_header *prev = el->el_prev;
1110 struct buffer_head *dibh;
1111 int error;
1112
1113 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
1114 if (error)
1115 return error;
1116
1117 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1118
1119 if (prev) {
1120 u32 len;
1121
1122 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
1123 prev->ea_rec_len = cpu_to_be32(len);
1124
1125 if (GFS2_EA_IS_LAST(ea))
1126 prev->ea_flags |= GFS2_EAFLAG_LAST;
1127 } else
1128 ea->ea_type = GFS2_EATYPE_UNUSED;
1129
1130 error = gfs2_meta_inode_buffer(ip, &dibh);
1131 if (!error) {
1132 ip->i_di.di_ctime = get_seconds();
1133 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1134 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1135 brelse(dibh);
1136 }
1137
1138 gfs2_trans_end(GFS2_SB(&ip->i_inode));
1139
1140 return error;
1141}
1142
1143int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1144{
1145 struct gfs2_ea_location el;
1146 int error;
1147
1148 if (!ip->i_di.di_eattr)
1149 return -ENODATA;
1150
1151 error = gfs2_ea_find(ip, er, &el);
1152 if (error)
1153 return error;
1154 if (!el.el_ea)
1155 return -ENODATA;
1156
1157 if (GFS2_EA_IS_STUFFED(el.el_ea))
1158 error = ea_remove_stuffed(ip, &el);
1159 else
1160 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
1161 0);
1162
1163 brelse(el.el_bh);
1164
1165 return error;
1166}
1167
1168/**
1169 * gfs2_ea_remove - sets (or creates or replaces) an extended attribute
1170 * @ip: pointer to the inode of the target file
1171 * @er: request information
1172 *
1173 * Returns: errno
1174 */
1175
1176int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1177{
1178 struct gfs2_holder i_gh;
1179 int error;
1180
1181 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1182 return -EINVAL;
1183
1184 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1185 if (error)
1186 return error;
1187
1188 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
1189 error = -EPERM;
1190 else
1191 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
1192
1193 gfs2_glock_dq_uninit(&i_gh);
1194
1195 return error;
1196}
1197
1198static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1199 struct gfs2_ea_header *ea, char *data)
1200{
1201 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1202 struct buffer_head **bh;
1203 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1204 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
1205 u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
1206 unsigned int x;
1207 int error;
1208
1209 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
1210 if (!bh)
1211 return -ENOMEM;
1212
1213 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1214 if (error)
1215 goto out;
1216
1217 for (x = 0; x < nptrs; x++) {
1218 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
1219 bh + x);
1220 if (error) {
1221 while (x--)
1222 brelse(bh[x]);
1223 goto fail;
1224 }
1225 dataptrs++;
1226 }
1227
1228 for (x = 0; x < nptrs; x++) {
1229 error = gfs2_meta_wait(sdp, bh[x]);
1230 if (error) {
1231 for (; x < nptrs; x++)
1232 brelse(bh[x]);
1233 goto fail;
1234 }
1235 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1236 for (; x < nptrs; x++)
1237 brelse(bh[x]);
1238 error = -EIO;
1239 goto fail;
1240 }
1241
1242 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
1243
1244 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
1245 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1246
1247 amount -= sdp->sd_jbsize;
1248 data += sdp->sd_jbsize;
1249
1250 brelse(bh[x]);
1251 }
1252
1253out:
1254 kfree(bh);
1255 return error;
1256
1257fail:
1258 gfs2_trans_end(sdp);
1259 kfree(bh);
1260 return error;
1261}
1262
1263int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
1264 struct iattr *attr, char *data)
1265{
1266 struct buffer_head *dibh;
1267 int error;
1268
1269 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
1270 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
1271 if (error)
1272 return error;
1273
1274 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1275 memcpy(GFS2_EA2DATA(el->el_ea), data,
1276 GFS2_EA_DATA_LEN(el->el_ea));
1277 } else
1278 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
1279
1280 if (error)
1281 return error;
1282
1283 error = gfs2_meta_inode_buffer(ip, &dibh);
1284 if (!error) {
1285 error = inode_setattr(&ip->i_inode, attr);
1286 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
1287 gfs2_inode_attr_out(ip);
1288 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1289 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1290 brelse(dibh);
1291 }
1292
1293 gfs2_trans_end(GFS2_SB(&ip->i_inode));
1294
1295 return error;
1296}
1297
1298static int ea_dealloc_indirect(struct gfs2_inode *ip)
1299{
1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1301 struct gfs2_rgrp_list rlist;
1302 struct buffer_head *indbh, *dibh;
1303 u64 *eablk, *end;
1304 unsigned int rg_blocks = 0;
1305 u64 bstart = 0;
1306 unsigned int blen = 0;
1307 unsigned int blks = 0;
1308 unsigned int x;
1309 int error;
1310
1311 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1312
1313 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &indbh);
1314 if (error)
1315 return error;
1316
1317 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
1318 error = -EIO;
1319 goto out;
1320 }
1321
1322 eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1323 end = eablk + sdp->sd_inptrs;
1324
1325 for (; eablk < end; eablk++) {
1326 u64 bn;
1327
1328 if (!*eablk)
1329 break;
1330 bn = be64_to_cpu(*eablk);
1331
1332 if (bstart + blen == bn)
1333 blen++;
1334 else {
1335 if (bstart)
1336 gfs2_rlist_add(sdp, &rlist, bstart);
1337 bstart = bn;
1338 blen = 1;
1339 }
1340 blks++;
1341 }
1342 if (bstart)
1343 gfs2_rlist_add(sdp, &rlist, bstart);
1344 else
1345 goto out;
1346
1347 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1348
1349 for (x = 0; x < rlist.rl_rgrps; x++) {
1350 struct gfs2_rgrpd *rgd;
1351 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1352 rg_blocks += rgd->rd_ri.ri_length;
1353 }
1354
1355 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1356 if (error)
1357 goto out_rlist_free;
1358
1359 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT +
1360 RES_STATFS + RES_QUOTA, blks);
1361 if (error)
1362 goto out_gunlock;
1363
1364 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1365
1366 eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1367 bstart = 0;
1368 blen = 0;
1369
1370 for (; eablk < end; eablk++) {
1371 u64 bn;
1372
1373 if (!*eablk)
1374 break;
1375 bn = be64_to_cpu(*eablk);
1376
1377 if (bstart + blen == bn)
1378 blen++;
1379 else {
1380 if (bstart)
1381 gfs2_free_meta(ip, bstart, blen);
1382 bstart = bn;
1383 blen = 1;
1384 }
1385
1386 *eablk = 0;
1387 if (!ip->i_di.di_blocks)
1388 gfs2_consist_inode(ip);
1389 ip->i_di.di_blocks--;
1390 }
1391 if (bstart)
1392 gfs2_free_meta(ip, bstart, blen);
1393
1394 ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
1395
1396 error = gfs2_meta_inode_buffer(ip, &dibh);
1397 if (!error) {
1398 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1399 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1400 brelse(dibh);
1401 }
1402
1403 gfs2_trans_end(sdp);
1404
1405out_gunlock:
1406 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1407out_rlist_free:
1408 gfs2_rlist_free(&rlist);
1409out:
1410 brelse(indbh);
1411 return error;
1412}
1413
1414static int ea_dealloc_block(struct gfs2_inode *ip)
1415{
1416 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1417 struct gfs2_alloc *al = &ip->i_alloc;
1418 struct gfs2_rgrpd *rgd;
1419 struct buffer_head *dibh;
1420 int error;
1421
1422 rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
1423 if (!rgd) {
1424 gfs2_consist_inode(ip);
1425 return -EIO;
1426 }
1427
1428 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1429 &al->al_rgd_gh);
1430 if (error)
1431 return error;
1432
1433 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE + RES_STATFS +
1434 RES_QUOTA, 1);
1435 if (error)
1436 goto out_gunlock;
1437
1438 gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
1439
1440 ip->i_di.di_eattr = 0;
1441 if (!ip->i_di.di_blocks)
1442 gfs2_consist_inode(ip);
1443 ip->i_di.di_blocks--;
1444
1445 error = gfs2_meta_inode_buffer(ip, &dibh);
1446 if (!error) {
1447 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1448 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1449 brelse(dibh);
1450 }
1451
1452 gfs2_trans_end(sdp);
1453
1454out_gunlock:
1455 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1456 return error;
1457}
1458
1459/**
1460 * gfs2_ea_dealloc - deallocate the extended attribute fork
1461 * @ip: the inode
1462 *
1463 * Returns: errno
1464 */
1465
1466int gfs2_ea_dealloc(struct gfs2_inode *ip)
1467{
1468 struct gfs2_alloc *al;
1469 int error;
1470
1471 al = gfs2_alloc_get(ip);
1472
1473 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1474 if (error)
1475 goto out_alloc;
1476
1477 error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
1478 if (error)
1479 goto out_quota;
1480
1481 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
1482 if (error)
1483 goto out_rindex;
1484
1485 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
1486 error = ea_dealloc_indirect(ip);
1487 if (error)
1488 goto out_rindex;
1489 }
1490
1491 error = ea_dealloc_block(ip);
1492
1493out_rindex:
1494 gfs2_glock_dq_uninit(&al->al_ri_gh);
1495out_quota:
1496 gfs2_quota_unhold(ip);
1497out_alloc:
1498 gfs2_alloc_put(ip);
1499 return error;
1500}
1501
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
new file mode 100644
index 000000000000..ffa65947d686
--- /dev/null
+++ b/fs/gfs2/eattr.h
@@ -0,0 +1,100 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __EATTR_DOT_H__
11#define __EATTR_DOT_H__
12
13struct gfs2_inode;
14struct iattr;
15
/*
 * Accessors for struct gfs2_ea_header records.  Most of these macros
 * evaluate their argument more than once, so do not pass expressions
 * with side effects.
 */

/* Record length and value length, converted from big-endian. */
16#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
17#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)
18
/*
 * Bytes a record actually uses: header + name + either the inline value
 * (stuffed) or the u64 pointer array (unstuffed), rounded up to 8.
 */
19#define GFS2_EA_SIZE(ea) \
20ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
21 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
22 (sizeof(u64) * (ea)->ea_num_ptrs)), 8)
23
/* Stuffed = no data block pointers; LAST = GFS2_EAFLAG_LAST is set. */
24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
26
/* Record size a request needs if its value is stored inline. */
27#define GFS2_EAREQ_SIZE_STUFFED(er) \
28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
29
/*
 * Record size a request needs if its value goes into separate blocks:
 * one u64 pointer per sd_jbsize bytes of value.
 */
30#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
31ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
32 sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
33
/* The name follows the header; the inline value follows the name. */
34#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
35#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
36
/* The pointer array follows the name, padded to a multiple of 8 bytes. */
37#define GFS2_EA2DATAPTRS(ea) \
38((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
39
/* Next record: ea_rec_len bytes past the start of this one. */
40#define GFS2_EA2NEXT(ea) \
41((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
42
/* First record of an attribute block: right after the meta header. */
43#define GFS2_EA_BH2FIRST(bh) \
44((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
45
/* er_flags bit: er_mode is valid and is to be stored in the dinode. */
46#define GFS2_ERF_MODE 0x80000000
47
/* Describes one get/set/remove/list operation on an extended attribute. */
48struct gfs2_ea_request {
49 const char *er_name; /* attribute name, er_name_len bytes */
50 char *er_data; /* value buffer; may be NULL */
51 unsigned int er_name_len;
52 unsigned int er_data_len; /* size of er_data in bytes; 0 when no data */
53 unsigned int er_type; /* GFS2_EATYPE_... */
54 int er_flags; /* GFS2_ERF_MODE, XATTR_CREATE, XATTR_REPLACE */
55 mode_t er_mode; /* new inode mode; used when GFS2_ERF_MODE is set */
56};
57
/* Result of gfs2_ea_find(): where an attribute record lives. */
58struct gfs2_ea_location {
59 struct buffer_head *el_bh; /* buffer of the block holding el_ea; released with brelse() by users */
60 struct gfs2_ea_header *el_ea; /* the record itself; NULL when not found */
61 struct gfs2_ea_header *el_prev; /* record preceding el_ea, or NULL */
62};
63
64int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
65int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
66int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
67
68int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
69int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
70int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
71int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
72
73int gfs2_ea_dealloc(struct gfs2_inode *ip);
74
75/* Exported to acl.c */
76
77int gfs2_ea_find(struct gfs2_inode *ip,
78 struct gfs2_ea_request *er,
79 struct gfs2_ea_location *el);
80int gfs2_ea_get_copy(struct gfs2_inode *ip,
81 struct gfs2_ea_location *el,
82 char *data);
83int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
84 struct iattr *attr, char *data);
85
86static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
87{
88 switch (ea->ea_type) {
89 case GFS2_EATYPE_USR:
90 return 5 + ea->ea_name_len + 1;
91 case GFS2_EATYPE_SYS:
92 return 7 + ea->ea_name_len + 1;
93 case GFS2_EATYPE_SECURITY:
94 return 9 + ea->ea_name_len + 1;
95 default:
96 return 0;
97 }
98}
99
100#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
new file mode 100644
index 000000000000..3bb11c0f8b56
--- /dev/null
+++ b/fs/gfs2/gfs2.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __GFS2_DOT_H__
11#define __GFS2_DOT_H__
12
/*
 * Named 0/1 constants.  NOTE(review): presumably passed as readable
 * boolean arguments to GFS2 functions -- confirm at the call sites.
 */
13enum {
14 NO_CREATE = 0,
15 CREATE = 1,
16};
17
18enum {
19 NO_WAIT = 0,
20 WAIT = 1,
21};
22
23enum {
24 NO_FORCE = 0,
25 FORCE = 1,
26};
27
/* NOTE(review): units and users are not visible in this header -- confirm. */
28#define GFS2_FAST_NAME_SIZE 8
29
30#endif /* __GFS2_DOT_H__ */
31
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
new file mode 100644
index 000000000000..78fe0fae23ff
--- /dev/null
+++ b/fs/gfs2/glock.c
@@ -0,0 +1,2231 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/sort.h>
17#include <linux/jhash.h>
18#include <linux/kallsyms.h>
19#include <linux/gfs2_ondisk.h>
20#include <linux/list.h>
21#include <linux/lm_interface.h>
22#include <asm/uaccess.h>
23
24#include "gfs2.h"
25#include "incore.h"
26#include "glock.h"
27#include "glops.h"
28#include "inode.h"
29#include "lm.h"
30#include "lops.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "super.h"
34#include "util.h"
35
36struct greedy {
37 struct gfs2_holder gr_gh;
38 struct work_struct gr_work;
39};
40
41struct gfs2_gl_hash_bucket {
42 struct hlist_head hb_list;
43};
44
45typedef void (*glock_examiner) (struct gfs2_glock * gl);
46
47static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
48static int dump_glock(struct gfs2_glock *gl);
49static int dump_inode(struct gfs2_inode *ip);
50
51#define GFS2_GL_HASH_SHIFT 15
52#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
53#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
54
55static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
56
57/*
58 * Despite what you might think, the numbers below are not arbitrary :-)
59 * They are taken from the ipv4 routing hash code, which is well tested
60 * and thus should be nearly optimal. Later on we might tweak the numbers
61 * but for now this should be fine.
62 *
63 * The reason for putting the locks in a separate array from the list heads
64 * is that we can have fewer locks than list heads and save memory. We use
65 * the same hash function for both, but with a different hash mask.
66 */
67#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
68 defined(CONFIG_PROVE_LOCKING)
69
70#ifdef CONFIG_LOCKDEP
71# define GL_HASH_LOCK_SZ 256
72#else
73# if NR_CPUS >= 32
74# define GL_HASH_LOCK_SZ 4096
75# elif NR_CPUS >= 16
76# define GL_HASH_LOCK_SZ 2048
77# elif NR_CPUS >= 8
78# define GL_HASH_LOCK_SZ 1024
79# elif NR_CPUS >= 4
80# define GL_HASH_LOCK_SZ 512
81# else
82# define GL_HASH_LOCK_SZ 256
83# endif
84#endif
85
86/* We never want more locks than chains */
87#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
88# undef GL_HASH_LOCK_SZ
89# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
90#endif
91
92static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
93
94static inline rwlock_t *gl_lock_addr(unsigned int x)
95{
96 return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
97}
98#else /* not SMP, so no spinlocks required */
99static inline rwlock_t *gl_lock_addr(x)
100{
101 return NULL;
102}
103#endif
104
105/**
106 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
107 * @actual: the current state of the lock
108 * @requested: the lock state that was requested by the caller
109 * @flags: the modifier flags passed in by the caller
110 *
111 * Returns: 1 if the locks are compatible, 0 otherwise
112 */
113
114static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
115 int flags)
116{
117 if (actual == requested)
118 return 1;
119
120 if (flags & GL_EXACT)
121 return 0;
122
123 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
124 return 1;
125
126 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
127 return 1;
128
129 return 0;
130}
131
132/**
133 * gl_hash() - Turn glock number into hash bucket number
134 * @lock: The glock number
135 *
136 * Returns: The number of the corresponding hash bucket
137 */
138
139static unsigned int gl_hash(const struct gfs2_sbd *sdp,
140 const struct lm_lockname *name)
141{
142 unsigned int h;
143
144 h = jhash(&name->ln_number, sizeof(u64), 0);
145 h = jhash(&name->ln_type, sizeof(unsigned int), h);
146 h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
147 h &= GFS2_GL_HASH_MASK;
148
149 return h;
150}
151
152/**
153 * glock_free() - Perform a few checks and then release struct gfs2_glock
154 * @gl: The glock to release
155 *
156 * Also calls lock module to release its internal structure for this glock.
157 *
158 */
159
160static void glock_free(struct gfs2_glock *gl)
161{
162 struct gfs2_sbd *sdp = gl->gl_sbd;
163 struct inode *aspace = gl->gl_aspace;
164
165 gfs2_lm_put_lock(sdp, gl->gl_lock);
166
167 if (aspace)
168 gfs2_aspace_put(aspace);
169
170 kmem_cache_free(gfs2_glock_cachep, gl);
171}
172
173/**
174 * gfs2_glock_hold() - increment reference count on glock
175 * @gl: The glock to hold
176 *
177 */
178
179void gfs2_glock_hold(struct gfs2_glock *gl)
180{
181 atomic_inc(&gl->gl_ref);
182}
183
184/**
185 * gfs2_glock_put() - Decrement reference count on glock
186 * @gl: The glock to put
187 *
188 */
189
190int gfs2_glock_put(struct gfs2_glock *gl)
191{
192 int rv = 0;
193 struct gfs2_sbd *sdp = gl->gl_sbd;
194
195 write_lock(gl_lock_addr(gl->gl_hash));
196 if (atomic_dec_and_test(&gl->gl_ref)) {
197 hlist_del(&gl->gl_list);
198 write_unlock(gl_lock_addr(gl->gl_hash));
199 BUG_ON(spin_is_locked(&gl->gl_spin));
200 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
201 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
202 gfs2_assert(sdp, list_empty(&gl->gl_holders));
203 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
204 gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
205 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
206 glock_free(gl);
207 rv = 1;
208 goto out;
209 }
210 write_unlock(gl_lock_addr(gl->gl_hash));
211out:
212 return rv;
213}
214
215/**
216 * queue_empty - check to see if a glock's queue is empty
217 * @gl: the glock
218 * @head: the head of the queue to check
219 *
220 * This function protects the list in the event that a process already
221 * has a holder on the list and is adding a second holder for itself.
222 * The glmutex lock is what generally prevents processes from working
223 * on the same glock at once, but the special case of adding a second
224 * holder for yourself ("recursive" locking) doesn't involve locking
225 * glmutex, making the spin lock necessary.
226 *
227 * Returns: 1 if the queue is empty
228 */
229
230static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
231{
232 int empty;
233 spin_lock(&gl->gl_spin);
234 empty = list_empty(head);
235 spin_unlock(&gl->gl_spin);
236 return empty;
237}
238
239/**
240 * search_bucket() - Find struct gfs2_glock by lock number
241 * @bucket: the bucket to search
242 * @name: The lock name
243 *
244 * Returns: NULL, or the struct gfs2_glock with the requested number
245 */
246
247static struct gfs2_glock *search_bucket(unsigned int hash,
248 const struct gfs2_sbd *sdp,
249 const struct lm_lockname *name)
250{
251 struct gfs2_glock *gl;
252 struct hlist_node *h;
253
254 hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
255 if (!lm_name_equal(&gl->gl_name, name))
256 continue;
257 if (gl->gl_sbd != sdp)
258 continue;
259
260 atomic_inc(&gl->gl_ref);
261
262 return gl;
263 }
264
265 return NULL;
266}
267
/**
 * gfs2_glock_find() - Look up a glock by name
 * @sdp: The GFS2 superblock
 * @name: The lock name
 *
 * Searches the hash chain under its read lock.  search_bucket() takes a
 * reference on any glock it returns.
 *
 * Returns: NULL, or the struct gfs2_glock with the requested name
 */

static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
					  const struct lm_lockname *name)
{
	const unsigned int bucket = gl_hash(sdp, name);
	struct gfs2_glock *found;

	read_lock(gl_lock_addr(bucket));
	found = search_bucket(bucket, sdp, name);
	read_unlock(gl_lock_addr(bucket));

	return found;
}
288
289/**
290 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
291 * @sdp: The GFS2 superblock
292 * @number: the lock number
293 * @glops: The glock_operations to use
294 * @create: If 0, don't create the glock if it doesn't exist
295 * @glp: the glock is returned here
296 *
297 * This does not lock a glock, just finds/creates structures for one.
298 *
299 * Returns: errno
300 */
301
302int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
303 const struct gfs2_glock_operations *glops, int create,
304 struct gfs2_glock **glp)
305{
306 struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type };
307 struct gfs2_glock *gl, *tmp;
308 unsigned int hash = gl_hash(sdp, &name);
309 int error;
310
311 read_lock(gl_lock_addr(hash));
312 gl = search_bucket(hash, sdp, &name);
313 read_unlock(gl_lock_addr(hash));
314
315 if (gl || !create) {
316 *glp = gl;
317 return 0;
318 }
319
320 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
321 if (!gl)
322 return -ENOMEM;
323
324 gl->gl_flags = 0;
325 gl->gl_name = name;
326 atomic_set(&gl->gl_ref, 1);
327 gl->gl_state = LM_ST_UNLOCKED;
328 gl->gl_hash = hash;
329 gl->gl_owner = NULL;
330 gl->gl_ip = 0;
331 gl->gl_ops = glops;
332 gl->gl_req_gh = NULL;
333 gl->gl_req_bh = NULL;
334 gl->gl_vn = 0;
335 gl->gl_stamp = jiffies;
336 gl->gl_object = NULL;
337 gl->gl_sbd = sdp;
338 gl->gl_aspace = NULL;
339 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
340
341 /* If this glock protects actual on-disk data or metadata blocks,
342 create a VFS inode to manage the pages/buffers holding them. */
343 if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) {
344 gl->gl_aspace = gfs2_aspace_get(sdp);
345 if (!gl->gl_aspace) {
346 error = -ENOMEM;
347 goto fail;
348 }
349 }
350
351 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
352 if (error)
353 goto fail_aspace;
354
355 write_lock(gl_lock_addr(hash));
356 tmp = search_bucket(hash, sdp, &name);
357 if (tmp) {
358 write_unlock(gl_lock_addr(hash));
359 glock_free(gl);
360 gl = tmp;
361 } else {
362 hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
363 write_unlock(gl_lock_addr(hash));
364 }
365
366 *glp = gl;
367
368 return 0;
369
370fail_aspace:
371 if (gl->gl_aspace)
372 gfs2_aspace_put(gl->gl_aspace);
373fail:
374 kmem_cache_free(gfs2_glock_cachep, gl);
375 return error;
376}
377
378/**
379 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
380 * @gl: the glock
381 * @state: the state we're requesting
382 * @flags: the modifier flags
383 * @gh: the holder structure
384 *
385 */
386
387void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
388 struct gfs2_holder *gh)
389{
390 INIT_LIST_HEAD(&gh->gh_list);
391 gh->gh_gl = gl;
392 gh->gh_ip = (unsigned long)__builtin_return_address(0);
393 gh->gh_owner = current;
394 gh->gh_state = state;
395 gh->gh_flags = flags;
396 gh->gh_error = 0;
397 gh->gh_iflags = 0;
398 init_completion(&gh->gh_wait);
399
400 if (gh->gh_state == LM_ST_EXCLUSIVE)
401 gh->gh_flags |= GL_LOCAL_EXCL;
402
403 gfs2_glock_hold(gl);
404}
405
406/**
407 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
408 * @state: the state we're requesting
409 * @flags: the modifier flags
410 * @gh: the holder structure
411 *
412 * Don't mess with the glock.
413 *
414 */
415
416void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh)
417{
418 gh->gh_state = state;
419 gh->gh_flags = flags;
420 if (gh->gh_state == LM_ST_EXCLUSIVE)
421 gh->gh_flags |= GL_LOCAL_EXCL;
422
423 gh->gh_iflags &= 1 << HIF_ALLOCED;
424 gh->gh_ip = (unsigned long)__builtin_return_address(0);
425}
426
427/**
428 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
429 * @gh: the holder structure
430 *
431 */
432
433void gfs2_holder_uninit(struct gfs2_holder *gh)
434{
435 gfs2_glock_put(gh->gh_gl);
436 gh->gh_gl = NULL;
437 gh->gh_ip = 0;
438}
439
440/**
441 * gfs2_holder_get - get a struct gfs2_holder structure
442 * @gl: the glock
443 * @state: the state we're requesting
444 * @flags: the modifier flags
445 * @gfp_flags:
446 *
447 * Figure out how big an impact this function has. Either:
448 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
449 * 2) Leave it like it is
450 *
451 * Returns: the holder structure, NULL on ENOMEM
452 */
453
454static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl,
455 unsigned int state,
456 int flags, gfp_t gfp_flags)
457{
458 struct gfs2_holder *gh;
459
460 gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
461 if (!gh)
462 return NULL;
463
464 gfs2_holder_init(gl, state, flags, gh);
465 set_bit(HIF_ALLOCED, &gh->gh_iflags);
466 gh->gh_ip = (unsigned long)__builtin_return_address(0);
467 return gh;
468}
469
/**
 * gfs2_holder_put - uninitialize and free a kmalloc()ed holder
 * @gh: the holder structure
 *
 * Counterpart of gfs2_holder_get().
 *
 */

static void gfs2_holder_put(struct gfs2_holder *gh)
{
	/* Drop the glock reference first, then release the memory. */
	gfs2_holder_uninit(gh);
	kfree(gh);
}
481
482/**
483 * rq_mutex - process a mutex request in the queue
484 * @gh: the glock holder
485 *
486 * Returns: 1 if the queue is blocked
487 */
488
489static int rq_mutex(struct gfs2_holder *gh)
490{
491 struct gfs2_glock *gl = gh->gh_gl;
492
493 list_del_init(&gh->gh_list);
494 /* gh->gh_error never examined. */
495 set_bit(GLF_LOCK, &gl->gl_flags);
496 complete(&gh->gh_wait);
497
498 return 1;
499}
500
501/**
502 * rq_promote - process a promote request in the queue
503 * @gh: the glock holder
504 *
505 * Acquire a new inter-node lock, or change a lock state to more restrictive.
506 *
507 * Returns: 1 if the queue is blocked
508 */
509
510static int rq_promote(struct gfs2_holder *gh)
511{
512 struct gfs2_glock *gl = gh->gh_gl;
513 struct gfs2_sbd *sdp = gl->gl_sbd;
514 const struct gfs2_glock_operations *glops = gl->gl_ops;
515
516 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
517 if (list_empty(&gl->gl_holders)) {
518 gl->gl_req_gh = gh;
519 set_bit(GLF_LOCK, &gl->gl_flags);
520 spin_unlock(&gl->gl_spin);
521
522 if (atomic_read(&sdp->sd_reclaim_count) >
523 gfs2_tune_get(sdp, gt_reclaim_limit) &&
524 !(gh->gh_flags & LM_FLAG_PRIORITY)) {
525 gfs2_reclaim_glock(sdp);
526 gfs2_reclaim_glock(sdp);
527 }
528
529 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
530 spin_lock(&gl->gl_spin);
531 }
532 return 1;
533 }
534
535 if (list_empty(&gl->gl_holders)) {
536 set_bit(HIF_FIRST, &gh->gh_iflags);
537 set_bit(GLF_LOCK, &gl->gl_flags);
538 } else {
539 struct gfs2_holder *next_gh;
540 if (gh->gh_flags & GL_LOCAL_EXCL)
541 return 1;
542 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
543 gh_list);
544 if (next_gh->gh_flags & GL_LOCAL_EXCL)
545 return 1;
546 }
547
548 list_move_tail(&gh->gh_list, &gl->gl_holders);
549 gh->gh_error = 0;
550 set_bit(HIF_HOLDER, &gh->gh_iflags);
551
552 complete(&gh->gh_wait);
553
554 return 0;
555}
556
557/**
558 * rq_demote - process a demote request in the queue
559 * @gh: the glock holder
560 *
561 * Returns: 1 if the queue is blocked
562 */
563
564static int rq_demote(struct gfs2_holder *gh)
565{
566 struct gfs2_glock *gl = gh->gh_gl;
567 const struct gfs2_glock_operations *glops = gl->gl_ops;
568
569 if (!list_empty(&gl->gl_holders))
570 return 1;
571
572 if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
573 list_del_init(&gh->gh_list);
574 gh->gh_error = 0;
575 spin_unlock(&gl->gl_spin);
576 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
577 gfs2_holder_put(gh);
578 else
579 complete(&gh->gh_wait);
580 spin_lock(&gl->gl_spin);
581 } else {
582 gl->gl_req_gh = gh;
583 set_bit(GLF_LOCK, &gl->gl_flags);
584 spin_unlock(&gl->gl_spin);
585
586 if (gh->gh_state == LM_ST_UNLOCKED ||
587 gl->gl_state != LM_ST_EXCLUSIVE)
588 glops->go_drop_th(gl);
589 else
590 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
591
592 spin_lock(&gl->gl_spin);
593 }
594
595 return 0;
596}
597
598/**
599 * rq_greedy - process a queued request to drop greedy status
600 * @gh: the glock holder
601 *
602 * Returns: 1 if the queue is blocked
603 */
604
605static int rq_greedy(struct gfs2_holder *gh)
606{
607 struct gfs2_glock *gl = gh->gh_gl;
608
609 list_del_init(&gh->gh_list);
610 /* gh->gh_error never examined. */
611 clear_bit(GLF_GREEDY, &gl->gl_flags);
612 spin_unlock(&gl->gl_spin);
613
614 gfs2_holder_uninit(gh);
615 kfree(container_of(gh, struct greedy, gr_gh));
616
617 spin_lock(&gl->gl_spin);
618
619 return 0;
620}
621
622/**
623 * run_queue - process holder structures on a glock
624 * @gl: the glock
625 *
626 */
627static void run_queue(struct gfs2_glock *gl)
628{
629 struct gfs2_holder *gh;
630 int blocked = 1;
631
632 for (;;) {
633 if (test_bit(GLF_LOCK, &gl->gl_flags))
634 break;
635
636 if (!list_empty(&gl->gl_waiters1)) {
637 gh = list_entry(gl->gl_waiters1.next,
638 struct gfs2_holder, gh_list);
639
640 if (test_bit(HIF_MUTEX, &gh->gh_iflags))
641 blocked = rq_mutex(gh);
642 else
643 gfs2_assert_warn(gl->gl_sbd, 0);
644
645 } else if (!list_empty(&gl->gl_waiters2) &&
646 !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
647 gh = list_entry(gl->gl_waiters2.next,
648 struct gfs2_holder, gh_list);
649
650 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
651 blocked = rq_demote(gh);
652 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
653 blocked = rq_greedy(gh);
654 else
655 gfs2_assert_warn(gl->gl_sbd, 0);
656
657 } else if (!list_empty(&gl->gl_waiters3)) {
658 gh = list_entry(gl->gl_waiters3.next,
659 struct gfs2_holder, gh_list);
660
661 if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
662 blocked = rq_promote(gh);
663 else
664 gfs2_assert_warn(gl->gl_sbd, 0);
665
666 } else
667 break;
668
669 if (blocked)
670 break;
671 }
672}
673
674/**
675 * gfs2_glmutex_lock - acquire a local lock on a glock
676 * @gl: the glock
677 *
678 * Gives caller exclusive access to manipulate a glock structure.
679 */
680
681static void gfs2_glmutex_lock(struct gfs2_glock *gl)
682{
683 struct gfs2_holder gh;
684
685 gfs2_holder_init(gl, 0, 0, &gh);
686 set_bit(HIF_MUTEX, &gh.gh_iflags);
687
688 spin_lock(&gl->gl_spin);
689 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
690 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
691 } else {
692 gl->gl_owner = current;
693 gl->gl_ip = (unsigned long)__builtin_return_address(0);
694 complete(&gh.gh_wait);
695 }
696 spin_unlock(&gl->gl_spin);
697
698 wait_for_completion(&gh.gh_wait);
699 gfs2_holder_uninit(&gh);
700}
701
702/**
703 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
704 * @gl: the glock
705 *
706 * Returns: 1 if the glock is acquired
707 */
708
709static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
710{
711 int acquired = 1;
712
713 spin_lock(&gl->gl_spin);
714 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
715 acquired = 0;
716 } else {
717 gl->gl_owner = current;
718 gl->gl_ip = (unsigned long)__builtin_return_address(0);
719 }
720 spin_unlock(&gl->gl_spin);
721
722 return acquired;
723}
724
725/**
726 * gfs2_glmutex_unlock - release a local lock on a glock
727 * @gl: the glock
728 *
729 */
730
731static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
732{
733 spin_lock(&gl->gl_spin);
734 clear_bit(GLF_LOCK, &gl->gl_flags);
735 gl->gl_owner = NULL;
736 gl->gl_ip = 0;
737 run_queue(gl);
738 BUG_ON(!spin_is_locked(&gl->gl_spin));
739 spin_unlock(&gl->gl_spin);
740}
741
742/**
743 * handle_callback - add a demote request to a lock's queue
744 * @gl: the glock
745 * @state: the state the caller wants us to change to
746 *
747 * Note: This may fail sliently if we are out of memory.
748 */
749
750static void handle_callback(struct gfs2_glock *gl, unsigned int state)
751{
752 struct gfs2_holder *gh, *new_gh = NULL;
753
754restart:
755 spin_lock(&gl->gl_spin);
756
757 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
758 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
759 gl->gl_req_gh != gh) {
760 if (gh->gh_state != state)
761 gh->gh_state = LM_ST_UNLOCKED;
762 goto out;
763 }
764 }
765
766 if (new_gh) {
767 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
768 new_gh = NULL;
769 } else {
770 spin_unlock(&gl->gl_spin);
771
772 new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL);
773 if (!new_gh)
774 return;
775 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
776 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
777
778 goto restart;
779 }
780
781out:
782 spin_unlock(&gl->gl_spin);
783
784 if (new_gh)
785 gfs2_holder_put(new_gh);
786}
787
788void gfs2_glock_inode_squish(struct inode *inode)
789{
790 struct gfs2_holder gh;
791 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
792 gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
793 set_bit(HIF_DEMOTE, &gh.gh_iflags);
794 spin_lock(&gl->gl_spin);
795 gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
796 list_add_tail(&gh.gh_list, &gl->gl_waiters2);
797 run_queue(gl);
798 spin_unlock(&gl->gl_spin);
799 wait_for_completion(&gh.gh_wait);
800 gfs2_holder_uninit(&gh);
801}
802
803/**
804 * state_change - record that the glock is now in a different state
805 * @gl: the glock
806 * @new_state the new state
807 *
808 */
809
810static void state_change(struct gfs2_glock *gl, unsigned int new_state)
811{
812 int held1, held2;
813
814 held1 = (gl->gl_state != LM_ST_UNLOCKED);
815 held2 = (new_state != LM_ST_UNLOCKED);
816
817 if (held1 != held2) {
818 if (held2)
819 gfs2_glock_hold(gl);
820 else
821 gfs2_glock_put(gl);
822 }
823
824 gl->gl_state = new_state;
825}
826
827/**
828 * xmote_bh - Called after the lock module is done acquiring a lock
829 * @gl: The glock in question
830 * @ret: the int returned from the lock module
831 *
832 */
833
834static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
835{
836 struct gfs2_sbd *sdp = gl->gl_sbd;
837 const struct gfs2_glock_operations *glops = gl->gl_ops;
838 struct gfs2_holder *gh = gl->gl_req_gh;
839 int prev_state = gl->gl_state;
840 int op_done = 1;
841
842 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
843 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
844 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
845
846 state_change(gl, ret & LM_OUT_ST_MASK);
847
848 if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
849 if (glops->go_inval)
850 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
851 } else if (gl->gl_state == LM_ST_DEFERRED) {
852 /* We might not want to do this here.
853 Look at moving to the inode glops. */
854 if (glops->go_inval)
855 glops->go_inval(gl, DIO_DATA);
856 }
857
858 /* Deal with each possible exit condition */
859
860 if (!gh)
861 gl->gl_stamp = jiffies;
862 else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
863 spin_lock(&gl->gl_spin);
864 list_del_init(&gh->gh_list);
865 gh->gh_error = -EIO;
866 spin_unlock(&gl->gl_spin);
867 } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
868 spin_lock(&gl->gl_spin);
869 list_del_init(&gh->gh_list);
870 if (gl->gl_state == gh->gh_state ||
871 gl->gl_state == LM_ST_UNLOCKED) {
872 gh->gh_error = 0;
873 } else {
874 if (gfs2_assert_warn(sdp, gh->gh_flags &
875 (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
876 fs_warn(sdp, "ret = 0x%.8X\n", ret);
877 gh->gh_error = GLR_TRYFAILED;
878 }
879 spin_unlock(&gl->gl_spin);
880
881 if (ret & LM_OUT_CANCELED)
882 handle_callback(gl, LM_ST_UNLOCKED);
883
884 } else if (ret & LM_OUT_CANCELED) {
885 spin_lock(&gl->gl_spin);
886 list_del_init(&gh->gh_list);
887 gh->gh_error = GLR_CANCELED;
888 spin_unlock(&gl->gl_spin);
889
890 } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
891 spin_lock(&gl->gl_spin);
892 list_move_tail(&gh->gh_list, &gl->gl_holders);
893 gh->gh_error = 0;
894 set_bit(HIF_HOLDER, &gh->gh_iflags);
895 spin_unlock(&gl->gl_spin);
896
897 set_bit(HIF_FIRST, &gh->gh_iflags);
898
899 op_done = 0;
900
901 } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
902 spin_lock(&gl->gl_spin);
903 list_del_init(&gh->gh_list);
904 gh->gh_error = GLR_TRYFAILED;
905 spin_unlock(&gl->gl_spin);
906
907 } else {
908 if (gfs2_assert_withdraw(sdp, 0) == -1)
909 fs_err(sdp, "ret = 0x%.8X\n", ret);
910 }
911
912 if (glops->go_xmote_bh)
913 glops->go_xmote_bh(gl);
914
915 if (op_done) {
916 spin_lock(&gl->gl_spin);
917 gl->gl_req_gh = NULL;
918 gl->gl_req_bh = NULL;
919 clear_bit(GLF_LOCK, &gl->gl_flags);
920 run_queue(gl);
921 spin_unlock(&gl->gl_spin);
922 }
923
924 gfs2_glock_put(gl);
925
926 if (gh) {
927 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
928 gfs2_holder_put(gh);
929 else
930 complete(&gh->gh_wait);
931 }
932}
933
934/**
935 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
936 * @gl: The glock in question
937 * @state: the requested state
938 * @flags: modifier flags to the lock call
939 *
940 */
941
942void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
943{
944 struct gfs2_sbd *sdp = gl->gl_sbd;
945 const struct gfs2_glock_operations *glops = gl->gl_ops;
946 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
947 LM_FLAG_NOEXP | LM_FLAG_ANY |
948 LM_FLAG_PRIORITY);
949 unsigned int lck_ret;
950
951 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
952 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
953 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
954 gfs2_assert_warn(sdp, state != gl->gl_state);
955
956 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
957 glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
958
959 gfs2_glock_hold(gl);
960 gl->gl_req_bh = xmote_bh;
961
962 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags);
963
964 if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
965 return;
966
967 if (lck_ret & LM_OUT_ASYNC)
968 gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
969 else
970 xmote_bh(gl, lck_ret);
971}
972
973/**
974 * drop_bh - Called after a lock module unlock completes
975 * @gl: the glock
976 * @ret: the return status
977 *
978 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
979 * Doesn't drop the reference on the glock the top half took out
980 *
981 */
982
983static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
984{
985 struct gfs2_sbd *sdp = gl->gl_sbd;
986 const struct gfs2_glock_operations *glops = gl->gl_ops;
987 struct gfs2_holder *gh = gl->gl_req_gh;
988
989 clear_bit(GLF_PREFETCH, &gl->gl_flags);
990
991 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
992 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
993 gfs2_assert_warn(sdp, !ret);
994
995 state_change(gl, LM_ST_UNLOCKED);
996
997 if (glops->go_inval)
998 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
999
1000 if (gh) {
1001 spin_lock(&gl->gl_spin);
1002 list_del_init(&gh->gh_list);
1003 gh->gh_error = 0;
1004 spin_unlock(&gl->gl_spin);
1005 }
1006
1007 if (glops->go_drop_bh)
1008 glops->go_drop_bh(gl);
1009
1010 spin_lock(&gl->gl_spin);
1011 gl->gl_req_gh = NULL;
1012 gl->gl_req_bh = NULL;
1013 clear_bit(GLF_LOCK, &gl->gl_flags);
1014 run_queue(gl);
1015 spin_unlock(&gl->gl_spin);
1016
1017 gfs2_glock_put(gl);
1018
1019 if (gh) {
1020 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
1021 gfs2_holder_put(gh);
1022 else
1023 complete(&gh->gh_wait);
1024 }
1025}
1026
1027/**
1028 * gfs2_glock_drop_th - call into the lock module to unlock a lock
1029 * @gl: the glock
1030 *
1031 */
1032
1033void gfs2_glock_drop_th(struct gfs2_glock *gl)
1034{
1035 struct gfs2_sbd *sdp = gl->gl_sbd;
1036 const struct gfs2_glock_operations *glops = gl->gl_ops;
1037 unsigned int ret;
1038
1039 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1040 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1041 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1042
1043 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
1044 glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
1045
1046 gfs2_glock_hold(gl);
1047 gl->gl_req_bh = drop_bh;
1048
1049 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
1050
1051 if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
1052 return;
1053
1054 if (!ret)
1055 drop_bh(gl, ret);
1056 else
1057 gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
1058}
1059
1060/**
1061 * do_cancels - cancel requests for locks stuck waiting on an expire flag
1062 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
1063 *
1064 * Don't cancel GL_NOCANCEL requests.
1065 */
1066
1067static void do_cancels(struct gfs2_holder *gh)
1068{
1069 struct gfs2_glock *gl = gh->gh_gl;
1070
1071 spin_lock(&gl->gl_spin);
1072
1073 while (gl->gl_req_gh != gh &&
1074 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1075 !list_empty(&gh->gh_list)) {
1076 if (gl->gl_req_bh && !(gl->gl_req_gh &&
1077 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
1078 spin_unlock(&gl->gl_spin);
1079 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
1080 msleep(100);
1081 spin_lock(&gl->gl_spin);
1082 } else {
1083 spin_unlock(&gl->gl_spin);
1084 msleep(100);
1085 spin_lock(&gl->gl_spin);
1086 }
1087 }
1088
1089 spin_unlock(&gl->gl_spin);
1090}
1091
1092/**
1093 * glock_wait_internal - wait on a glock acquisition
1094 * @gh: the glock holder
1095 *
1096 * Returns: 0 on success
1097 */
1098
1099static int glock_wait_internal(struct gfs2_holder *gh)
1100{
1101 struct gfs2_glock *gl = gh->gh_gl;
1102 struct gfs2_sbd *sdp = gl->gl_sbd;
1103 const struct gfs2_glock_operations *glops = gl->gl_ops;
1104
1105 if (test_bit(HIF_ABORTED, &gh->gh_iflags))
1106 return -EIO;
1107
1108 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1109 spin_lock(&gl->gl_spin);
1110 if (gl->gl_req_gh != gh &&
1111 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1112 !list_empty(&gh->gh_list)) {
1113 list_del_init(&gh->gh_list);
1114 gh->gh_error = GLR_TRYFAILED;
1115 run_queue(gl);
1116 spin_unlock(&gl->gl_spin);
1117 return gh->gh_error;
1118 }
1119 spin_unlock(&gl->gl_spin);
1120 }
1121
1122 if (gh->gh_flags & LM_FLAG_PRIORITY)
1123 do_cancels(gh);
1124
1125 wait_for_completion(&gh->gh_wait);
1126
1127 if (gh->gh_error)
1128 return gh->gh_error;
1129
1130 gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
1131 gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state,
1132 gh->gh_flags));
1133
1134 if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
1135 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1136
1137 if (glops->go_lock) {
1138 gh->gh_error = glops->go_lock(gh);
1139 if (gh->gh_error) {
1140 spin_lock(&gl->gl_spin);
1141 list_del_init(&gh->gh_list);
1142 spin_unlock(&gl->gl_spin);
1143 }
1144 }
1145
1146 spin_lock(&gl->gl_spin);
1147 gl->gl_req_gh = NULL;
1148 gl->gl_req_bh = NULL;
1149 clear_bit(GLF_LOCK, &gl->gl_flags);
1150 run_queue(gl);
1151 spin_unlock(&gl->gl_spin);
1152 }
1153
1154 return gh->gh_error;
1155}
1156
1157static inline struct gfs2_holder *
1158find_holder_by_owner(struct list_head *head, struct task_struct *owner)
1159{
1160 struct gfs2_holder *gh;
1161
1162 list_for_each_entry(gh, head, gh_list) {
1163 if (gh->gh_owner == owner)
1164 return gh;
1165 }
1166
1167 return NULL;
1168}
1169
1170/**
1171 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1172 * @gh: the holder structure to add
1173 *
1174 */
1175
1176static void add_to_queue(struct gfs2_holder *gh)
1177{
1178 struct gfs2_glock *gl = gh->gh_gl;
1179 struct gfs2_holder *existing;
1180
1181 BUG_ON(!gh->gh_owner);
1182
1183 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1184 if (existing) {
1185 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1186 printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid);
1187 printk(KERN_INFO "lock type : %d lock state : %d\n",
1188 existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
1189 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1190 printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid);
1191 printk(KERN_INFO "lock type : %d lock state : %d\n",
1192 gl->gl_name.ln_type, gl->gl_state);
1193 BUG();
1194 }
1195
1196 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
1197 if (existing) {
1198 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1199 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1200 BUG();
1201 }
1202
1203 if (gh->gh_flags & LM_FLAG_PRIORITY)
1204 list_add(&gh->gh_list, &gl->gl_waiters3);
1205 else
1206 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1207}
1208
1209/**
1210 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1211 * @gh: the holder structure
1212 *
1213 * if (gh->gh_flags & GL_ASYNC), this never returns an error
1214 *
1215 * Returns: 0, GLR_TRYFAILED, or errno on failure
1216 */
1217
1218int gfs2_glock_nq(struct gfs2_holder *gh)
1219{
1220 struct gfs2_glock *gl = gh->gh_gl;
1221 struct gfs2_sbd *sdp = gl->gl_sbd;
1222 int error = 0;
1223
1224restart:
1225 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1226 set_bit(HIF_ABORTED, &gh->gh_iflags);
1227 return -EIO;
1228 }
1229
1230 set_bit(HIF_PROMOTE, &gh->gh_iflags);
1231
1232 spin_lock(&gl->gl_spin);
1233 add_to_queue(gh);
1234 run_queue(gl);
1235 spin_unlock(&gl->gl_spin);
1236
1237 if (!(gh->gh_flags & GL_ASYNC)) {
1238 error = glock_wait_internal(gh);
1239 if (error == GLR_CANCELED) {
1240 msleep(100);
1241 goto restart;
1242 }
1243 }
1244
1245 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1246
1247 if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
1248 dump_glock(gl);
1249
1250 return error;
1251}
1252
1253/**
1254 * gfs2_glock_poll - poll to see if an async request has been completed
1255 * @gh: the holder
1256 *
1257 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1258 */
1259
1260int gfs2_glock_poll(struct gfs2_holder *gh)
1261{
1262 struct gfs2_glock *gl = gh->gh_gl;
1263 int ready = 0;
1264
1265 spin_lock(&gl->gl_spin);
1266
1267 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1268 ready = 1;
1269 else if (list_empty(&gh->gh_list)) {
1270 if (gh->gh_error == GLR_CANCELED) {
1271 spin_unlock(&gl->gl_spin);
1272 msleep(100);
1273 if (gfs2_glock_nq(gh))
1274 return 1;
1275 return 0;
1276 } else
1277 ready = 1;
1278 }
1279
1280 spin_unlock(&gl->gl_spin);
1281
1282 return ready;
1283}
1284
1285/**
1286 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1287 * @gh: the holder structure
1288 *
1289 * Returns: 0, GLR_TRYFAILED, or errno on failure
1290 */
1291
1292int gfs2_glock_wait(struct gfs2_holder *gh)
1293{
1294 int error;
1295
1296 error = glock_wait_internal(gh);
1297 if (error == GLR_CANCELED) {
1298 msleep(100);
1299 gh->gh_flags &= ~GL_ASYNC;
1300 error = gfs2_glock_nq(gh);
1301 }
1302
1303 return error;
1304}
1305
1306/**
1307 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1308 * @gh: the glock holder
1309 *
1310 */
1311
1312void gfs2_glock_dq(struct gfs2_holder *gh)
1313{
1314 struct gfs2_glock *gl = gh->gh_gl;
1315 const struct gfs2_glock_operations *glops = gl->gl_ops;
1316
1317 if (gh->gh_flags & GL_NOCACHE)
1318 handle_callback(gl, LM_ST_UNLOCKED);
1319
1320 gfs2_glmutex_lock(gl);
1321
1322 spin_lock(&gl->gl_spin);
1323 list_del_init(&gh->gh_list);
1324
1325 if (list_empty(&gl->gl_holders)) {
1326 spin_unlock(&gl->gl_spin);
1327
1328 if (glops->go_unlock)
1329 glops->go_unlock(gh);
1330
1331 gl->gl_stamp = jiffies;
1332
1333 spin_lock(&gl->gl_spin);
1334 }
1335
1336 clear_bit(GLF_LOCK, &gl->gl_flags);
1337 run_queue(gl);
1338 spin_unlock(&gl->gl_spin);
1339}
1340
1341/**
1342 * gfs2_glock_prefetch - Try to prefetch a glock
1343 * @gl: the glock
1344 * @state: the state to prefetch in
1345 * @flags: flags passed to go_xmote_th()
1346 *
1347 */
1348
1349static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
1350 int flags)
1351{
1352 const struct gfs2_glock_operations *glops = gl->gl_ops;
1353
1354 spin_lock(&gl->gl_spin);
1355
1356 if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
1357 !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
1358 !list_empty(&gl->gl_waiters3) ||
1359 relaxed_state_ok(gl->gl_state, state, flags)) {
1360 spin_unlock(&gl->gl_spin);
1361 return;
1362 }
1363
1364 set_bit(GLF_PREFETCH, &gl->gl_flags);
1365 set_bit(GLF_LOCK, &gl->gl_flags);
1366 spin_unlock(&gl->gl_spin);
1367
1368 glops->go_xmote_th(gl, state, flags);
1369}
1370
1371static void greedy_work(void *data)
1372{
1373 struct greedy *gr = data;
1374 struct gfs2_holder *gh = &gr->gr_gh;
1375 struct gfs2_glock *gl = gh->gh_gl;
1376 const struct gfs2_glock_operations *glops = gl->gl_ops;
1377
1378 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1379
1380 if (glops->go_greedy)
1381 glops->go_greedy(gl);
1382
1383 spin_lock(&gl->gl_spin);
1384
1385 if (list_empty(&gl->gl_waiters2)) {
1386 clear_bit(GLF_GREEDY, &gl->gl_flags);
1387 spin_unlock(&gl->gl_spin);
1388 gfs2_holder_uninit(gh);
1389 kfree(gr);
1390 } else {
1391 gfs2_glock_hold(gl);
1392 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1393 run_queue(gl);
1394 spin_unlock(&gl->gl_spin);
1395 gfs2_glock_put(gl);
1396 }
1397}
1398
1399/**
1400 * gfs2_glock_be_greedy -
1401 * @gl:
1402 * @time:
1403 *
1404 * Returns: 0 if go_greedy will be called, 1 otherwise
1405 */
1406
1407int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1408{
1409 struct greedy *gr;
1410 struct gfs2_holder *gh;
1411
1412 if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
1413 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1414 return 1;
1415
1416 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1417 if (!gr) {
1418 clear_bit(GLF_GREEDY, &gl->gl_flags);
1419 return 1;
1420 }
1421 gh = &gr->gr_gh;
1422
1423 gfs2_holder_init(gl, 0, 0, gh);
1424 set_bit(HIF_GREEDY, &gh->gh_iflags);
1425 INIT_WORK(&gr->gr_work, greedy_work, gr);
1426
1427 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1428 schedule_delayed_work(&gr->gr_work, time);
1429
1430 return 0;
1431}
1432
/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
 * @gh: the holder structure
 *
 * Shorthand for gfs2_glock_dq() followed by gfs2_holder_uninit().
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	/* Release the lock first, then tear the holder down. */
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}
1444
1445/**
1446 * gfs2_glock_nq_num - acquire a glock based on lock number
1447 * @sdp: the filesystem
1448 * @number: the lock number
1449 * @glops: the glock operations for the type of glock
1450 * @state: the state to acquire the glock in
1451 * @flags: modifier flags for the aquisition
1452 * @gh: the struct gfs2_holder
1453 *
1454 * Returns: errno
1455 */
1456
1457int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
1458 const struct gfs2_glock_operations *glops,
1459 unsigned int state, int flags, struct gfs2_holder *gh)
1460{
1461 struct gfs2_glock *gl;
1462 int error;
1463
1464 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1465 if (!error) {
1466 error = gfs2_glock_nq_init(gl, state, flags, gh);
1467 gfs2_glock_put(gl);
1468 }
1469
1470 return error;
1471}
1472
1473/**
1474 * glock_compare - Compare two struct gfs2_glock structures for sorting
1475 * @arg_a: the first structure
1476 * @arg_b: the second structure
1477 *
1478 */
1479
1480static int glock_compare(const void *arg_a, const void *arg_b)
1481{
1482 const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1483 const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1484 const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1485 const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1486
1487 if (a->ln_number > b->ln_number)
1488 return 1;
1489 if (a->ln_number < b->ln_number)
1490 return -1;
1491 if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
1492 return 1;
1493 if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL))
1494 return 1;
1495 return 0;
1496}
1497
1498/**
1499 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
1500 * @num_gh: the number of structures
1501 * @ghs: an array of struct gfs2_holder structures
1502 *
1503 * Returns: 0 on success (all glocks acquired),
1504 * errno on failure (no glocks acquired)
1505 */
1506
1507static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1508 struct gfs2_holder **p)
1509{
1510 unsigned int x;
1511 int error = 0;
1512
1513 for (x = 0; x < num_gh; x++)
1514 p[x] = &ghs[x];
1515
1516 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1517
1518 for (x = 0; x < num_gh; x++) {
1519 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1520
1521 error = gfs2_glock_nq(p[x]);
1522 if (error) {
1523 while (x--)
1524 gfs2_glock_dq(p[x]);
1525 break;
1526 }
1527 }
1528
1529 return error;
1530}
1531
1532/**
1533 * gfs2_glock_nq_m - acquire multiple glocks
1534 * @num_gh: the number of structures
1535 * @ghs: an array of struct gfs2_holder structures
1536 *
1537 * Figure out how big an impact this function has. Either:
1538 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1539 * 2) Forget async stuff and just call nq_m_sync()
1540 * 3) Leave it like it is
1541 *
1542 * Returns: 0 on success (all glocks acquired),
1543 * errno on failure (no glocks acquired)
1544 */
1545
1546int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1547{
1548 int *e;
1549 unsigned int x;
1550 int borked = 0, serious = 0;
1551 int error = 0;
1552
1553 if (!num_gh)
1554 return 0;
1555
1556 if (num_gh == 1) {
1557 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1558 return gfs2_glock_nq(ghs);
1559 }
1560
1561 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1562 if (!e)
1563 return -ENOMEM;
1564
1565 for (x = 0; x < num_gh; x++) {
1566 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1567 error = gfs2_glock_nq(&ghs[x]);
1568 if (error) {
1569 borked = 1;
1570 serious = error;
1571 num_gh = x;
1572 break;
1573 }
1574 }
1575
1576 for (x = 0; x < num_gh; x++) {
1577 error = e[x] = glock_wait_internal(&ghs[x]);
1578 if (error) {
1579 borked = 1;
1580 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1581 serious = error;
1582 }
1583 }
1584
1585 if (!borked) {
1586 kfree(e);
1587 return 0;
1588 }
1589
1590 for (x = 0; x < num_gh; x++)
1591 if (!e[x])
1592 gfs2_glock_dq(&ghs[x]);
1593
1594 if (serious)
1595 error = serious;
1596 else {
1597 for (x = 0; x < num_gh; x++)
1598 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1599 &ghs[x]);
1600 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1601 }
1602
1603 kfree(e);
1604
1605 return error;
1606}
1607
1608/**
1609 * gfs2_glock_dq_m - release multiple glocks
1610 * @num_gh: the number of structures
1611 * @ghs: an array of struct gfs2_holder structures
1612 *
1613 */
1614
1615void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1616{
1617 unsigned int x;
1618
1619 for (x = 0; x < num_gh; x++)
1620 gfs2_glock_dq(&ghs[x]);
1621}
1622
1623/**
1624 * gfs2_glock_dq_uninit_m - release multiple glocks
1625 * @num_gh: the number of structures
1626 * @ghs: an array of struct gfs2_holder structures
1627 *
1628 */
1629
1630void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1631{
1632 unsigned int x;
1633
1634 for (x = 0; x < num_gh; x++)
1635 gfs2_glock_dq_uninit(&ghs[x]);
1636}
1637
1638/**
1639 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1640 * @sdp: the filesystem
1641 * @number: the lock number
1642 * @glops: the glock operations for the type of glock
1643 * @state: the state to acquire the glock in
1644 * @flags: modifier flags for the aquisition
1645 *
1646 * Returns: errno
1647 */
1648
1649void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
1650 const struct gfs2_glock_operations *glops,
1651 unsigned int state, int flags)
1652{
1653 struct gfs2_glock *gl;
1654 int error;
1655
1656 if (atomic_read(&sdp->sd_reclaim_count) <
1657 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1658 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1659 if (!error) {
1660 gfs2_glock_prefetch(gl, state, flags);
1661 gfs2_glock_put(gl);
1662 }
1663 }
1664}
1665
1666/**
1667 * gfs2_lvb_hold - attach a LVB from a glock
1668 * @gl: The glock in question
1669 *
1670 */
1671
1672int gfs2_lvb_hold(struct gfs2_glock *gl)
1673{
1674 int error;
1675
1676 gfs2_glmutex_lock(gl);
1677
1678 if (!atomic_read(&gl->gl_lvb_count)) {
1679 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1680 if (error) {
1681 gfs2_glmutex_unlock(gl);
1682 return error;
1683 }
1684 gfs2_glock_hold(gl);
1685 }
1686 atomic_inc(&gl->gl_lvb_count);
1687
1688 gfs2_glmutex_unlock(gl);
1689
1690 return 0;
1691}
1692
1693/**
1694 * gfs2_lvb_unhold - detach a LVB from a glock
1695 * @gl: The glock in question
1696 *
1697 */
1698
1699void gfs2_lvb_unhold(struct gfs2_glock *gl)
1700{
1701 gfs2_glock_hold(gl);
1702 gfs2_glmutex_lock(gl);
1703
1704 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1705 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1706 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1707 gl->gl_lvb = NULL;
1708 gfs2_glock_put(gl);
1709 }
1710
1711 gfs2_glmutex_unlock(gl);
1712 gfs2_glock_put(gl);
1713}
1714
1715static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1716 unsigned int state)
1717{
1718 struct gfs2_glock *gl;
1719
1720 gl = gfs2_glock_find(sdp, name);
1721 if (!gl)
1722 return;
1723
1724 if (gl->gl_ops->go_callback)
1725 gl->gl_ops->go_callback(gl, state);
1726 handle_callback(gl, state);
1727
1728 spin_lock(&gl->gl_spin);
1729 run_queue(gl);
1730 spin_unlock(&gl->gl_spin);
1731
1732 gfs2_glock_put(gl);
1733}
1734
1735/**
1736 * gfs2_glock_cb - Callback used by locking module
1737 * @sdp: Pointer to the superblock
1738 * @type: Type of callback
1739 * @data: Type dependent data pointer
1740 *
1741 * Called by the locking module when it wants to tell us something.
1742 * Either we need to drop a lock, one of our ASYNC requests completed, or
1743 * a journal from another client needs to be recovered.
1744 */
1745
1746void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1747{
1748 struct gfs2_sbd *sdp = cb_data;
1749
1750 switch (type) {
1751 case LM_CB_NEED_E:
1752 blocking_cb(sdp, data, LM_ST_UNLOCKED);
1753 return;
1754
1755 case LM_CB_NEED_D:
1756 blocking_cb(sdp, data, LM_ST_DEFERRED);
1757 return;
1758
1759 case LM_CB_NEED_S:
1760 blocking_cb(sdp, data, LM_ST_SHARED);
1761 return;
1762
1763 case LM_CB_ASYNC: {
1764 struct lm_async_cb *async = data;
1765 struct gfs2_glock *gl;
1766
1767 gl = gfs2_glock_find(sdp, &async->lc_name);
1768 if (gfs2_assert_warn(sdp, gl))
1769 return;
1770 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1771 gl->gl_req_bh(gl, async->lc_ret);
1772 gfs2_glock_put(gl);
1773 return;
1774 }
1775
1776 case LM_CB_NEED_RECOVERY:
1777 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1778 if (sdp->sd_recoverd_process)
1779 wake_up_process(sdp->sd_recoverd_process);
1780 return;
1781
1782 case LM_CB_DROPLOCKS:
1783 gfs2_gl_hash_clear(sdp, NO_WAIT);
1784 gfs2_quota_scan(sdp);
1785 return;
1786
1787 default:
1788 gfs2_assert_warn(sdp, 0);
1789 return;
1790 }
1791}
1792
1793/**
1794 * demote_ok - Check to see if it's ok to unlock a glock
1795 * @gl: the glock
1796 *
1797 * Returns: 1 if it's ok
1798 */
1799
1800static int demote_ok(struct gfs2_glock *gl)
1801{
1802 struct gfs2_sbd *sdp = gl->gl_sbd;
1803 const struct gfs2_glock_operations *glops = gl->gl_ops;
1804 int demote = 1;
1805
1806 if (test_bit(GLF_STICKY, &gl->gl_flags))
1807 demote = 0;
1808 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
1809 demote = time_after_eq(jiffies, gl->gl_stamp +
1810 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
1811 else if (glops->go_demote_ok)
1812 demote = glops->go_demote_ok(gl);
1813
1814 return demote;
1815}
1816
1817/**
1818 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
1819 * @gl: the glock
1820 *
1821 */
1822
1823void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
1824{
1825 struct gfs2_sbd *sdp = gl->gl_sbd;
1826
1827 spin_lock(&sdp->sd_reclaim_lock);
1828 if (list_empty(&gl->gl_reclaim)) {
1829 gfs2_glock_hold(gl);
1830 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
1831 atomic_inc(&sdp->sd_reclaim_count);
1832 }
1833 spin_unlock(&sdp->sd_reclaim_lock);
1834
1835 wake_up(&sdp->sd_reclaim_wq);
1836}
1837
1838/**
1839 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
1840 * @sdp: the filesystem
1841 *
1842 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
1843 * different glock and we notice that there are a lot of glocks in the
1844 * reclaim list.
1845 *
1846 */
1847
1848void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1849{
1850 struct gfs2_glock *gl;
1851
1852 spin_lock(&sdp->sd_reclaim_lock);
1853 if (list_empty(&sdp->sd_reclaim_list)) {
1854 spin_unlock(&sdp->sd_reclaim_lock);
1855 return;
1856 }
1857 gl = list_entry(sdp->sd_reclaim_list.next,
1858 struct gfs2_glock, gl_reclaim);
1859 list_del_init(&gl->gl_reclaim);
1860 spin_unlock(&sdp->sd_reclaim_lock);
1861
1862 atomic_dec(&sdp->sd_reclaim_count);
1863 atomic_inc(&sdp->sd_reclaimed);
1864
1865 if (gfs2_glmutex_trylock(gl)) {
1866 if (queue_empty(gl, &gl->gl_holders) &&
1867 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1868 handle_callback(gl, LM_ST_UNLOCKED);
1869 gfs2_glmutex_unlock(gl);
1870 }
1871
1872 gfs2_glock_put(gl);
1873}
1874
1875/**
1876 * examine_bucket - Call a function for glock in a hash bucket
1877 * @examiner: the function
1878 * @sdp: the filesystem
1879 * @bucket: the bucket
1880 *
1881 * Returns: 1 if the bucket has entries
1882 */
1883
1884static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
1885 unsigned int hash)
1886{
1887 struct gfs2_glock *gl, *prev = NULL;
1888 int has_entries = 0;
1889 struct hlist_head *head = &gl_hash_table[hash].hb_list;
1890
1891 read_lock(gl_lock_addr(hash));
1892 /* Can't use hlist_for_each_entry - don't want prefetch here */
1893 if (hlist_empty(head))
1894 goto out;
1895 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1896 while(1) {
1897 if (gl->gl_sbd == sdp) {
1898 gfs2_glock_hold(gl);
1899 read_unlock(gl_lock_addr(hash));
1900 if (prev)
1901 gfs2_glock_put(prev);
1902 prev = gl;
1903 examiner(gl);
1904 has_entries = 1;
1905 read_lock(gl_lock_addr(hash));
1906 }
1907 if (gl->gl_list.next == NULL)
1908 break;
1909 gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
1910 }
1911out:
1912 read_unlock(gl_lock_addr(hash));
1913 if (prev)
1914 gfs2_glock_put(prev);
1915 return has_entries;
1916}
1917
1918/**
1919 * scan_glock - look at a glock and see if we can reclaim it
1920 * @gl: the glock to look at
1921 *
1922 */
1923
1924static void scan_glock(struct gfs2_glock *gl)
1925{
1926 if (gl->gl_ops == &gfs2_inode_glops)
1927 return;
1928
1929 if (gfs2_glmutex_trylock(gl)) {
1930 if (queue_empty(gl, &gl->gl_holders) &&
1931 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1932 goto out_schedule;
1933 gfs2_glmutex_unlock(gl);
1934 }
1935 return;
1936
1937out_schedule:
1938 gfs2_glmutex_unlock(gl);
1939 gfs2_glock_schedule_for_reclaim(gl);
1940}
1941
1942/**
1943 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
1944 * @sdp: the filesystem
1945 *
1946 */
1947
1948void gfs2_scand_internal(struct gfs2_sbd *sdp)
1949{
1950 unsigned int x;
1951
1952 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1953 examine_bucket(scan_glock, sdp, x);
1954}
1955
1956/**
1957 * clear_glock - look at a glock and see if we can free it from glock cache
1958 * @gl: the glock to look at
1959 *
1960 */
1961
1962static void clear_glock(struct gfs2_glock *gl)
1963{
1964 struct gfs2_sbd *sdp = gl->gl_sbd;
1965 int released;
1966
1967 spin_lock(&sdp->sd_reclaim_lock);
1968 if (!list_empty(&gl->gl_reclaim)) {
1969 list_del_init(&gl->gl_reclaim);
1970 atomic_dec(&sdp->sd_reclaim_count);
1971 spin_unlock(&sdp->sd_reclaim_lock);
1972 released = gfs2_glock_put(gl);
1973 gfs2_assert(sdp, !released);
1974 } else {
1975 spin_unlock(&sdp->sd_reclaim_lock);
1976 }
1977
1978 if (gfs2_glmutex_trylock(gl)) {
1979 if (queue_empty(gl, &gl->gl_holders) &&
1980 gl->gl_state != LM_ST_UNLOCKED)
1981 handle_callback(gl, LM_ST_UNLOCKED);
1982 gfs2_glmutex_unlock(gl);
1983 }
1984}
1985
1986/**
1987 * gfs2_gl_hash_clear - Empty out the glock hash table
1988 * @sdp: the filesystem
1989 * @wait: wait until it's all gone
1990 *
1991 * Called when unmounting the filesystem, or when inter-node lock manager
1992 * requests DROPLOCKS because it is running out of capacity.
1993 */
1994
1995void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
1996{
1997 unsigned long t;
1998 unsigned int x;
1999 int cont;
2000
2001 t = jiffies;
2002
2003 for (;;) {
2004 cont = 0;
2005 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2006 if (examine_bucket(clear_glock, sdp, x))
2007 cont = 1;
2008 }
2009
2010 if (!wait || !cont)
2011 break;
2012
2013 if (time_after_eq(jiffies,
2014 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
2015 fs_warn(sdp, "Unmount seems to be stalled. "
2016 "Dumping lock state...\n");
2017 gfs2_dump_lockstate(sdp);
2018 t = jiffies;
2019 }
2020
2021 invalidate_inodes(sdp->sd_vfs);
2022 msleep(10);
2023 }
2024}
2025
2026/*
2027 * Diagnostic routines to help debug distributed deadlock
2028 */
2029
2030/**
2031 * dump_holder - print information about a glock holder
2032 * @str: a string naming the type of holder
2033 * @gh: the glock holder
2034 *
2035 * Returns: 0 on success, -ENOBUFS when we run out of space
2036 */
2037
2038static int dump_holder(char *str, struct gfs2_holder *gh)
2039{
2040 unsigned int x;
2041 int error = -ENOBUFS;
2042
2043 printk(KERN_INFO " %s\n", str);
2044 printk(KERN_INFO " owner = %ld\n",
2045 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
2046 printk(KERN_INFO " gh_state = %u\n", gh->gh_state);
2047 printk(KERN_INFO " gh_flags =");
2048 for (x = 0; x < 32; x++)
2049 if (gh->gh_flags & (1 << x))
2050 printk(" %u", x);
2051 printk(" \n");
2052 printk(KERN_INFO " error = %d\n", gh->gh_error);
2053 printk(KERN_INFO " gh_iflags =");
2054 for (x = 0; x < 32; x++)
2055 if (test_bit(x, &gh->gh_iflags))
2056 printk(" %u", x);
2057 printk(" \n");
2058 print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip);
2059
2060 error = 0;
2061
2062 return error;
2063}
2064
2065/**
2066 * dump_inode - print information about an inode
2067 * @ip: the inode
2068 *
2069 * Returns: 0 on success, -ENOBUFS when we run out of space
2070 */
2071
2072static int dump_inode(struct gfs2_inode *ip)
2073{
2074 unsigned int x;
2075 int error = -ENOBUFS;
2076
2077 printk(KERN_INFO " Inode:\n");
2078 printk(KERN_INFO " num = %llu %llu\n",
2079 (unsigned long long)ip->i_num.no_formal_ino,
2080 (unsigned long long)ip->i_num.no_addr);
2081 printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode));
2082 printk(KERN_INFO " i_flags =");
2083 for (x = 0; x < 32; x++)
2084 if (test_bit(x, &ip->i_flags))
2085 printk(" %u", x);
2086 printk(" \n");
2087
2088 error = 0;
2089
2090 return error;
2091}
2092
2093/**
2094 * dump_glock - print information about a glock
2095 * @gl: the glock
2096 * @count: where we are in the buffer
2097 *
2098 * Returns: 0 on success, -ENOBUFS when we run out of space
2099 */
2100
2101static int dump_glock(struct gfs2_glock *gl)
2102{
2103 struct gfs2_holder *gh;
2104 unsigned int x;
2105 int error = -ENOBUFS;
2106
2107 spin_lock(&gl->gl_spin);
2108
2109 printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
2110 (unsigned long long)gl->gl_name.ln_number);
2111 printk(KERN_INFO " gl_flags =");
2112 for (x = 0; x < 32; x++) {
2113 if (test_bit(x, &gl->gl_flags))
2114 printk(" %u", x);
2115 }
2116 printk(" \n");
2117 printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref));
2118 printk(KERN_INFO " gl_state = %u\n", gl->gl_state);
2119 printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm);
2120 print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip);
2121 printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
2122 printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
2123 printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
2124 printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no");
2125 printk(KERN_INFO " le = %s\n",
2126 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
2127 printk(KERN_INFO " reclaim = %s\n",
2128 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
2129 if (gl->gl_aspace)
2130 printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
2131 gl->gl_aspace->i_mapping->nrpages);
2132 else
2133 printk(KERN_INFO " aspace = no\n");
2134 printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count));
2135 if (gl->gl_req_gh) {
2136 error = dump_holder("Request", gl->gl_req_gh);
2137 if (error)
2138 goto out;
2139 }
2140 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
2141 error = dump_holder("Holder", gh);
2142 if (error)
2143 goto out;
2144 }
2145 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
2146 error = dump_holder("Waiter1", gh);
2147 if (error)
2148 goto out;
2149 }
2150 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
2151 error = dump_holder("Waiter2", gh);
2152 if (error)
2153 goto out;
2154 }
2155 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
2156 error = dump_holder("Waiter3", gh);
2157 if (error)
2158 goto out;
2159 }
2160 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
2161 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
2162 list_empty(&gl->gl_holders)) {
2163 error = dump_inode(gl->gl_object);
2164 if (error)
2165 goto out;
2166 } else {
2167 error = -ENOBUFS;
2168 printk(KERN_INFO " Inode: busy\n");
2169 }
2170 }
2171
2172 error = 0;
2173
2174out:
2175 spin_unlock(&gl->gl_spin);
2176 return error;
2177}
2178
2179/**
2180 * gfs2_dump_lockstate - print out the current lockstate
2181 * @sdp: the filesystem
2182 * @ub: the buffer to copy the information into
2183 *
2184 * If @ub is NULL, dump the lockstate to the console.
2185 *
2186 */
2187
2188static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2189{
2190 struct gfs2_glock *gl;
2191 struct hlist_node *h;
2192 unsigned int x;
2193 int error = 0;
2194
2195 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2196
2197 read_lock(gl_lock_addr(x));
2198
2199 hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
2200 if (gl->gl_sbd != sdp)
2201 continue;
2202
2203 error = dump_glock(gl);
2204 if (error)
2205 break;
2206 }
2207
2208 read_unlock(gl_lock_addr(x));
2209
2210 if (error)
2211 break;
2212 }
2213
2214
2215 return error;
2216}
2217
2218int __init gfs2_glock_init(void)
2219{
2220 unsigned i;
2221 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
2222 INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
2223 }
2224#ifdef GL_HASH_LOCK_SZ
2225 for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
2226 rwlock_init(&gl_hash_locks[i]);
2227 }
2228#endif
2229 return 0;
2230}
2231
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
new file mode 100644
index 000000000000..2b2a889ee2cc
--- /dev/null
+++ b/fs/gfs2/glock.h
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__
12
13#include "incore.h"
14
15/* Flags for lock requests; used in gfs2_holder gh_flag field.
16 From lm_interface.h:
17#define LM_FLAG_TRY 0x00000001
18#define LM_FLAG_TRY_1CB 0x00000002
19#define LM_FLAG_NOEXP 0x00000004
20#define LM_FLAG_ANY 0x00000008
21#define LM_FLAG_PRIORITY 0x00000010 */
22
23#define GL_LOCAL_EXCL 0x00000020
24#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080
26#define GL_SKIP 0x00000100
27#define GL_ATIME 0x00000200
28#define GL_NOCACHE 0x00000400
29#define GL_NOCANCEL 0x00001000
30#define GL_AOP 0x00004000
31#define GL_DUMP 0x00008000
32
33#define GLR_TRYFAILED 13
34#define GLR_CANCELED 14
35
36static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
37{
38 struct gfs2_holder *gh;
39 int locked = 0;
40
41 /* Look in glock's list of holders for one with current task as owner */
42 spin_lock(&gl->gl_spin);
43 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
44 if (gh->gh_owner == current) {
45 locked = 1;
46 break;
47 }
48 }
49 spin_unlock(&gl->gl_spin);
50
51 return locked;
52}
53
54static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
55{
56 return gl->gl_state == LM_ST_EXCLUSIVE;
57}
58
59static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
60{
61 return gl->gl_state == LM_ST_DEFERRED;
62}
63
64static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
65{
66 return gl->gl_state == LM_ST_SHARED;
67}
68
69static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
70{
71 int ret;
72 spin_lock(&gl->gl_spin);
73 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
74 spin_unlock(&gl->gl_spin);
75 return ret;
76}
77
78int gfs2_glock_get(struct gfs2_sbd *sdp,
79 u64 number, const struct gfs2_glock_operations *glops,
80 int create, struct gfs2_glock **glp);
81void gfs2_glock_hold(struct gfs2_glock *gl);
82int gfs2_glock_put(struct gfs2_glock *gl);
83void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
84 struct gfs2_holder *gh);
85void gfs2_holder_reinit(unsigned int state, unsigned flags,
86 struct gfs2_holder *gh);
87void gfs2_holder_uninit(struct gfs2_holder *gh);
88
89void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
90void gfs2_glock_drop_th(struct gfs2_glock *gl);
91
92int gfs2_glock_nq(struct gfs2_holder *gh);
93int gfs2_glock_poll(struct gfs2_holder *gh);
94int gfs2_glock_wait(struct gfs2_holder *gh);
95void gfs2_glock_dq(struct gfs2_holder *gh);
96
97int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
98
99void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
100int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
101 u64 number, const struct gfs2_glock_operations *glops,
102 unsigned int state, int flags, struct gfs2_holder *gh);
103
104int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
105void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
106void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
107
108void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
109 const struct gfs2_glock_operations *glops,
110 unsigned int state, int flags);
111void gfs2_glock_inode_squish(struct inode *inode);
112
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * If enqueueing fails the holder is uninitialized again before
 * returning, so the caller has nothing to clean up.
 *
 * Returns: 0, GLR_*, or errno
 */

static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
				     unsigned int state, int flags,
				     struct gfs2_holder *gh)
{
	int error;

	gfs2_holder_init(gl, state, flags, gh);

	error = gfs2_glock_nq(gh);
	if (!error)
		return 0;

	gfs2_holder_uninit(gh);
	return error;
}
137
138/* Lock Value Block functions */
139
140int gfs2_lvb_hold(struct gfs2_glock *gl);
141void gfs2_lvb_unhold(struct gfs2_glock *gl);
142
143void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
144
145void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
146void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
147
148void gfs2_scand_internal(struct gfs2_sbd *sdp);
149void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
150
151int __init gfs2_glock_init(void);
152
153#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
new file mode 100644
index 000000000000..41a6b6818a50
--- /dev/null
+++ b/fs/gfs2/glops.c
@@ -0,0 +1,615 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "bmap.h"
21#include "glock.h"
22#include "glops.h"
23#include "inode.h"
24#include "log.h"
25#include "meta_io.h"
26#include "recovery.h"
27#include "rgrp.h"
28#include "util.h"
29#include "trans.h"
30
31/**
32 * ail_empty_gl - remove all buffers for a given lock from the AIL
33 * @gl: the glock
34 *
35 * None of the buffers should be dirty, locked, or pinned.
36 */
37
38static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
39{
40 struct gfs2_sbd *sdp = gl->gl_sbd;
41 unsigned int blocks;
42 struct list_head *head = &gl->gl_ail_list;
43 struct gfs2_bufdata *bd;
44 struct buffer_head *bh;
45 u64 blkno;
46 int error;
47
48 blocks = atomic_read(&gl->gl_ail_count);
49 if (!blocks)
50 return;
51
52 error = gfs2_trans_begin(sdp, 0, blocks);
53 if (gfs2_assert_withdraw(sdp, !error))
54 return;
55
56 gfs2_log_lock(sdp);
57 while (!list_empty(head)) {
58 bd = list_entry(head->next, struct gfs2_bufdata,
59 bd_ail_gl_list);
60 bh = bd->bd_bh;
61 blkno = bh->b_blocknr;
62 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
63
64 bd->bd_ail = NULL;
65 list_del(&bd->bd_ail_st_list);
66 list_del(&bd->bd_ail_gl_list);
67 atomic_dec(&gl->gl_ail_count);
68 brelse(bh);
69 gfs2_log_unlock(sdp);
70
71 gfs2_trans_add_revoke(sdp, blkno);
72
73 gfs2_log_lock(sdp);
74 }
75 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
76 gfs2_log_unlock(sdp);
77
78 gfs2_trans_end(sdp);
79 gfs2_log_flush(sdp, NULL);
80}
81
82/**
83 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
84 * @gl: the glock
85 *
86 */
87
88static void gfs2_pte_inval(struct gfs2_glock *gl)
89{
90 struct gfs2_inode *ip;
91 struct inode *inode;
92
93 ip = gl->gl_object;
94 inode = &ip->i_inode;
95 if (!ip || !S_ISREG(ip->i_di.di_mode))
96 return;
97
98 if (!test_bit(GIF_PAGED, &ip->i_flags))
99 return;
100
101 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
102
103 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
104 set_bit(GLF_DIRTY, &gl->gl_flags);
105
106 clear_bit(GIF_SW_PAGED, &ip->i_flags);
107}
108
109/**
110 * gfs2_page_inval - Invalidate all pages associated with a glock
111 * @gl: the glock
112 *
113 */
114
115static void gfs2_page_inval(struct gfs2_glock *gl)
116{
117 struct gfs2_inode *ip;
118 struct inode *inode;
119
120 ip = gl->gl_object;
121 inode = &ip->i_inode;
122 if (!ip || !S_ISREG(ip->i_di.di_mode))
123 return;
124
125 truncate_inode_pages(inode->i_mapping, 0);
126 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
127 clear_bit(GIF_PAGED, &ip->i_flags);
128}
129
130/**
131 * gfs2_page_wait - Wait for writeback of data
132 * @gl: the glock
133 *
134 * Syncs data (not metadata) for a regular file.
135 * No-op for all other types.
136 */
137
138static void gfs2_page_wait(struct gfs2_glock *gl)
139{
140 struct gfs2_inode *ip = gl->gl_object;
141 struct inode *inode = &ip->i_inode;
142 struct address_space *mapping = inode->i_mapping;
143 int error;
144
145 if (!S_ISREG(ip->i_di.di_mode))
146 return;
147
148 error = filemap_fdatawait(mapping);
149
150 /* Put back any errors cleared by filemap_fdatawait()
151 so they can be caught by someone who can pass them
152 up to user space. */
153
154 if (error == -ENOSPC)
155 set_bit(AS_ENOSPC, &mapping->flags);
156 else if (error)
157 set_bit(AS_EIO, &mapping->flags);
158
159}
160
161static void gfs2_page_writeback(struct gfs2_glock *gl)
162{
163 struct gfs2_inode *ip = gl->gl_object;
164 struct inode *inode = &ip->i_inode;
165 struct address_space *mapping = inode->i_mapping;
166
167 if (!S_ISREG(ip->i_di.di_mode))
168 return;
169
170 filemap_fdatawrite(mapping);
171}
172
173/**
174 * meta_go_sync - sync out the metadata for this glock
175 * @gl: the glock
176 * @flags: DIO_*
177 *
178 * Called when demoting or unlocking an EX glock. We must flush
179 * to disk all dirty buffers/pages relating to this glock, and must not
180 * not return to caller to demote/unlock the glock until I/O is complete.
181 */
182
183static void meta_go_sync(struct gfs2_glock *gl, int flags)
184{
185 if (!(flags & DIO_METADATA))
186 return;
187
188 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
189 gfs2_log_flush(gl->gl_sbd, gl);
190 gfs2_meta_sync(gl);
191 if (flags & DIO_RELEASE)
192 gfs2_ail_empty_gl(gl);
193 }
194
195}
196
197/**
198 * meta_go_inval - invalidate the metadata for this glock
199 * @gl: the glock
200 * @flags:
201 *
202 */
203
204static void meta_go_inval(struct gfs2_glock *gl, int flags)
205{
206 if (!(flags & DIO_METADATA))
207 return;
208
209 gfs2_meta_inval(gl);
210 gl->gl_vn++;
211}
212
213/**
214 * inode_go_xmote_th - promote/demote a glock
215 * @gl: the glock
216 * @state: the requested state
217 * @flags:
218 *
219 */
220
221static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
222 int flags)
223{
224 if (gl->gl_state != LM_ST_UNLOCKED)
225 gfs2_pte_inval(gl);
226 gfs2_glock_xmote_th(gl, state, flags);
227}
228
229/**
230 * inode_go_xmote_bh - After promoting/demoting a glock
231 * @gl: the glock
232 *
233 */
234
235static void inode_go_xmote_bh(struct gfs2_glock *gl)
236{
237 struct gfs2_holder *gh = gl->gl_req_gh;
238 struct buffer_head *bh;
239 int error;
240
241 if (gl->gl_state != LM_ST_UNLOCKED &&
242 (!gh || !(gh->gh_flags & GL_SKIP))) {
243 error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh);
244 if (!error)
245 brelse(bh);
246 }
247}
248
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
 * is being purged from our node's glock cache; we're dropping lock.
 *
 * PTEs are invalidated before the generic drop handler runs.
 */

static void inode_go_drop_th(struct gfs2_glock *gl)
{
	gfs2_pte_inval(gl);

	gfs2_glock_drop_th(gl);
}
263
264/**
265 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
266 * @gl: the glock protecting the inode
267 * @flags:
268 *
269 */
270
271static void inode_go_sync(struct gfs2_glock *gl, int flags)
272{
273 int meta = (flags & DIO_METADATA);
274 int data = (flags & DIO_DATA);
275
276 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
277 if (meta && data) {
278 gfs2_page_writeback(gl);
279 gfs2_log_flush(gl->gl_sbd, gl);
280 gfs2_meta_sync(gl);
281 gfs2_page_wait(gl);
282 clear_bit(GLF_DIRTY, &gl->gl_flags);
283 } else if (meta) {
284 gfs2_log_flush(gl->gl_sbd, gl);
285 gfs2_meta_sync(gl);
286 } else if (data) {
287 gfs2_page_writeback(gl);
288 gfs2_page_wait(gl);
289 }
290 if (flags & DIO_RELEASE)
291 gfs2_ail_empty_gl(gl);
292 }
293}
294
295/**
296 * inode_go_inval - prepare a inode glock to be released
297 * @gl: the glock
298 * @flags:
299 *
300 */
301
302static void inode_go_inval(struct gfs2_glock *gl, int flags)
303{
304 int meta = (flags & DIO_METADATA);
305 int data = (flags & DIO_DATA);
306
307 if (meta) {
308 gfs2_meta_inval(gl);
309 gl->gl_vn++;
310 }
311 if (data)
312 gfs2_page_inval(gl);
313}
314
315/**
316 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
317 * @gl: the glock
318 *
319 * Returns: 1 if it's ok
320 */
321
322static int inode_go_demote_ok(struct gfs2_glock *gl)
323{
324 struct gfs2_sbd *sdp = gl->gl_sbd;
325 int demote = 0;
326
327 if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
328 demote = 1;
329 else if (!sdp->sd_args.ar_localcaching &&
330 time_after_eq(jiffies, gl->gl_stamp +
331 gfs2_tune_get(sdp, gt_demote_secs) * HZ))
332 demote = 1;
333
334 return demote;
335}
336
337/**
338 * inode_go_lock - operation done after an inode lock is locked by a process
339 * @gl: the glock
340 * @flags:
341 *
342 * Returns: errno
343 */
344
345static int inode_go_lock(struct gfs2_holder *gh)
346{
347 struct gfs2_glock *gl = gh->gh_gl;
348 struct gfs2_inode *ip = gl->gl_object;
349 int error = 0;
350
351 if (!ip)
352 return 0;
353
354 if (ip->i_vn != gl->gl_vn) {
355 error = gfs2_inode_refresh(ip);
356 if (error)
357 return error;
358 gfs2_inode_attr_in(ip);
359 }
360
361 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
362 (gl->gl_state == LM_ST_EXCLUSIVE) &&
363 (gh->gh_flags & GL_LOCAL_EXCL))
364 error = gfs2_truncatei_resume(ip);
365
366 return error;
367}
368
369/**
370 * inode_go_unlock - operation done before an inode lock is unlocked by a
371 * process
372 * @gl: the glock
373 * @flags:
374 *
375 */
376
377static void inode_go_unlock(struct gfs2_holder *gh)
378{
379 struct gfs2_glock *gl = gh->gh_gl;
380 struct gfs2_inode *ip = gl->gl_object;
381
382 if (ip == NULL)
383 return;
384 if (test_bit(GLF_DIRTY, &gl->gl_flags))
385 gfs2_inode_attr_in(ip);
386 gfs2_meta_cache_flush(ip);
387}
388
389/**
390 * inode_greedy -
391 * @gl: the glock
392 *
393 */
394
395static void inode_greedy(struct gfs2_glock *gl)
396{
397 struct gfs2_sbd *sdp = gl->gl_sbd;
398 struct gfs2_inode *ip = gl->gl_object;
399 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
400 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
401 unsigned int new_time;
402
403 spin_lock(&ip->i_spin);
404
405 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
406 new_time = ip->i_greedy + quantum;
407 if (new_time > max)
408 new_time = max;
409 } else {
410 new_time = ip->i_greedy - quantum;
411 if (!new_time || new_time > max)
412 new_time = 1;
413 }
414
415 ip->i_greedy = new_time;
416
417 spin_unlock(&ip->i_spin);
418
419 iput(&ip->i_inode);
420}
421
422/**
423 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
424 * @gl: the glock
425 *
426 * Returns: 1 if it's ok
427 */
428
429static int rgrp_go_demote_ok(struct gfs2_glock *gl)
430{
431 return !gl->gl_aspace->i_mapping->nrpages;
432}
433
434/**
435 * rgrp_go_lock - operation done after an rgrp lock is locked by
436 * a first holder on this node.
437 * @gl: the glock
438 * @flags:
439 *
440 * Returns: errno
441 */
442
443static int rgrp_go_lock(struct gfs2_holder *gh)
444{
445 return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
446}
447
448/**
449 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
450 * a last holder on this node.
451 * @gl: the glock
452 * @flags:
453 *
454 */
455
456static void rgrp_go_unlock(struct gfs2_holder *gh)
457{
458 gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
459}
460
461/**
462 * trans_go_xmote_th - promote/demote the transaction glock
463 * @gl: the glock
464 * @state: the requested state
465 * @flags:
466 *
467 */
468
469static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
470 int flags)
471{
472 struct gfs2_sbd *sdp = gl->gl_sbd;
473
474 if (gl->gl_state != LM_ST_UNLOCKED &&
475 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
476 gfs2_meta_syncfs(sdp);
477 gfs2_log_shutdown(sdp);
478 }
479
480 gfs2_glock_xmote_th(gl, state, flags);
481}
482
483/**
484 * trans_go_xmote_bh - After promoting/demoting the transaction glock
485 * @gl: the glock
486 *
487 */
488
489static void trans_go_xmote_bh(struct gfs2_glock *gl)
490{
491 struct gfs2_sbd *sdp = gl->gl_sbd;
492 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
493 struct gfs2_glock *j_gl = ip->i_gl;
494 struct gfs2_log_header head;
495 int error;
496
497 if (gl->gl_state != LM_ST_UNLOCKED &&
498 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
499 gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
500 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
501
502 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
503 if (error)
504 gfs2_consist(sdp);
505 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
506 gfs2_consist(sdp);
507
508 /* Initialize some head of the log stuff */
509 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
510 sdp->sd_log_sequence = head.lh_sequence + 1;
511 gfs2_log_pointers_init(sdp, head.lh_blkno);
512 }
513 }
514}
515
516/**
517 * trans_go_drop_th - unlock the transaction glock
518 * @gl: the glock
519 *
520 * We want to sync the device even with localcaching. Remember
521 * that localcaching journal replay only marks buffers dirty.
522 */
523
524static void trans_go_drop_th(struct gfs2_glock *gl)
525{
526 struct gfs2_sbd *sdp = gl->gl_sbd;
527
528 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
529 gfs2_meta_syncfs(sdp);
530 gfs2_log_shutdown(sdp);
531 }
532
533 gfs2_glock_drop_th(gl);
534}
535
536/**
537 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
538 * @gl: the glock
539 *
540 * Returns: 1 if it's ok
541 */
542
543static int quota_go_demote_ok(struct gfs2_glock *gl)
544{
545 return !atomic_read(&gl->gl_lvb_count);
546}
547
/* LM_TYPE_META glocks: only the shared xmote/drop handlers, no other hooks. */
548const struct gfs2_glock_operations gfs2_meta_glops = {
549 .go_xmote_th = gfs2_glock_xmote_th,
550 .go_drop_th = gfs2_glock_drop_th,
551 .go_type = LM_TYPE_META,
552};
553
/*
 * LM_TYPE_INODE glocks: every hook is an inode_* handler from this file.
 * go_drop_bh and go_callback are left unset.
 */
554const struct gfs2_glock_operations gfs2_inode_glops = {
555 .go_xmote_th = inode_go_xmote_th,
556 .go_xmote_bh = inode_go_xmote_bh,
557 .go_drop_th = inode_go_drop_th,
558 .go_sync = inode_go_sync,
559 .go_inval = inode_go_inval,
560 .go_demote_ok = inode_go_demote_ok,
561 .go_lock = inode_go_lock,
562 .go_unlock = inode_go_unlock,
563 .go_greedy = inode_greedy,
564 .go_type = LM_TYPE_INODE,
565};
566
/*
 * LM_TYPE_RGRP glocks: shared xmote/drop handlers, the metadata-only
 * sync/inval handlers, and rgrp-specific demote_ok/lock/unlock hooks.
 */
567const struct gfs2_glock_operations gfs2_rgrp_glops = {
568 .go_xmote_th = gfs2_glock_xmote_th,
569 .go_drop_th = gfs2_glock_drop_th,
570 .go_sync = meta_go_sync,
571 .go_inval = meta_go_inval,
572 .go_demote_ok = rgrp_go_demote_ok,
573 .go_lock = rgrp_go_lock,
574 .go_unlock = rgrp_go_unlock,
575 .go_type = LM_TYPE_RGRP,
576};
577
/*
 * Transaction glock: trans_* handlers for xmote (top and bottom half) and
 * drop.  Note that go_type is LM_TYPE_NONDISK, the same type value used by
 * gfs2_nondisk_glops.
 */
578const struct gfs2_glock_operations gfs2_trans_glops = {
579 .go_xmote_th = trans_go_xmote_th,
580 .go_xmote_bh = trans_go_xmote_bh,
581 .go_drop_th = trans_go_drop_th,
582 .go_type = LM_TYPE_NONDISK,
583};
584
/* LM_TYPE_IOPEN glocks: only the shared xmote/drop handlers. */
585const struct gfs2_glock_operations gfs2_iopen_glops = {
586 .go_xmote_th = gfs2_glock_xmote_th,
587 .go_drop_th = gfs2_glock_drop_th,
588 .go_type = LM_TYPE_IOPEN,
589};
590
/* LM_TYPE_FLOCK glocks: only the shared xmote/drop handlers. */
591const struct gfs2_glock_operations gfs2_flock_glops = {
592 .go_xmote_th = gfs2_glock_xmote_th,
593 .go_drop_th = gfs2_glock_drop_th,
594 .go_type = LM_TYPE_FLOCK,
595};
596
/* LM_TYPE_NONDISK glocks: only the shared xmote/drop handlers. */
597const struct gfs2_glock_operations gfs2_nondisk_glops = {
598 .go_xmote_th = gfs2_glock_xmote_th,
599 .go_drop_th = gfs2_glock_drop_th,
600 .go_type = LM_TYPE_NONDISK,
601};
602
/*
 * LM_TYPE_QUOTA glocks: shared xmote/drop handlers plus a demote_ok hook
 * that tests gl_lvb_count.
 */
603const struct gfs2_glock_operations gfs2_quota_glops = {
604 .go_xmote_th = gfs2_glock_xmote_th,
605 .go_drop_th = gfs2_glock_drop_th,
606 .go_demote_ok = quota_go_demote_ok,
607 .go_type = LM_TYPE_QUOTA,
608};
609
/* LM_TYPE_JOURNAL glocks: only the shared xmote/drop handlers. */
610const struct gfs2_glock_operations gfs2_journal_glops = {
611 .go_xmote_th = gfs2_glock_xmote_th,
612 .go_drop_th = gfs2_glock_drop_th,
613 .go_type = LM_TYPE_JOURNAL,
614};
615
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
new file mode 100644
index 000000000000..a1d9b5b024e6
--- /dev/null
+++ b/fs/gfs2/glops.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __GLOPS_DOT_H__
11#define __GLOPS_DOT_H__
12
13#include "incore.h"
14
15extern const struct gfs2_glock_operations gfs2_meta_glops;
16extern const struct gfs2_glock_operations gfs2_inode_glops;
17extern const struct gfs2_glock_operations gfs2_rgrp_glops;
18extern const struct gfs2_glock_operations gfs2_trans_glops;
19extern const struct gfs2_glock_operations gfs2_iopen_glops;
20extern const struct gfs2_glock_operations gfs2_flock_glops;
21extern const struct gfs2_glock_operations gfs2_nondisk_glops;
22extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops;
24
25#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
new file mode 100644
index 000000000000..118dc693d111
--- /dev/null
+++ b/fs/gfs2/incore.h
@@ -0,0 +1,634 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
13#include <linux/fs.h>
14
15#define DIO_WAIT 0x00000010
16#define DIO_METADATA 0x00000020
17#define DIO_DATA 0x00000040
18#define DIO_RELEASE 0x00000080
19#define DIO_ALL 0x00000100
20
21struct gfs2_log_operations;
22struct gfs2_log_element;
23struct gfs2_holder;
24struct gfs2_glock;
25struct gfs2_quota_data;
26struct gfs2_trans;
27struct gfs2_ail;
28struct gfs2_jdesc;
29struct gfs2_sbd;
30
31typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
32
33/*
34 * Structure of operations that are associated with each
35 * type of element in the log.
36 */
37
38struct gfs2_log_operations {
39 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
40 void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
41 void (*lo_before_commit) (struct gfs2_sbd *sdp);
42 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
43 void (*lo_before_scan) (struct gfs2_jdesc *jd,
44 struct gfs2_log_header *head, int pass);
45 int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
46 struct gfs2_log_descriptor *ld, __be64 *ptr,
47 int pass);
48 void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
49 const char *lo_name;
50};
51
52struct gfs2_log_element {
53 struct list_head le_list;
54 const struct gfs2_log_operations *le_ops;
55};
56
57struct gfs2_bitmap {
58 struct buffer_head *bi_bh;
59 char *bi_clone;
60 u32 bi_offset;
61 u32 bi_start;
62 u32 bi_len;
63};
64
65struct gfs2_rgrpd {
66 struct list_head rd_list; /* Link with superblock */
67 struct list_head rd_list_mru;
68 struct list_head rd_recent; /* Recently used rgrps */
69 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
70 struct gfs2_rindex rd_ri;
71 struct gfs2_rgrp rd_rg;
72 u64 rd_rg_vn;
73 struct gfs2_bitmap *rd_bits;
74 unsigned int rd_bh_count;
75 struct mutex rd_mutex;
76 u32 rd_free_clone;
77 struct gfs2_log_element rd_le;
78 u32 rd_last_alloc_data;
79 u32 rd_last_alloc_meta;
80 struct gfs2_sbd *rd_sbd;
81};
82
83enum gfs2_state_bits {
84 BH_Pinned = BH_PrivateStart,
85 BH_Escaped = BH_PrivateStart + 1,
86};
87
88BUFFER_FNS(Pinned, pinned)
89TAS_BUFFER_FNS(Pinned, pinned)
90BUFFER_FNS(Escaped, escaped)
91TAS_BUFFER_FNS(Escaped, escaped)
92
/*
 * Per-buffer bookkeeping that ties a buffer_head to a glock, a transaction
 * list, a log element and the AIL lists.
 */
93struct gfs2_bufdata {
94 struct buffer_head *bd_bh; /* the buffer this record describes */
95 struct gfs2_glock *bd_gl;
96
97 struct list_head bd_list_tr;
98 struct gfs2_log_element bd_le;
99
100 struct gfs2_ail *bd_ail; /* set to NULL by gfs2_ail_empty_gl() on removal */
101 struct list_head bd_ail_st_list;
102 struct list_head bd_ail_gl_list; /* link on gfs2_glock.gl_ail_list */
103};
104
/*
 * Per-glock-type operation table.  The tables defined in glops.c leave
 * hooks they do not need unset, so callers presumably have to test a hook
 * for NULL before invoking it - confirm against the call sites in glock.c.
 */
105struct gfs2_glock_operations {
106 void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
107 int flags);
108 void (*go_xmote_bh) (struct gfs2_glock * gl);
109 void (*go_drop_th) (struct gfs2_glock * gl);
110 void (*go_drop_bh) (struct gfs2_glock * gl);
111 void (*go_sync) (struct gfs2_glock * gl, int flags); /* flags: DIO_* */
112 void (*go_inval) (struct gfs2_glock * gl, int flags); /* flags: DIO_* */
113 int (*go_demote_ok) (struct gfs2_glock * gl); /* returns 1 if demotion is ok */
114 int (*go_lock) (struct gfs2_holder * gh); /* returns errno */
115 void (*go_unlock) (struct gfs2_holder * gh);
116 void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
117 void (*go_greedy) (struct gfs2_glock * gl);
118 const int go_type; /* LM_TYPE_* */
119};
120
/*
 * HIF_* holder flags.  NOTE(review): presumably bit numbers for
 * gfs2_holder.gh_iflags - confirm against glock.c.
 */
121enum {
122 /* Actions */
123 HIF_MUTEX = 0,
124 HIF_PROMOTE = 1,
125 HIF_DEMOTE = 2,
126 HIF_GREEDY = 3,
127
128 /* States */
129 HIF_ALLOCED = 4,
130 HIF_DEALLOC = 5,
131 HIF_HOLDER = 6,
132 HIF_FIRST = 7,
133 HIF_ABORTED = 9, /* value 8 is not assigned in this enum */
134};
135
136struct gfs2_holder {
137 struct list_head gh_list;
138
139 struct gfs2_glock *gh_gl;
140 struct task_struct *gh_owner;
141 unsigned int gh_state;
142 unsigned gh_flags;
143
144 int gh_error;
145 unsigned long gh_iflags;
146 struct completion gh_wait;
147 unsigned long gh_ip;
148};
149
/*
 * GLF_* bit numbers for gfs2_glock.gl_flags (used with test_bit()/set_bit()
 * in glops.c).  Values 0 and 4 are not assigned in this enum.
 */
150enum {
151 GLF_LOCK = 1,
152 GLF_STICKY = 2,
153 GLF_PREFETCH = 3,
154 GLF_DIRTY = 5,
155 GLF_SKIP_WAITERS2 = 6,
156 GLF_GREEDY = 7,
157};
158
/*
 * In-core glock.  The field notes below only describe uses that are visible
 * in glops.c; other files may use the fields in further ways.
 */
159struct gfs2_glock {
160 struct hlist_node gl_list;
161 unsigned long gl_flags; /* GLF_... */
162 struct lm_lockname gl_name;
163 atomic_t gl_ref;
164
165 spinlock_t gl_spin;
166
167 unsigned int gl_state; /* compared against LM_ST_* values */
168 unsigned int gl_hash;
169 struct task_struct *gl_owner;
170 unsigned long gl_ip;
171 struct list_head gl_holders;
172 struct list_head gl_waiters1; /* HIF_MUTEX */
173 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
174 struct list_head gl_waiters3; /* HIF_PROMOTE */
175
176 const struct gfs2_glock_operations *gl_ops;
177
178 struct gfs2_holder *gl_req_gh; /* may be NULL; checked for GL_SKIP in inode_go_xmote_bh() */
179 gfs2_glop_bh_t gl_req_bh;
180
181 void *gl_lock;
182 char *gl_lvb;
183 atomic_t gl_lvb_count; /* quota glocks may only be demoted while this is zero */
184
185 u64 gl_vn; /* bumped when metadata is invalidated; compared with gfs2_inode.i_vn */
186 unsigned long gl_stamp; /* jiffies-based; used for the gt_demote_secs check */
187 void *gl_object; /* struct gfs2_inode for inode glocks; handed to gfs2_rgrp_bh_get() for rgrp glocks */
188
189 struct list_head gl_reclaim;
190
191 struct gfs2_sbd *gl_sbd;
192
193 struct inode *gl_aspace; /* its i_mapping->nrpages is tested by the demote_ok hooks */
194 struct gfs2_log_element gl_le;
195 struct list_head gl_ail_list; /* gfs2_bufdata entries linked via bd_ail_gl_list */
196 atomic_t gl_ail_count; /* decremented as entries leave gl_ail_list */
197};
198
199struct gfs2_alloc {
200 /* Quota stuff */
201
202 struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
203 struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
204 unsigned int al_qd_num;
205
206 u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
207 u32 al_alloced; /* Filled in by gfs2_alloc_*() */
208
209 /* Filled in by gfs2_inplace_reserve() */
210
211 unsigned int al_line;
212 char *al_file;
213 struct gfs2_holder al_ri_gh;
214 struct gfs2_holder al_rgd_gh;
215 struct gfs2_rgrpd *al_rgd;
216
217};
218
/*
 * GIF_* bit numbers for gfs2_inode.i_flags.  Value 0 is not assigned in
 * this enum.
 */
219enum {
220 GIF_QD_LOCKED = 1,
221 GIF_PAGED = 2, /* cleared by gfs2_page_inval() */
222 GIF_SW_PAGED = 3, /* if set, gfs2_pte_inval() marks the glock dirty, then clears it */
223};
224
/*
 * In-core GFS2 inode.  The VFS inode is embedded as the first member so
 * that GFS2_I() can convert a struct inode pointer back to this structure.
 */
225struct gfs2_inode {
226 struct inode i_inode;
227 struct gfs2_inum i_num;
228
229 unsigned long i_flags; /* GIF_... */
230
231 u64 i_vn; /* compared with the glock's gl_vn in inode_go_lock() to decide on a refresh */
232 struct gfs2_dinode i_di; /* To be replaced by ref to block */
233
234 struct gfs2_glock *i_gl; /* Move into i_gh? */
235 struct gfs2_holder i_iopen_gh;
236 struct gfs2_holder i_gh; /* for prepare/commit_write only */
237 struct gfs2_alloc i_alloc;
238 u64 i_last_rg_alloc;
239
240 spinlock_t i_spin; /* held by inode_greedy() while it updates i_greedy */
241 struct rw_semaphore i_rw_mutex;
242 unsigned int i_greedy; /* adjusted in gt_greedy_quantum steps by inode_greedy() */
243 unsigned long i_last_pfault; /* compared against jiffies in inode_greedy() */
244
245 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
246};
247
248/*
249 * Since i_inode is the first element of struct gfs2_inode,
250 * this is effectively a cast.
251 */
252static inline struct gfs2_inode *GFS2_I(struct inode *inode)
253{
254 return container_of(inode, struct gfs2_inode, i_inode);
255}
256
257/* To be removed? */
258static inline struct gfs2_sbd *GFS2_SB(struct inode *inode)
259{
260 return inode->i_sb->s_fs_info;
261}
262
263enum {
264 GFF_DID_DIRECT_ALLOC = 0,
265 GFF_EXLOCK = 1,
266};
267
268struct gfs2_file {
269 unsigned long f_flags; /* GFF_... */
270 struct mutex f_fl_mutex;
271 struct gfs2_holder f_fl_gh;
272};
273
274struct gfs2_revoke {
275 struct gfs2_log_element rv_le;
276 u64 rv_blkno;
277};
278
279struct gfs2_revoke_replay {
280 struct list_head rr_list;
281 u64 rr_blkno;
282 unsigned int rr_where;
283};
284
285enum {
286 QDF_USER = 0,
287 QDF_CHANGE = 1,
288 QDF_LOCKED = 2,
289};
290
291struct gfs2_quota_lvb {
292 __be32 qb_magic;
293 u32 __pad;
294 __be64 qb_limit; /* Hard limit of # blocks to alloc */
295 __be64 qb_warn; /* Warn user when alloc is above this # */
296 __be64 qb_value; /* Current # blocks allocated */
297};
298
299struct gfs2_quota_data {
300 struct list_head qd_list;
301 unsigned int qd_count;
302
303 u32 qd_id;
304 unsigned long qd_flags; /* QDF_... */
305
306 s64 qd_change;
307 s64 qd_change_sync;
308
309 unsigned int qd_slot;
310 unsigned int qd_slot_count;
311
312 struct buffer_head *qd_bh;
313 struct gfs2_quota_change *qd_bh_qc;
314 unsigned int qd_bh_count;
315
316 struct gfs2_glock *qd_gl;
317 struct gfs2_quota_lvb qd_qb;
318
319 u64 qd_sync_gen;
320 unsigned long qd_last_warn;
321 unsigned long qd_last_touched;
322};
323
324struct gfs2_log_buf {
325 struct list_head lb_list;
326 struct buffer_head *lb_bh;
327 struct buffer_head *lb_real;
328};
329
330struct gfs2_trans {
331 unsigned long tr_ip;
332
333 unsigned int tr_blocks;
334 unsigned int tr_revokes;
335 unsigned int tr_reserved;
336
337 struct gfs2_holder tr_t_gh;
338
339 int tr_touched;
340
341 unsigned int tr_num_buf;
342 unsigned int tr_num_buf_new;
343 unsigned int tr_num_buf_rm;
344 struct list_head tr_list_buf;
345
346 unsigned int tr_num_revoke;
347 unsigned int tr_num_revoke_rm;
348};
349
350struct gfs2_ail {
351 struct list_head ai_list;
352
353 unsigned int ai_first;
354 struct list_head ai_ail1_list;
355 struct list_head ai_ail2_list;
356
357 u64 ai_sync_gen;
358};
359
360struct gfs2_jdesc {
361 struct list_head jd_list;
362
363 struct inode *jd_inode;
364 unsigned int jd_jid;
365 int jd_dirty;
366
367 unsigned int jd_blocks;
368};
369
370#define GFS2_GLOCKD_DEFAULT 1
371#define GFS2_GLOCKD_MAX 16
372
373#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF
374#define GFS2_QUOTA_OFF 0
375#define GFS2_QUOTA_ACCOUNT 1
376#define GFS2_QUOTA_ON 2
377
378#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
379#define GFS2_DATA_WRITEBACK 1
380#define GFS2_DATA_ORDERED 2
381
382struct gfs2_args {
383 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
384 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
385 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
386 int ar_spectator; /* Don't get a journal because we're always RO */
387 int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
388 int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
389 int ar_localcaching; /* Local-style caching (dangerous on multihost) */
390 int ar_debug; /* Oops on errors instead of trying to be graceful */
391 int ar_upgrade; /* Upgrade ondisk/multihost format */
392 unsigned int ar_num_glockd; /* Number of glockd threads */
393 int ar_posix_acl; /* Enable posix acls */
394 int ar_quota; /* off/account/on */
395 int ar_suiddir; /* suiddir support */
396 int ar_data; /* ordered/writeback */
397};
398
399struct gfs2_tune {
400 spinlock_t gt_spin;
401
402 unsigned int gt_ilimit;
403 unsigned int gt_ilimit_tries;
404 unsigned int gt_ilimit_min;
405 unsigned int gt_demote_secs; /* Cache retention for unheld glock */
406 unsigned int gt_incore_log_blocks;
407 unsigned int gt_log_flush_secs;
408 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
409
410 unsigned int gt_scand_secs;
411 unsigned int gt_recoverd_secs;
412 unsigned int gt_logd_secs;
413 unsigned int gt_quotad_secs;
414
415 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
416 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
417 unsigned int gt_quota_scale_num; /* Numerator */
418 unsigned int gt_quota_scale_den; /* Denominator */
419 unsigned int gt_quota_cache_secs;
420 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
421 unsigned int gt_atime_quantum; /* Min secs between atime updates */
422 unsigned int gt_new_files_jdata;
423 unsigned int gt_new_files_directio;
424 unsigned int gt_max_atomic_write; /* Split big writes into this size */
425 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
426 unsigned int gt_lockdump_size;
427 unsigned int gt_stall_secs; /* Detects trouble! */
428 unsigned int gt_complain_secs;
429 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
430 unsigned int gt_entries_per_readdir;
431 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
432 unsigned int gt_greedy_default;
433 unsigned int gt_greedy_quantum;
434 unsigned int gt_greedy_max;
435 unsigned int gt_statfs_quantum;
436 unsigned int gt_statfs_slow;
437};
438
439enum {
440 SDF_JOURNAL_CHECKED = 0,
441 SDF_JOURNAL_LIVE = 1,
442 SDF_SHUTDOWN = 2,
443 SDF_NOATIME = 3,
444};
445
446#define GFS2_FSNAME_LEN 256
447
448struct gfs2_sbd {
449 struct super_block *sd_vfs;
450 struct super_block *sd_vfs_meta;
451 struct kobject sd_kobj;
452 unsigned long sd_flags; /* SDF_... */
453 struct gfs2_sb sd_sb;
454
455 /* Constants computed on mount */
456
457 u32 sd_fsb2bb;
458 u32 sd_fsb2bb_shift;
459 u32 sd_diptrs; /* Number of pointers in a dinode */
460 u32 sd_inptrs; /* Number of pointers in a indirect block */
461 u32 sd_jbsize; /* Size of a journaled data block */
462 u32 sd_hash_bsize; /* sizeof(exhash block) */
463 u32 sd_hash_bsize_shift;
464 u32 sd_hash_ptrs; /* Number of pointers in a hash block */
465 u32 sd_qc_per_block;
466 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
467 u32 sd_max_height; /* Max height of a file's metadata tree */
468 u64 sd_heightsize[GFS2_MAX_META_HEIGHT];
469 u32 sd_max_jheight; /* Max height of journaled file's meta tree */
470 u64 sd_jheightsize[GFS2_MAX_META_HEIGHT];
471
472 struct gfs2_args sd_args; /* Mount arguments */
473 struct gfs2_tune sd_tune; /* Filesystem tuning structure */
474
475 /* Lock Stuff */
476
477 struct lm_lockstruct sd_lockstruct;
478 struct list_head sd_reclaim_list;
479 spinlock_t sd_reclaim_lock;
480 wait_queue_head_t sd_reclaim_wq;
481 atomic_t sd_reclaim_count;
482 struct gfs2_holder sd_live_gh;
483 struct gfs2_glock *sd_rename_gl;
484 struct gfs2_glock *sd_trans_gl;
485
486 /* Inode Stuff */
487
488 struct inode *sd_master_dir;
489 struct inode *sd_jindex;
490 struct inode *sd_inum_inode;
491 struct inode *sd_statfs_inode;
492 struct inode *sd_ir_inode;
493 struct inode *sd_sc_inode;
494 struct inode *sd_qc_inode;
495 struct inode *sd_rindex;
496 struct inode *sd_quota_inode;
497
498 /* Inum stuff */
499
500 struct mutex sd_inum_mutex;
501
502 /* StatFS stuff */
503
504 spinlock_t sd_statfs_spin;
505 struct mutex sd_statfs_mutex;
506 struct gfs2_statfs_change sd_statfs_master;
507 struct gfs2_statfs_change sd_statfs_local;
508 unsigned long sd_statfs_sync_time;
509
510 /* Resource group stuff */
511
512 u64 sd_rindex_vn;
513 spinlock_t sd_rindex_spin;
514 struct mutex sd_rindex_mutex;
515 struct list_head sd_rindex_list;
516 struct list_head sd_rindex_mru_list;
517 struct list_head sd_rindex_recent_list;
518 struct gfs2_rgrpd *sd_rindex_forward;
519 unsigned int sd_rgrps;
520
521 /* Journal index stuff */
522
523 struct list_head sd_jindex_list;
524 spinlock_t sd_jindex_spin;
525 struct mutex sd_jindex_mutex;
526 unsigned int sd_journals;
527 unsigned long sd_jindex_refresh_time;
528
529 struct gfs2_jdesc *sd_jdesc;
530 struct gfs2_holder sd_journal_gh;
531 struct gfs2_holder sd_jinode_gh;
532
533 struct gfs2_holder sd_ir_gh;
534 struct gfs2_holder sd_sc_gh;
535 struct gfs2_holder sd_qc_gh;
536
537 /* Daemon stuff */
538
539 struct task_struct *sd_scand_process;
540 struct task_struct *sd_recoverd_process;
541 struct task_struct *sd_logd_process;
542 struct task_struct *sd_quotad_process;
543 struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
544 unsigned int sd_glockd_num;
545
546 /* Quota stuff */
547
548 struct list_head sd_quota_list;
549 atomic_t sd_quota_count;
550 spinlock_t sd_quota_spin;
551 struct mutex sd_quota_mutex;
552
553 unsigned int sd_quota_slots;
554 unsigned int sd_quota_chunks;
555 unsigned char **sd_quota_bitmap;
556
557 u64 sd_quota_sync_gen;
558 unsigned long sd_quota_sync_time;
559
560 /* Log stuff */
561
562 spinlock_t sd_log_lock;
563
564 unsigned int sd_log_blks_reserved;
565 unsigned int sd_log_commited_buf;
566 unsigned int sd_log_commited_revoke;
567
568 unsigned int sd_log_num_gl;
569 unsigned int sd_log_num_buf;
570 unsigned int sd_log_num_revoke;
571 unsigned int sd_log_num_rg;
572 unsigned int sd_log_num_databuf;
573 unsigned int sd_log_num_jdata;
574 unsigned int sd_log_num_hdrs;
575
576 struct list_head sd_log_le_gl;
577 struct list_head sd_log_le_buf;
578 struct list_head sd_log_le_revoke;
579 struct list_head sd_log_le_rg;
580 struct list_head sd_log_le_databuf;
581
582 unsigned int sd_log_blks_free;
583 struct mutex sd_log_reserve_mutex;
584
585 u64 sd_log_sequence;
586 unsigned int sd_log_head;
587 unsigned int sd_log_tail;
588 int sd_log_idle;
589
590 unsigned long sd_log_flush_time;
591 struct rw_semaphore sd_log_flush_lock;
592 struct list_head sd_log_flush_list;
593
594 unsigned int sd_log_flush_head;
595 u64 sd_log_flush_wrapped;
596
597 struct list_head sd_ail1_list;
598 struct list_head sd_ail2_list;
599 u64 sd_ail_sync_gen;
600
601 /* Replay stuff */
602
603 struct list_head sd_revoke_list;
604 unsigned int sd_replay_tail;
605
606 unsigned int sd_found_blocks;
607 unsigned int sd_found_revokes;
608 unsigned int sd_replayed_blocks;
609
610 /* For quiescing the filesystem */
611
612 struct gfs2_holder sd_freeze_gh;
613 struct mutex sd_freeze_lock;
614 unsigned int sd_freeze_count;
615
616 /* Counters */
617
618 atomic_t sd_glock_count;
619 atomic_t sd_glock_held_count;
620 atomic_t sd_inode_count;
621 atomic_t sd_reclaimed;
622
623 char sd_fsname[GFS2_FSNAME_LEN];
624 char sd_table_name[GFS2_FSNAME_LEN];
625 char sd_proto_name[GFS2_FSNAME_LEN];
626
627 /* Debugging crud */
628
629 unsigned long sd_last_warning;
630 struct vfsmount *sd_gfs2mnt;
631};
632
633#endif /* __INCORE_DOT_H__ */
634
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
new file mode 100644
index 000000000000..57c43ac47925
--- /dev/null
+++ b/fs/gfs2/inode.c
@@ -0,0 +1,1379 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h>
19#include <linux/lm_interface.h>
20#include <linux/security.h>
21
22#include "gfs2.h"
23#include "incore.h"
24#include "acl.h"
25#include "bmap.h"
26#include "dir.h"
27#include "eattr.h"
28#include "glock.h"
29#include "glops.h"
30#include "inode.h"
31#include "log.h"
32#include "meta_io.h"
33#include "ops_address.h"
34#include "ops_file.h"
35#include "ops_inode.h"
36#include "quota.h"
37#include "rgrp.h"
38#include "trans.h"
39#include "util.h"
40
41/**
42 * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
43 * @ip: The GFS2 inode (with embedded disk inode data)
44 * @inode: The Linux VFS inode
45 *
46 */
47
48void gfs2_inode_attr_in(struct gfs2_inode *ip)
49{
50 struct inode *inode = &ip->i_inode;
51 struct gfs2_dinode *di = &ip->i_di;
52
53 inode->i_ino = ip->i_num.no_addr;
54
55 switch (di->di_mode & S_IFMT) {
56 case S_IFBLK:
57 case S_IFCHR:
58 inode->i_rdev = MKDEV(di->di_major, di->di_minor);
59 break;
60 default:
61 inode->i_rdev = 0;
62 break;
63 };
64
65 inode->i_mode = di->di_mode;
66 inode->i_nlink = di->di_nlink;
67 inode->i_uid = di->di_uid;
68 inode->i_gid = di->di_gid;
69 i_size_write(inode, di->di_size);
70 inode->i_atime.tv_sec = di->di_atime;
71 inode->i_mtime.tv_sec = di->di_mtime;
72 inode->i_ctime.tv_sec = di->di_ctime;
73 inode->i_atime.tv_nsec = 0;
74 inode->i_mtime.tv_nsec = 0;
75 inode->i_ctime.tv_nsec = 0;
76 inode->i_blocks = di->di_blocks <<
77 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
78
79 if (di->di_flags & GFS2_DIF_IMMUTABLE)
80 inode->i_flags |= S_IMMUTABLE;
81 else
82 inode->i_flags &= ~S_IMMUTABLE;
83
84 if (di->di_flags & GFS2_DIF_APPENDONLY)
85 inode->i_flags |= S_APPEND;
86 else
87 inode->i_flags &= ~S_APPEND;
88}
89
90/**
91 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
92 * @ip: The GFS2 inode
93 *
94 * Only copy out the attributes that we want the VFS layer
95 * to be able to modify.
96 */
97
98void gfs2_inode_attr_out(struct gfs2_inode *ip)
99{
100 struct inode *inode = &ip->i_inode;
101 struct gfs2_dinode *di = &ip->i_di;
102 gfs2_assert_withdraw(GFS2_SB(inode),
103 (di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
104 di->di_mode = inode->i_mode;
105 di->di_uid = inode->i_uid;
106 di->di_gid = inode->i_gid;
107 di->di_atime = inode->i_atime.tv_sec;
108 di->di_mtime = inode->i_mtime.tv_sec;
109 di->di_ctime = inode->i_ctime.tv_sec;
110}
111
112static int iget_test(struct inode *inode, void *opaque)
113{
114 struct gfs2_inode *ip = GFS2_I(inode);
115 struct gfs2_inum *inum = opaque;
116
117 if (ip && ip->i_num.no_addr == inum->no_addr)
118 return 1;
119
120 return 0;
121}
122
123static int iget_set(struct inode *inode, void *opaque)
124{
125 struct gfs2_inode *ip = GFS2_I(inode);
126 struct gfs2_inum *inum = opaque;
127
128 ip->i_num = *inum;
129 return 0;
130}
131
132struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum)
133{
134 return ilookup5(sb, (unsigned long)inum->no_formal_ino,
135 iget_test, inum);
136}
137
138static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
139{
140 return iget5_locked(sb, (unsigned long)inum->no_formal_ino,
141 iget_test, iget_set, inum);
142}
143
144/**
145 * gfs2_inode_lookup - Lookup an inode
146 * @sb: The super block
147 * @inum: The inode number
148 * @type: The type of the inode
149 *
150 * Returns: A VFS inode, or an error
151 */
152
153struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type)
154{
155 struct inode *inode = gfs2_iget(sb, inum);
156 struct gfs2_inode *ip = GFS2_I(inode);
157 struct gfs2_glock *io_gl;
158 int error;
159
160 if (inode->i_state & I_NEW) {
161 struct gfs2_sbd *sdp = GFS2_SB(inode);
162 umode_t mode = DT2IF(type);
163 inode->i_private = ip;
164 inode->i_mode = mode;
165
166 if (S_ISREG(mode)) {
167 inode->i_op = &gfs2_file_iops;
168 inode->i_fop = &gfs2_file_fops;
169 inode->i_mapping->a_ops = &gfs2_file_aops;
170 } else if (S_ISDIR(mode)) {
171 inode->i_op = &gfs2_dir_iops;
172 inode->i_fop = &gfs2_dir_fops;
173 } else if (S_ISLNK(mode)) {
174 inode->i_op = &gfs2_symlink_iops;
175 } else {
176 inode->i_op = &gfs2_dev_iops;
177 }
178
179 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
180 if (unlikely(error))
181 goto fail;
182 ip->i_gl->gl_object = ip;
183
184 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
185 if (unlikely(error))
186 goto fail_put;
187
188 ip->i_vn = ip->i_gl->gl_vn - 1;
189 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
190 if (unlikely(error))
191 goto fail_iopen;
192
193 gfs2_glock_put(io_gl);
194 unlock_new_inode(inode);
195 }
196
197 return inode;
198fail_iopen:
199 gfs2_glock_put(io_gl);
200fail_put:
201 ip->i_gl->gl_object = NULL;
202 gfs2_glock_put(ip->i_gl);
203fail:
204 iput(inode);
205 return ERR_PTR(error);
206}
207
208/**
209 * gfs2_inode_refresh - Refresh the incore copy of the dinode
210 * @ip: The GFS2 inode
211 *
212 * Returns: errno
213 */
214
215int gfs2_inode_refresh(struct gfs2_inode *ip)
216{
217 struct buffer_head *dibh;
218 int error;
219
220 error = gfs2_meta_inode_buffer(ip, &dibh);
221 if (error)
222 return error;
223
224 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
225 brelse(dibh);
226 return -EIO;
227 }
228
229 gfs2_dinode_in(&ip->i_di, dibh->b_data);
230
231 brelse(dibh);
232
233 if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
234 if (gfs2_consist_inode(ip))
235 gfs2_dinode_print(&ip->i_di);
236 return -EIO;
237 }
238 if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
239 return -ESTALE;
240
241 ip->i_vn = ip->i_gl->gl_vn;
242
243 return 0;
244}
245
246int gfs2_dinode_dealloc(struct gfs2_inode *ip)
247{
248 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
249 struct gfs2_alloc *al;
250 struct gfs2_rgrpd *rgd;
251 int error;
252
253 if (ip->i_di.di_blocks != 1) {
254 if (gfs2_consist_inode(ip))
255 gfs2_dinode_print(&ip->i_di);
256 return -EIO;
257 }
258
259 al = gfs2_alloc_get(ip);
260
261 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
262 if (error)
263 goto out;
264
265 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
266 if (error)
267 goto out_qs;
268
269 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
270 if (!rgd) {
271 gfs2_consist_inode(ip);
272 error = -EIO;
273 goto out_rindex_relse;
274 }
275
276 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
277 &al->al_rgd_gh);
278 if (error)
279 goto out_rindex_relse;
280
281 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
282 if (error)
283 goto out_rg_gunlock;
284
285 gfs2_trans_add_gl(ip->i_gl);
286
287 gfs2_free_di(rgd, ip);
288
289 gfs2_trans_end(sdp);
290 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
291
292out_rg_gunlock:
293 gfs2_glock_dq_uninit(&al->al_rgd_gh);
294out_rindex_relse:
295 gfs2_glock_dq_uninit(&al->al_ri_gh);
296out_qs:
297 gfs2_quota_unhold(ip);
298out:
299 gfs2_alloc_put(ip);
300 return error;
301}
302
303/**
304 * gfs2_change_nlink - Change nlink count on inode
305 * @ip: The GFS2 inode
306 * @diff: The change in the nlink count required
307 *
308 * Returns: errno
309 */
310
311int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
312{
313 struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
314 struct buffer_head *dibh;
315 u32 nlink;
316 int error;
317
318 BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink);
319 nlink = ip->i_di.di_nlink + diff;
320
321 /* If we are reducing the nlink count, but the new value ends up being
322 bigger than the old one, we must have underflowed. */
323 if (diff < 0 && nlink > ip->i_di.di_nlink) {
324 if (gfs2_consist_inode(ip))
325 gfs2_dinode_print(&ip->i_di);
326 return -EIO;
327 }
328
329 error = gfs2_meta_inode_buffer(ip, &dibh);
330 if (error)
331 return error;
332
333 ip->i_di.di_nlink = nlink;
334 ip->i_di.di_ctime = get_seconds();
335 ip->i_inode.i_nlink = nlink;
336
337 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
338 gfs2_dinode_out(&ip->i_di, dibh->b_data);
339 brelse(dibh);
340 mark_inode_dirty(&ip->i_inode);
341
342 if (ip->i_di.di_nlink == 0) {
343 struct gfs2_rgrpd *rgd;
344 struct gfs2_holder ri_gh, rg_gh;
345
346 error = gfs2_rindex_hold(sdp, &ri_gh);
347 if (error)
348 goto out;
349 error = -EIO;
350 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
351 if (!rgd)
352 goto out_norgrp;
353 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
354 if (error)
355 goto out_norgrp;
356
357 clear_nlink(&ip->i_inode);
358 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
359 gfs2_glock_dq_uninit(&rg_gh);
360out_norgrp:
361 gfs2_glock_dq_uninit(&ri_gh);
362 }
363out:
364 return error;
365}
366
367struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
368{
369 struct qstr qstr;
370 gfs2_str2qstr(&qstr, name);
371 return gfs2_lookupi(dip, &qstr, 1, NULL);
372}
373
374
375/**
376 * gfs2_lookupi - Look up a filename in a directory and return its inode
377 * @d_gh: An initialized holder for the directory glock
378 * @name: The name of the inode to look for
379 * @is_root: If 1, ignore the caller's permissions
380 * @i_gh: An uninitialized holder for the new inode glock
381 *
382 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
383 * @is_root is true.
384 *
385 * Returns: errno
386 */
387
388struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
389 int is_root, struct nameidata *nd)
390{
391 struct super_block *sb = dir->i_sb;
392 struct gfs2_inode *dip = GFS2_I(dir);
393 struct gfs2_holder d_gh;
394 struct gfs2_inum inum;
395 unsigned int type;
396 int error = 0;
397 struct inode *inode = NULL;
398
399 if (!name->len || name->len > GFS2_FNAMESIZE)
400 return ERR_PTR(-ENAMETOOLONG);
401
402 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
403 (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
404 dir == sb->s_root->d_inode)) {
405 igrab(dir);
406 return dir;
407 }
408
409 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
410 if (error)
411 return ERR_PTR(error);
412
413 if (!is_root) {
414 error = permission(dir, MAY_EXEC, NULL);
415 if (error)
416 goto out;
417 }
418
419 error = gfs2_dir_search(dir, name, &inum, &type);
420 if (error)
421 goto out;
422
423 inode = gfs2_inode_lookup(sb, &inum, type);
424
425out:
426 gfs2_glock_dq_uninit(&d_gh);
427 if (error == -ENOENT)
428 return NULL;
429 return inode;
430}
431
432static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
433{
434 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
435 struct buffer_head *bh;
436 struct gfs2_inum_range ir;
437 int error;
438
439 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
440 if (error)
441 return error;
442 mutex_lock(&sdp->sd_inum_mutex);
443
444 error = gfs2_meta_inode_buffer(ip, &bh);
445 if (error) {
446 mutex_unlock(&sdp->sd_inum_mutex);
447 gfs2_trans_end(sdp);
448 return error;
449 }
450
451 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
452
453 if (ir.ir_length) {
454 *formal_ino = ir.ir_start++;
455 ir.ir_length--;
456 gfs2_trans_add_bh(ip->i_gl, bh, 1);
457 gfs2_inum_range_out(&ir,
458 bh->b_data + sizeof(struct gfs2_dinode));
459 brelse(bh);
460 mutex_unlock(&sdp->sd_inum_mutex);
461 gfs2_trans_end(sdp);
462 return 0;
463 }
464
465 brelse(bh);
466
467 mutex_unlock(&sdp->sd_inum_mutex);
468 gfs2_trans_end(sdp);
469
470 return 1;
471}
472
473static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
474{
475 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
476 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
477 struct gfs2_holder gh;
478 struct buffer_head *bh;
479 struct gfs2_inum_range ir;
480 int error;
481
482 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
483 if (error)
484 return error;
485
486 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
487 if (error)
488 goto out;
489 mutex_lock(&sdp->sd_inum_mutex);
490
491 error = gfs2_meta_inode_buffer(ip, &bh);
492 if (error)
493 goto out_end_trans;
494
495 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
496
497 if (!ir.ir_length) {
498 struct buffer_head *m_bh;
499 u64 x, y;
500
501 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
502 if (error)
503 goto out_brelse;
504
505 x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
506 x = y = be64_to_cpu(x);
507 ir.ir_start = x;
508 ir.ir_length = GFS2_INUM_QUANTUM;
509 x += GFS2_INUM_QUANTUM;
510 if (x < y)
511 gfs2_consist_inode(m_ip);
512 x = cpu_to_be64(x);
513 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
514 *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
515
516 brelse(m_bh);
517 }
518
519 *formal_ino = ir.ir_start++;
520 ir.ir_length--;
521
522 gfs2_trans_add_bh(ip->i_gl, bh, 1);
523 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
524
525out_brelse:
526 brelse(bh);
527out_end_trans:
528 mutex_unlock(&sdp->sd_inum_mutex);
529 gfs2_trans_end(sdp);
530out:
531 gfs2_glock_dq_uninit(&gh);
532 return error;
533}
534
535static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum)
536{
537 int error;
538
539 error = pick_formal_ino_1(sdp, inum);
540 if (error <= 0)
541 return error;
542
543 error = pick_formal_ino_2(sdp, inum);
544
545 return error;
546}
547
548/**
549 * create_ok - OK to create a new on-disk inode here?
550 * @dip: Directory in which dinode is to be created
551 * @name: Name of new dinode
552 * @mode:
553 *
554 * Returns: errno
555 */
556
557static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
558 unsigned int mode)
559{
560 int error;
561
562 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
563 if (error)
564 return error;
565
566 /* Don't create entries in an unlinked directory */
567 if (!dip->i_di.di_nlink)
568 return -EPERM;
569
570 error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
571 switch (error) {
572 case -ENOENT:
573 error = 0;
574 break;
575 case 0:
576 return -EEXIST;
577 default:
578 return error;
579 }
580
581 if (dip->i_di.di_entries == (u32)-1)
582 return -EFBIG;
583 if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1)
584 return -EMLINK;
585
586 return 0;
587}
588
589static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
590 unsigned int *uid, unsigned int *gid)
591{
592 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
593 (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) {
594 if (S_ISDIR(*mode))
595 *mode |= S_ISUID;
596 else if (dip->i_di.di_uid != current->fsuid)
597 *mode &= ~07111;
598 *uid = dip->i_di.di_uid;
599 } else
600 *uid = current->fsuid;
601
602 if (dip->i_di.di_mode & S_ISGID) {
603 if (S_ISDIR(*mode))
604 *mode |= S_ISGID;
605 *gid = dip->i_di.di_gid;
606 } else
607 *gid = current->fsgid;
608}
609
610static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum,
611 u64 *generation)
612{
613 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
614 int error;
615
616 gfs2_alloc_get(dip);
617
618 dip->i_alloc.al_requested = RES_DINODE;
619 error = gfs2_inplace_reserve(dip);
620 if (error)
621 goto out;
622
623 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0);
624 if (error)
625 goto out_ipreserv;
626
627 inum->no_addr = gfs2_alloc_di(dip, generation);
628
629 gfs2_trans_end(sdp);
630
631out_ipreserv:
632 gfs2_inplace_release(dip);
633out:
634 gfs2_alloc_put(dip);
635 return error;
636}
637
638/**
639 * init_dinode - Fill in a new dinode structure
640 * @dip: the directory this inode is being created in
641 * @gl: The glock covering the new inode
642 * @inum: the inode number
643 * @mode: the file permissions
644 * @uid:
645 * @gid:
646 *
647 */
648
649static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
650 const struct gfs2_inum *inum, unsigned int mode,
651 unsigned int uid, unsigned int gid,
652 const u64 *generation)
653{
654 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
655 struct gfs2_dinode *di;
656 struct buffer_head *dibh;
657
658 dibh = gfs2_meta_new(gl, inum->no_addr);
659 gfs2_trans_add_bh(gl, dibh, 1);
660 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
661 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
662 di = (struct gfs2_dinode *)dibh->b_data;
663
664 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
665 di->di_num.no_addr = cpu_to_be64(inum->no_addr);
666 di->di_mode = cpu_to_be32(mode);
667 di->di_uid = cpu_to_be32(uid);
668 di->di_gid = cpu_to_be32(gid);
669 di->di_nlink = cpu_to_be32(0);
670 di->di_size = cpu_to_be64(0);
671 di->di_blocks = cpu_to_be64(1);
672 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
673 di->di_major = di->di_minor = cpu_to_be32(0);
674 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
675 di->di_generation = cpu_to_be64(*generation);
676 di->di_flags = cpu_to_be32(0);
677
678 if (S_ISREG(mode)) {
679 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
680 gfs2_tune_get(sdp, gt_new_files_jdata))
681 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
682 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
683 gfs2_tune_get(sdp, gt_new_files_directio))
684 di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
685 } else if (S_ISDIR(mode)) {
686 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
687 GFS2_DIF_INHERIT_DIRECTIO);
688 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
689 GFS2_DIF_INHERIT_JDATA);
690 }
691
692 di->__pad1 = 0;
693 di->di_payload_format = cpu_to_be32(0);
694 di->di_height = cpu_to_be32(0);
695 di->__pad2 = 0;
696 di->__pad3 = 0;
697 di->di_depth = cpu_to_be16(0);
698 di->di_entries = cpu_to_be32(0);
699 memset(&di->__pad4, 0, sizeof(di->__pad4));
700 di->di_eattr = cpu_to_be64(0);
701 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
702
703 brelse(dibh);
704}
705
706static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
707 unsigned int mode, const struct gfs2_inum *inum,
708 const u64 *generation)
709{
710 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
711 unsigned int uid, gid;
712 int error;
713
714 munge_mode_uid_gid(dip, &mode, &uid, &gid);
715 gfs2_alloc_get(dip);
716
717 error = gfs2_quota_lock(dip, uid, gid);
718 if (error)
719 goto out;
720
721 error = gfs2_quota_check(dip, uid, gid);
722 if (error)
723 goto out_quota;
724
725 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0);
726 if (error)
727 goto out_quota;
728
729 init_dinode(dip, gl, inum, mode, uid, gid, generation);
730 gfs2_quota_change(dip, +1, uid, gid);
731 gfs2_trans_end(sdp);
732
733out_quota:
734 gfs2_quota_unlock(dip);
735out:
736 gfs2_alloc_put(dip);
737 return error;
738}
739
740static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
741 struct gfs2_inode *ip)
742{
743 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
744 struct gfs2_alloc *al;
745 int alloc_required;
746 struct buffer_head *dibh;
747 int error;
748
749 al = gfs2_alloc_get(dip);
750
751 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
752 if (error)
753 goto fail;
754
755 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name);
756 if (alloc_required < 0)
757 goto fail;
758 if (alloc_required) {
759 error = gfs2_quota_check(dip, dip->i_di.di_uid,
760 dip->i_di.di_gid);
761 if (error)
762 goto fail_quota_locks;
763
764 al->al_requested = sdp->sd_max_dirres;
765
766 error = gfs2_inplace_reserve(dip);
767 if (error)
768 goto fail_quota_locks;
769
770 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
771 al->al_rgd->rd_ri.ri_length +
772 2 * RES_DINODE +
773 RES_STATFS + RES_QUOTA, 0);
774 if (error)
775 goto fail_ipreserv;
776 } else {
777 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0);
778 if (error)
779 goto fail_quota_locks;
780 }
781
782 error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
783 if (error)
784 goto fail_end_trans;
785
786 error = gfs2_meta_inode_buffer(ip, &dibh);
787 if (error)
788 goto fail_end_trans;
789 ip->i_di.di_nlink = 1;
790 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
791 gfs2_dinode_out(&ip->i_di, dibh->b_data);
792 brelse(dibh);
793 return 0;
794
795fail_end_trans:
796 gfs2_trans_end(sdp);
797
798fail_ipreserv:
799 if (dip->i_alloc.al_rgd)
800 gfs2_inplace_release(dip);
801
802fail_quota_locks:
803 gfs2_quota_unlock(dip);
804
805fail:
806 gfs2_alloc_put(dip);
807 return error;
808}
809
810static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
811{
812 int err;
813 size_t len;
814 void *value;
815 char *name;
816 struct gfs2_ea_request er;
817
818 err = security_inode_init_security(&ip->i_inode, &dip->i_inode,
819 &name, &value, &len);
820
821 if (err) {
822 if (err == -EOPNOTSUPP)
823 return 0;
824 return err;
825 }
826
827 memset(&er, 0, sizeof(struct gfs2_ea_request));
828
829 er.er_type = GFS2_EATYPE_SECURITY;
830 er.er_name = name;
831 er.er_data = value;
832 er.er_name_len = strlen(name);
833 er.er_data_len = len;
834
835 err = gfs2_ea_set_i(ip, &er);
836
837 kfree(value);
838 kfree(name);
839
840 return err;
841}
842
843/**
844 * gfs2_createi - Create a new inode
845 * @ghs: An array of two holders
846 * @name: The name of the new file
847 * @mode: the permissions on the new inode
848 *
849 * @ghs[0] is an initialized holder for the directory
850 * @ghs[1] is the holder for the inode lock
851 *
852 * If the return value is not NULL, the glocks on both the directory and the new
853 * file are held. A transaction has been started and an inplace reservation
854 * is held, as well.
855 *
856 * Returns: An inode
857 */
858
859struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
860 unsigned int mode)
861{
862 struct inode *inode;
863 struct gfs2_inode *dip = ghs->gh_gl->gl_object;
864 struct inode *dir = &dip->i_inode;
865 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
866 struct gfs2_inum inum;
867 int error;
868 u64 generation;
869
870 if (!name->len || name->len > GFS2_FNAMESIZE)
871 return ERR_PTR(-ENAMETOOLONG);
872
873 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
874 error = gfs2_glock_nq(ghs);
875 if (error)
876 goto fail;
877
878 error = create_ok(dip, name, mode);
879 if (error)
880 goto fail_gunlock;
881
882 error = pick_formal_ino(sdp, &inum.no_formal_ino);
883 if (error)
884 goto fail_gunlock;
885
886 error = alloc_dinode(dip, &inum, &generation);
887 if (error)
888 goto fail_gunlock;
889
890 if (inum.no_addr < dip->i_num.no_addr) {
891 gfs2_glock_dq(ghs);
892
893 error = gfs2_glock_nq_num(sdp, inum.no_addr,
894 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
895 GL_SKIP, ghs + 1);
896 if (error) {
897 return ERR_PTR(error);
898 }
899
900 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
901 error = gfs2_glock_nq(ghs);
902 if (error) {
903 gfs2_glock_dq_uninit(ghs + 1);
904 return ERR_PTR(error);
905 }
906
907 error = create_ok(dip, name, mode);
908 if (error)
909 goto fail_gunlock2;
910 } else {
911 error = gfs2_glock_nq_num(sdp, inum.no_addr,
912 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
913 GL_SKIP, ghs + 1);
914 if (error)
915 goto fail_gunlock;
916 }
917
918 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation);
919 if (error)
920 goto fail_gunlock2;
921
922 inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
923 if (IS_ERR(inode))
924 goto fail_gunlock2;
925
926 error = gfs2_inode_refresh(GFS2_I(inode));
927 if (error)
928 goto fail_iput;
929
930 error = gfs2_acl_create(dip, GFS2_I(inode));
931 if (error)
932 goto fail_iput;
933
934 error = gfs2_security_init(dip, GFS2_I(inode));
935 if (error)
936 goto fail_iput;
937
938 error = link_dinode(dip, name, GFS2_I(inode));
939 if (error)
940 goto fail_iput;
941
942 if (!inode)
943 return ERR_PTR(-ENOMEM);
944 return inode;
945
946fail_iput:
947 iput(inode);
948fail_gunlock2:
949 gfs2_glock_dq_uninit(ghs + 1);
950fail_gunlock:
951 gfs2_glock_dq(ghs);
952fail:
953 return ERR_PTR(error);
954}
955
956/**
957 * gfs2_rmdiri - Remove a directory
958 * @dip: The parent directory of the directory to be removed
959 * @name: The name of the directory to be removed
960 * @ip: The GFS2 inode of the directory to be removed
961 *
962 * Assumes Glocks on dip and ip are held
963 *
964 * Returns: errno
965 */
966
967int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
968 struct gfs2_inode *ip)
969{
970 struct qstr dotname;
971 int error;
972
973 if (ip->i_di.di_entries != 2) {
974 if (gfs2_consist_inode(ip))
975 gfs2_dinode_print(&ip->i_di);
976 return -EIO;
977 }
978
979 error = gfs2_dir_del(dip, name);
980 if (error)
981 return error;
982
983 error = gfs2_change_nlink(dip, -1);
984 if (error)
985 return error;
986
987 gfs2_str2qstr(&dotname, ".");
988 error = gfs2_dir_del(ip, &dotname);
989 if (error)
990 return error;
991
992 gfs2_str2qstr(&dotname, "..");
993 error = gfs2_dir_del(ip, &dotname);
994 if (error)
995 return error;
996
997 error = gfs2_change_nlink(ip, -2);
998 if (error)
999 return error;
1000
1001 return error;
1002}
1003
1004/*
1005 * gfs2_unlink_ok - check to see that a inode is still in a directory
1006 * @dip: the directory
1007 * @name: the name of the file
1008 * @ip: the inode
1009 *
1010 * Assumes that the lock on (at least) @dip is held.
1011 *
1012 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
1013 */
1014
1015int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
1016 struct gfs2_inode *ip)
1017{
1018 struct gfs2_inum inum;
1019 unsigned int type;
1020 int error;
1021
1022 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
1023 return -EPERM;
1024
1025 if ((dip->i_di.di_mode & S_ISVTX) &&
1026 dip->i_di.di_uid != current->fsuid &&
1027 ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
1028 return -EPERM;
1029
1030 if (IS_APPEND(&dip->i_inode))
1031 return -EPERM;
1032
1033 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
1034 if (error)
1035 return error;
1036
1037 error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
1038 if (error)
1039 return error;
1040
1041 if (!gfs2_inum_equal(&inum, &ip->i_num))
1042 return -ENOENT;
1043
1044 if (IF2DT(ip->i_di.di_mode) != type) {
1045 gfs2_consist_inode(dip);
1046 return -EIO;
1047 }
1048
1049 return 0;
1050}
1051
1052/*
1053 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1054 * @this: move this
1055 * @to: to here
1056 *
1057 * Follow @to back to the root and make sure we don't encounter @this
1058 * Assumes we already hold the rename lock.
1059 *
1060 * Returns: errno
1061 */
1062
1063int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1064{
1065 struct inode *dir = &to->i_inode;
1066 struct super_block *sb = dir->i_sb;
1067 struct inode *tmp;
1068 struct qstr dotdot;
1069 int error = 0;
1070
1071 gfs2_str2qstr(&dotdot, "..");
1072
1073 igrab(dir);
1074
1075 for (;;) {
1076 if (dir == &this->i_inode) {
1077 error = -EINVAL;
1078 break;
1079 }
1080 if (dir == sb->s_root->d_inode) {
1081 error = 0;
1082 break;
1083 }
1084
1085 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
1086 if (IS_ERR(tmp)) {
1087 error = PTR_ERR(tmp);
1088 break;
1089 }
1090
1091 iput(dir);
1092 dir = tmp;
1093 }
1094
1095 iput(dir);
1096
1097 return error;
1098}
1099
1100/**
1101 * gfs2_readlinki - return the contents of a symlink
1102 * @ip: the symlink's inode
1103 * @buf: a pointer to the buffer to be filled
1104 * @len: a pointer to the length of @buf
1105 *
1106 * If @buf is too small, a piece of memory is kmalloc()ed and needs
1107 * to be freed by the caller.
1108 *
1109 * Returns: errno
1110 */
1111
1112int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1113{
1114 struct gfs2_holder i_gh;
1115 struct buffer_head *dibh;
1116 unsigned int x;
1117 int error;
1118
1119 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1120 error = gfs2_glock_nq_atime(&i_gh);
1121 if (error) {
1122 gfs2_holder_uninit(&i_gh);
1123 return error;
1124 }
1125
1126 if (!ip->i_di.di_size) {
1127 gfs2_consist_inode(ip);
1128 error = -EIO;
1129 goto out;
1130 }
1131
1132 error = gfs2_meta_inode_buffer(ip, &dibh);
1133 if (error)
1134 goto out;
1135
1136 x = ip->i_di.di_size + 1;
1137 if (x > *len) {
1138 *buf = kmalloc(x, GFP_KERNEL);
1139 if (!*buf) {
1140 error = -ENOMEM;
1141 goto out_brelse;
1142 }
1143 }
1144
1145 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1146 *len = x;
1147
1148out_brelse:
1149 brelse(dibh);
1150out:
1151 gfs2_glock_dq_uninit(&i_gh);
1152 return error;
1153}
1154
1155/**
1156 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1157 * conditionally update the inode's atime
1158 * @gh: the holder to acquire
1159 *
1160 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1161 * Update if the difference between the current time and the inode's current
1162 * atime is greater than an interval specified at mount.
1163 *
1164 * Returns: errno
1165 */
1166
1167int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1168{
1169 struct gfs2_glock *gl = gh->gh_gl;
1170 struct gfs2_sbd *sdp = gl->gl_sbd;
1171 struct gfs2_inode *ip = gl->gl_object;
1172 s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1173 unsigned int state;
1174 int flags;
1175 int error;
1176
1177 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1178 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1179 gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1180 return -EINVAL;
1181
1182 state = gh->gh_state;
1183 flags = gh->gh_flags;
1184
1185 error = gfs2_glock_nq(gh);
1186 if (error)
1187 return error;
1188
1189 if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1190 (sdp->sd_vfs->s_flags & MS_RDONLY))
1191 return 0;
1192
1193 curtime = get_seconds();
1194 if (curtime - ip->i_di.di_atime >= quantum) {
1195 gfs2_glock_dq(gh);
1196 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
1197 gh);
1198 error = gfs2_glock_nq(gh);
1199 if (error)
1200 return error;
1201
1202 /* Verify that atime hasn't been updated while we were
1203 trying to get exclusive lock. */
1204
1205 curtime = get_seconds();
1206 if (curtime - ip->i_di.di_atime >= quantum) {
1207 struct buffer_head *dibh;
1208 struct gfs2_dinode *di;
1209
1210 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1211 if (error == -EROFS)
1212 return 0;
1213 if (error)
1214 goto fail;
1215
1216 error = gfs2_meta_inode_buffer(ip, &dibh);
1217 if (error)
1218 goto fail_end_trans;
1219
1220 ip->i_di.di_atime = curtime;
1221
1222 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1223 di = (struct gfs2_dinode *)dibh->b_data;
1224 di->di_atime = cpu_to_be64(ip->i_di.di_atime);
1225 brelse(dibh);
1226
1227 gfs2_trans_end(sdp);
1228 }
1229
1230 /* If someone else has asked for the glock,
1231 unlock and let them have it. Then reacquire
1232 in the original state. */
1233 if (gfs2_glock_is_blocking(gl)) {
1234 gfs2_glock_dq(gh);
1235 gfs2_holder_reinit(state, flags, gh);
1236 return gfs2_glock_nq(gh);
1237 }
1238 }
1239
1240 return 0;
1241
1242fail_end_trans:
1243 gfs2_trans_end(sdp);
1244fail:
1245 gfs2_glock_dq(gh);
1246 return error;
1247}
1248
1249/**
1250 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1251 * @arg_a: the first structure
1252 * @arg_b: the second structure
1253 *
1254 * Returns: 1 if A > B
1255 * -1 if A < B
1256 * 0 if A == B
1257 */
1258
1259static int glock_compare_atime(const void *arg_a, const void *arg_b)
1260{
1261 const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1262 const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1263 const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1264 const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1265
1266 if (a->ln_number > b->ln_number)
1267 return 1;
1268 if (a->ln_number < b->ln_number)
1269 return -1;
1270 if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
1271 return 1;
1272 if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME))
1273 return 1;
1274
1275 return 0;
1276}
1277
1278/**
1279 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
1280 * atime update
1281 * @num_gh: the number of structures
1282 * @ghs: an array of struct gfs2_holder structures
1283 *
1284 * Returns: 0 on success (all glocks acquired),
1285 * errno on failure (no glocks acquired)
1286 */
1287
1288int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
1289{
1290 struct gfs2_holder **p;
1291 unsigned int x;
1292 int error = 0;
1293
1294 if (!num_gh)
1295 return 0;
1296
1297 if (num_gh == 1) {
1298 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1299 if (ghs->gh_flags & GL_ATIME)
1300 error = gfs2_glock_nq_atime(ghs);
1301 else
1302 error = gfs2_glock_nq(ghs);
1303 return error;
1304 }
1305
1306 p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1307 if (!p)
1308 return -ENOMEM;
1309
1310 for (x = 0; x < num_gh; x++)
1311 p[x] = &ghs[x];
1312
1313 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
1314
1315 for (x = 0; x < num_gh; x++) {
1316 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1317
1318 if (p[x]->gh_flags & GL_ATIME)
1319 error = gfs2_glock_nq_atime(p[x]);
1320 else
1321 error = gfs2_glock_nq(p[x]);
1322
1323 if (error) {
1324 while (x--)
1325 gfs2_glock_dq(p[x]);
1326 break;
1327 }
1328 }
1329
1330 kfree(p);
1331 return error;
1332}
1333
1334
1335static int
1336__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1337{
1338 struct buffer_head *dibh;
1339 int error;
1340
1341 error = gfs2_meta_inode_buffer(ip, &dibh);
1342 if (!error) {
1343 error = inode_setattr(&ip->i_inode, attr);
1344 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
1345 gfs2_inode_attr_out(ip);
1346
1347 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1348 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1349 brelse(dibh);
1350 }
1351 return error;
1352}
1353
1354/**
1355 * gfs2_setattr_simple -
1356 * @ip:
1357 * @attr:
1358 *
1359 * Called with a reference on the vnode.
1360 *
1361 * Returns: errno
1362 */
1363
1364int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1365{
1366 int error;
1367
1368 if (current->journal_info)
1369 return __gfs2_setattr_simple(ip, attr);
1370
1371 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0);
1372 if (error)
1373 return error;
1374
1375 error = __gfs2_setattr_simple(ip, attr);
1376 gfs2_trans_end(GFS2_SB(&ip->i_inode));
1377 return error;
1378}
1379
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
new file mode 100644
index 000000000000..f5d861760579
--- /dev/null
+++ b/fs/gfs2/inode.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__
12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
14{
15 return !ip->i_di.di_height;
16}
17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip)
19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21}
22
23static inline int gfs2_is_dir(struct gfs2_inode *ip)
24{
25 return S_ISDIR(ip->i_di.di_mode);
26}
27
28void gfs2_inode_attr_in(struct gfs2_inode *ip);
29void gfs2_inode_attr_out(struct gfs2_inode *ip);
30struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type);
31struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum);
32
33int gfs2_inode_refresh(struct gfs2_inode *ip);
34
35int gfs2_dinode_dealloc(struct gfs2_inode *inode);
36int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
37struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
38 int is_root, struct nameidata *nd);
39struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
40 unsigned int mode);
41int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
42 struct gfs2_inode *ip);
43int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
44 struct gfs2_inode *ip);
45int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
46int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
47
48int gfs2_glock_nq_atime(struct gfs2_holder *gh);
49int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
50
51int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
52
53struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
54
55#endif /* __INODE_DOT_H__ */
56
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
new file mode 100644
index 000000000000..effe4a337c1d
--- /dev/null
+++ b/fs/gfs2/lm.c
@@ -0,0 +1,217 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "glock.h"
22#include "lm.h"
23#include "super.h"
24#include "util.h"
25
26/**
27 * gfs2_lm_mount - mount a locking protocol
28 * @sdp: the filesystem
29 * @args: mount arguements
30 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
31 *
32 * Returns: errno
33 */
34
35int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
36{
37 char *proto = sdp->sd_proto_name;
38 char *table = sdp->sd_table_name;
39 int flags = 0;
40 int error;
41
42 if (sdp->sd_args.ar_spectator)
43 flags |= LM_MFLAG_SPECTATOR;
44
45 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
46
47 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
48 gfs2_glock_cb, sdp,
49 GFS2_MIN_LVB_SIZE, flags,
50 &sdp->sd_lockstruct, &sdp->sd_kobj);
51 if (error) {
52 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
53 proto, table, sdp->sd_args.ar_hostdata);
54 goto out;
55 }
56
57 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
58 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
59 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
60 GFS2_MIN_LVB_SIZE)) {
61 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
62 goto out;
63 }
64
65 if (sdp->sd_args.ar_spectator)
66 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
67 else
68 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
69 sdp->sd_lockstruct.ls_jid);
70
71 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
72
73 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
74 !sdp->sd_args.ar_ignore_local_fs) {
75 sdp->sd_args.ar_localflocks = 1;
76 sdp->sd_args.ar_localcaching = 1;
77 }
78
79out:
80 return error;
81}
82
83void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
84{
85 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
86 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
87 sdp->sd_lockstruct.ls_lockspace);
88}
89
90void gfs2_lm_unmount(struct gfs2_sbd *sdp)
91{
92 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
93 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
94}
95
/*
 * gfs2_lm_withdraw - withdraw this filesystem from the cluster
 * @sdp: the filesystem
 * @fmt: printf-style message printed before withdrawing
 *
 * Only the first caller proceeds: SDF_SHUTDOWN is set atomically and any
 * later call returns 0 without doing anything.  The first caller prints the
 * message, hits BUG_ON() if the debug mount argument is set, tells the lock
 * module to withdraw, and dumps the stack.
 *
 * Returns: -1 if this call performed the withdraw, 0 if one had already
 * been started
 */
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
{
	va_list args;

	if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return 0;

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	fs_err(sdp, "about to withdraw from the cluster\n");
	BUG_ON(sdp->sd_args.ar_debug);


	fs_err(sdp, "waiting for outstanding I/O\n");

	/* FIXME: suspend dm device so outstanding bio's complete
	   and all further io requests fail */

	fs_err(sdp, "telling LM to withdraw\n");
	gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
	fs_err(sdp, "withdrawn\n");
	dump_stack();

	return -1;
}
123
124int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
125 void **lockp)
126{
127 int error = -EIO;
128 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
129 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
130 sdp->sd_lockstruct.ls_lockspace, name, lockp);
131 return error;
132}
133
134void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock)
135{
136 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
137 sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
138}
139
140unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
141 unsigned int cur_state, unsigned int req_state,
142 unsigned int flags)
143{
144 int ret = 0;
145 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
146 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
147 req_state, flags);
148 return ret;
149}
150
151unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
152 unsigned int cur_state)
153{
154 int ret = 0;
155 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
156 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
157 return ret;
158}
159
160void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock)
161{
162 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
163 sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
164}
165
166int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
167{
168 int error = -EIO;
169 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
170 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
171 return error;
172}
173
174void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb)
175{
176 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
177 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
178}
179
180int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
181 struct file *file, struct file_lock *fl)
182{
183 int error = -EIO;
184 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
185 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
186 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
187 return error;
188}
189
190int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
191 struct file *file, int cmd, struct file_lock *fl)
192{
193 int error = -EIO;
194 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
195 error = sdp->sd_lockstruct.ls_ops->lm_plock(
196 sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
197 return error;
198}
199
200int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
201 struct file *file, struct file_lock *fl)
202{
203 int error = -EIO;
204 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
205 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
206 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
207 return error;
208}
209
210void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
211 unsigned int message)
212{
213 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
214 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
215 sdp->sd_lockstruct.ls_lockspace, jid, message);
216}
217
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
new file mode 100644
index 000000000000..21cdc30ee08c
--- /dev/null
+++ b/fs/gfs2/lm.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13struct gfs2_sbd;
14
15#define GFS2_MIN_LVB_SIZE 32
16
17int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
18void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
19void gfs2_lm_unmount(struct gfs2_sbd *sdp);
20int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
21 __attribute__ ((format(printf, 2, 3)));
22int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
23 void **lockp);
24void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock);
25unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
26 unsigned int cur_state, unsigned int req_state,
27 unsigned int flags);
28unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
29 unsigned int cur_state);
30void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock);
31int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp);
32void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb);
33int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
34 struct file *file, struct file_lock *fl);
35int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
36 struct file *file, int cmd, struct file_lock *fl);
37int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
38 struct file *file, struct file_lock *fl);
39void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
40 unsigned int message);
41
42#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
new file mode 100644
index 000000000000..663fee728783
--- /dev/null
+++ b/fs/gfs2/locking.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19#include <linux/lm_interface.h>
20
/* One registered lock protocol: a list node wrapping the protocol's ops. */
struct lmh_wrapper {
	struct list_head lw_list;	/* link in lmh_list */
	const struct lm_lockops *lw_ops; /* ops passed to gfs2_register_lockproto() */
};
25
26/* List of registered low-level locking protocols. A file system selects one
27 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
28
29static LIST_HEAD(lmh_list);
30static DEFINE_MUTEX(lmh_lock);
31
32/**
33 * gfs2_register_lockproto - Register a low-level locking protocol
34 * @proto: the protocol definition
35 *
36 * Returns: 0 on success, -EXXX on failure
37 */
38
39int gfs2_register_lockproto(const struct lm_lockops *proto)
40{
41 struct lmh_wrapper *lw;
42
43 mutex_lock(&lmh_lock);
44
45 list_for_each_entry(lw, &lmh_list, lw_list) {
46 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
47 mutex_unlock(&lmh_lock);
48 printk(KERN_INFO "GFS2: protocol %s already exists\n",
49 proto->lm_proto_name);
50 return -EEXIST;
51 }
52 }
53
54 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
55 if (!lw) {
56 mutex_unlock(&lmh_lock);
57 return -ENOMEM;
58 }
59
60 lw->lw_ops = proto;
61 list_add(&lw->lw_list, &lmh_list);
62
63 mutex_unlock(&lmh_lock);
64
65 return 0;
66}
67
68/**
69 * gfs2_unregister_lockproto - Unregister a low-level locking protocol
70 * @proto: the protocol definition
71 *
72 */
73
74void gfs2_unregister_lockproto(const struct lm_lockops *proto)
75{
76 struct lmh_wrapper *lw;
77
78 mutex_lock(&lmh_lock);
79
80 list_for_each_entry(lw, &lmh_list, lw_list) {
81 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
82 list_del(&lw->lw_list);
83 mutex_unlock(&lmh_lock);
84 kfree(lw);
85 return;
86 }
87 }
88
89 mutex_unlock(&lmh_lock);
90
91 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
92 proto->lm_proto_name);
93}
94
95/**
96 * gfs2_mount_lockproto - Mount a lock protocol
97 * @proto_name - the name of the protocol
98 * @table_name - the name of the lock space
99 * @host_data - data specific to this host
100 * @cb - the callback to the code using the lock module
101 * @sdp - The GFS2 superblock
102 * @min_lvb_size - the mininum LVB size that the caller can deal with
103 * @flags - LM_MFLAG_*
104 * @lockstruct - a structure returned describing the mount
105 *
106 * Returns: 0 on success, -EXXX on failure
107 */
108
109int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
110 lm_callback_t cb, void *cb_data,
111 unsigned int min_lvb_size, int flags,
112 struct lm_lockstruct *lockstruct,
113 struct kobject *fskobj)
114{
115 struct lmh_wrapper *lw = NULL;
116 int try = 0;
117 int error, found;
118
119retry:
120 mutex_lock(&lmh_lock);
121
122 found = 0;
123 list_for_each_entry(lw, &lmh_list, lw_list) {
124 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
125 found = 1;
126 break;
127 }
128 }
129
130 if (!found) {
131 if (!try && capable(CAP_SYS_MODULE)) {
132 try = 1;
133 mutex_unlock(&lmh_lock);
134 request_module(proto_name);
135 goto retry;
136 }
137 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
138 error = -ENOENT;
139 goto out;
140 }
141
142 if (!try_module_get(lw->lw_ops->lm_owner)) {
143 try = 0;
144 mutex_unlock(&lmh_lock);
145 msleep(1000);
146 goto retry;
147 }
148
149 error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
150 min_lvb_size, flags, lockstruct, fskobj);
151 if (error)
152 module_put(lw->lw_ops->lm_owner);
153out:
154 mutex_unlock(&lmh_lock);
155 return error;
156}
157
/*
 * gfs2_unmount_lockproto - unmount a lock module
 * @lockstruct: the lockstruct passed into mount
 *
 * Calls the protocol's lm_unmount on the lock space and, if the protocol has
 * an owning module, drops a module reference.  Both steps run under
 * lmh_lock.
 */
void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
{
	mutex_lock(&lmh_lock);
	lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
	if (lockstruct->ls_ops->lm_owner)
		module_put(lockstruct->ls_ops->lm_owner);
	mutex_unlock(&lmh_lock);
}
166
/**
 * gfs2_withdraw_lockproto - abnormally unmount a lock module
 * @lockstruct: the lockstruct passed into mount
 *
 * Calls the protocol's lm_withdraw on the lock space and, if the protocol
 * has an owning module, drops a module reference.  Both steps run under
 * lmh_lock.
 */

void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
{
	mutex_lock(&lmh_lock);
	lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
	if (lockstruct->ls_ops->lm_owner)
		module_put(lockstruct->ls_ops->lm_owner);
	mutex_unlock(&lmh_lock);
}
181
182EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
183EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
184
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
new file mode 100644
index 000000000000..89b93b6b45cf
--- /dev/null
+++ b/fs/gfs2/locking/dlm/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
2lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
new file mode 100644
index 000000000000..b167addf9fd1
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -0,0 +1,524 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
14static void queue_complete(struct gdlm_lock *lp)
15{
16 struct gdlm_ls *ls = lp->ls;
17
18 clear_bit(LFL_ACTIVE, &lp->flags);
19
20 spin_lock(&ls->async_lock);
21 list_add_tail(&lp->clist, &ls->complete);
22 spin_unlock(&ls->async_lock);
23 wake_up(&ls->thread_wait);
24}
25
/* Completion callback handed to dlm_lock(): astarg is the gdlm_lock. */
static inline void gdlm_ast(void *astarg)
{
	struct gdlm_lock *lp = astarg;

	queue_complete(lp);
}
30
31static inline void gdlm_bast(void *astarg, int mode)
32{
33 struct gdlm_lock *lp = astarg;
34 struct gdlm_ls *ls = lp->ls;
35
36 if (!mode) {
37 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
38 lp->lockname.ln_type,
39 (unsigned long long)lp->lockname.ln_number);
40 return;
41 }
42
43 spin_lock(&ls->async_lock);
44 if (!lp->bast_mode) {
45 list_add_tail(&lp->blist, &ls->blocking);
46 lp->bast_mode = mode;
47 } else if (lp->bast_mode < mode)
48 lp->bast_mode = mode;
49 spin_unlock(&ls->async_lock);
50 wake_up(&ls->thread_wait);
51}
52
53void gdlm_queue_delayed(struct gdlm_lock *lp)
54{
55 struct gdlm_ls *ls = lp->ls;
56
57 spin_lock(&ls->async_lock);
58 list_add_tail(&lp->delay_list, &ls->delayed);
59 spin_unlock(&ls->async_lock);
60}
61
62/* convert gfs lock-state to dlm lock-mode */
63
64static s16 make_mode(s16 lmstate)
65{
66 switch (lmstate) {
67 case LM_ST_UNLOCKED:
68 return DLM_LOCK_NL;
69 case LM_ST_EXCLUSIVE:
70 return DLM_LOCK_EX;
71 case LM_ST_DEFERRED:
72 return DLM_LOCK_CW;
73 case LM_ST_SHARED:
74 return DLM_LOCK_PR;
75 }
76 gdlm_assert(0, "unknown LM state %d", lmstate);
77 return -1;
78}
79
80/* convert dlm lock-mode to gfs lock-state */
81
82s16 gdlm_make_lmstate(s16 dlmmode)
83{
84 switch (dlmmode) {
85 case DLM_LOCK_IV:
86 case DLM_LOCK_NL:
87 return LM_ST_UNLOCKED;
88 case DLM_LOCK_EX:
89 return LM_ST_EXCLUSIVE;
90 case DLM_LOCK_CW:
91 return LM_ST_DEFERRED;
92 case DLM_LOCK_PR:
93 return LM_ST_SHARED;
94 }
95 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
96 return -1;
97}
98
99/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
100 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
101
102static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
103{
104 s16 cur = make_mode(cur_state);
105 if (lp->cur != DLM_LOCK_IV)
106 gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
107}
108
109static inline unsigned int make_flags(struct gdlm_lock *lp,
110 unsigned int gfs_flags,
111 s16 cur, s16 req)
112{
113 unsigned int lkf = 0;
114
115 if (gfs_flags & LM_FLAG_TRY)
116 lkf |= DLM_LKF_NOQUEUE;
117
118 if (gfs_flags & LM_FLAG_TRY_1CB) {
119 lkf |= DLM_LKF_NOQUEUE;
120 lkf |= DLM_LKF_NOQUEUEBAST;
121 }
122
123 if (gfs_flags & LM_FLAG_PRIORITY) {
124 lkf |= DLM_LKF_NOORDER;
125 lkf |= DLM_LKF_HEADQUE;
126 }
127
128 if (gfs_flags & LM_FLAG_ANY) {
129 if (req == DLM_LOCK_PR)
130 lkf |= DLM_LKF_ALTCW;
131 else if (req == DLM_LOCK_CW)
132 lkf |= DLM_LKF_ALTPR;
133 }
134
135 if (lp->lksb.sb_lkid != 0) {
136 lkf |= DLM_LKF_CONVERT;
137
138 /* Conversion deadlock avoidance by DLM */
139
140 if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
141 !(lkf & DLM_LKF_NOQUEUE) &&
142 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
143 lkf |= DLM_LKF_CONVDEADLK;
144 }
145
146 if (lp->lvb)
147 lkf |= DLM_LKF_VALBLK;
148
149 return lkf;
150}
151
152/* make_strname - convert GFS lock numbers to a string */
153
154static inline void make_strname(struct lm_lockname *lockname,
155 struct gdlm_strname *str)
156{
157 sprintf(str->name, "%8x%16llx", lockname->ln_type,
158 (unsigned long long)lockname->ln_number);
159 str->namelen = GDLM_STRNAME_BYTES;
160}
161
162static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
163 struct gdlm_lock **lpp)
164{
165 struct gdlm_lock *lp;
166
167 lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
168 if (!lp)
169 return -ENOMEM;
170
171 lp->lockname = *name;
172 lp->ls = ls;
173 lp->cur = DLM_LOCK_IV;
174 lp->lvb = NULL;
175 lp->hold_null = NULL;
176 init_completion(&lp->ast_wait);
177 INIT_LIST_HEAD(&lp->clist);
178 INIT_LIST_HEAD(&lp->blist);
179 INIT_LIST_HEAD(&lp->delay_list);
180
181 spin_lock(&ls->async_lock);
182 list_add(&lp->all_list, &ls->all_locks);
183 ls->all_locks_count++;
184 spin_unlock(&ls->async_lock);
185
186 *lpp = lp;
187 return 0;
188}
189
190void gdlm_delete_lp(struct gdlm_lock *lp)
191{
192 struct gdlm_ls *ls = lp->ls;
193
194 spin_lock(&ls->async_lock);
195 if (!list_empty(&lp->clist))
196 list_del_init(&lp->clist);
197 if (!list_empty(&lp->blist))
198 list_del_init(&lp->blist);
199 if (!list_empty(&lp->delay_list))
200 list_del_init(&lp->delay_list);
201 gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
202 (unsigned long long)lp->lockname.ln_number);
203 list_del_init(&lp->all_list);
204 ls->all_locks_count--;
205 spin_unlock(&ls->async_lock);
206
207 kfree(lp);
208}
209
210int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
211 void **lockp)
212{
213 struct gdlm_lock *lp;
214 int error;
215
216 error = gdlm_create_lp(lockspace, name, &lp);
217
218 *lockp = lp;
219 return error;
220}
221
/* Release a lock object obtained from gdlm_get_lock(). */
void gdlm_put_lock(void *lock)
{
	struct gdlm_lock *lp = lock;

	gdlm_delete_lp(lp);
}
226
/*
 * gdlm_do_lock - submit lp's pending request (lp->req, lp->lkf) to the dlm
 *
 * Returns LM_OUT_ASYNC when the request was submitted, queued as delayed,
 * or failed a NOQUEUE attempt with -EAGAIN (which is reported through the
 * completion queue instead).  Returns LM_OUT_ERROR for any other dlm_lock()
 * failure.
 */
unsigned int gdlm_do_lock(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	struct gdlm_strname str;
	int error, bast = 1;

	/*
	 * When recovery is in progress, delay lock requests for submission
	 * once recovery is done. Requests for recovery (NOEXP) and unlocks
	 * can pass.
	 */

	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
	    !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
		gdlm_queue_delayed(lp);
		return LM_OUT_ASYNC;
	}

	/*
	 * Submit the actual lock request.
	 */

	/* LFL_NOBAST locks are submitted without a blocking callback. */
	if (test_bit(LFL_NOBAST, &lp->flags))
		bast = 0;

	make_strname(&lp->lockname, &str);

	set_bit(LFL_ACTIVE, &lp->flags);

	log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
		  (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
		  lp->cur, lp->req, lp->lkf);

	error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
			 str.name, str.namelen, 0, gdlm_ast, lp,
			 bast ? gdlm_bast : NULL);

	/* A failed "try" request is not an error: record -EAGAIN in the
	   status block and complete the request through the normal queue. */
	if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
		lp->lksb.sb_status = -EAGAIN;
		queue_complete(lp);
		error = 0;
	}

	if (error) {
		log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
			  "flags=%lx", ls->fsname, lp->lockname.ln_type,
			  (unsigned long long)lp->lockname.ln_number, error,
			  lp->cur, lp->req, lp->lkf, lp->flags);
		return LM_OUT_ERROR;
	}
	return LM_OUT_ASYNC;
}
279
/*
 * gdlm_do_unlock - submit a dlm unlock for lp
 *
 * Marks the lock LFL_DLM_UNLOCK and LFL_ACTIVE before calling dlm_unlock();
 * the value block is passed along (DLM_LKF_VALBLK) when the lock has an lvb.
 *
 * Returns LM_OUT_ASYNC if dlm_unlock() accepted the request, LM_OUT_ERROR
 * otherwise.
 */
static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	unsigned int lkf = 0;
	int error;

	set_bit(LFL_DLM_UNLOCK, &lp->flags);
	set_bit(LFL_ACTIVE, &lp->flags);

	if (lp->lvb)
		lkf = DLM_LKF_VALBLK;

	log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
		  (unsigned long long)lp->lockname.ln_number,
		  lp->lksb.sb_lkid, lp->cur, lkf);

	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);

	if (error) {
		log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
			  "flags=%lx", ls->fsname, lp->lockname.ln_type,
			  (unsigned long long)lp->lockname.ln_number, error,
			  lp->cur, lp->req, lp->lkf, lp->flags);
		return LM_OUT_ERROR;
	}
	return LM_OUT_ASYNC;
}
307
308unsigned int gdlm_lock(void *lock, unsigned int cur_state,
309 unsigned int req_state, unsigned int flags)
310{
311 struct gdlm_lock *lp = lock;
312
313 clear_bit(LFL_DLM_CANCEL, &lp->flags);
314 if (flags & LM_FLAG_NOEXP)
315 set_bit(LFL_NOBLOCK, &lp->flags);
316
317 check_cur_state(lp, cur_state);
318 lp->req = make_mode(req_state);
319 lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
320
321 return gdlm_do_lock(lp);
322}
323
324unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
325{
326 struct gdlm_lock *lp = lock;
327
328 clear_bit(LFL_DLM_CANCEL, &lp->flags);
329 if (lp->cur == DLM_LOCK_IV)
330 return 0;
331 return gdlm_do_unlock(lp);
332}
333
/*
 * gdlm_cancel - try to cancel an outstanding request on a lock
 * @lock: the gdlm_lock
 *
 * Three cases are handled in turn:
 *  - the request is still on the delayed list: remove it, flag it
 *    LFL_CANCEL and complete it through the normal completion queue;
 *  - the lock has no active request, or the active request is an unlock:
 *    nothing to cancel;
 *  - otherwise the request is with the dlm: ask the dlm to cancel it.
 *
 * Does nothing if a dlm cancel has already been issued (LFL_DLM_CANCEL).
 */
void gdlm_cancel(void *lock)
{
	struct gdlm_lock *lp = lock;
	struct gdlm_ls *ls = lp->ls;
	int error, delay_list = 0;

	if (test_bit(LFL_DLM_CANCEL, &lp->flags))
		return;

	log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
		 (unsigned long long)lp->lockname.ln_number, lp->flags);

	spin_lock(&ls->async_lock);
	if (!list_empty(&lp->delay_list)) {
		list_del_init(&lp->delay_list);
		delay_list = 1;
	}
	spin_unlock(&ls->async_lock);

	if (delay_list) {
		set_bit(LFL_CANCEL, &lp->flags);
		set_bit(LFL_ACTIVE, &lp->flags);
		queue_complete(lp);
		return;
	}

	if (!test_bit(LFL_ACTIVE, &lp->flags) ||
	    test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
		log_info("gdlm_cancel skip %x,%llx flags %lx",
			 lp->lockname.ln_type,
			 (unsigned long long)lp->lockname.ln_number, lp->flags);
		return;
	}

	/* the lock is blocked in the dlm */

	set_bit(LFL_DLM_CANCEL, &lp->flags);
	set_bit(LFL_ACTIVE, &lp->flags);

	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
			   NULL, lp);

	log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
		 lp->lockname.ln_type,
		 (unsigned long long)lp->lockname.ln_number, lp->flags);

	/* On -EBUSY the cancel flag is cleared again, so a later
	   gdlm_cancel() call is not short-circuited at the top. */
	if (error == -EBUSY)
		clear_bit(LFL_DLM_CANCEL, &lp->flags);
}
383
384static int gdlm_add_lvb(struct gdlm_lock *lp)
385{
386 char *lvb;
387
388 lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
389 if (!lvb)
390 return -ENOMEM;
391
392 lp->lksb.sb_lvbptr = lvb;
393 lp->lvb = lvb;
394 return 0;
395}
396
397static void gdlm_del_lvb(struct gdlm_lock *lp)
398{
399 kfree(lp->lvb);
400 lp->lvb = NULL;
401 lp->lksb.sb_lvbptr = NULL;
402}
403
404/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
405 the completion) because gfs won't call hold_lvb() during a callback (from
406 the context of a lock_dlm thread). */
407
408static int hold_null_lock(struct gdlm_lock *lp)
409{
410 struct gdlm_lock *lpn = NULL;
411 int error;
412
413 if (lp->hold_null) {
414 printk(KERN_INFO "lock_dlm: lvb already held\n");
415 return 0;
416 }
417
418 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
419 if (error)
420 goto out;
421
422 lpn->lksb.sb_lvbptr = junk_lvb;
423 lpn->lvb = junk_lvb;
424
425 lpn->req = DLM_LOCK_NL;
426 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
427 set_bit(LFL_NOBAST, &lpn->flags);
428 set_bit(LFL_INLOCK, &lpn->flags);
429
430 init_completion(&lpn->ast_wait);
431 gdlm_do_lock(lpn);
432 wait_for_completion(&lpn->ast_wait);
433 error = lpn->lksb.sb_status;
434 if (error) {
435 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
436 error);
437 gdlm_delete_lp(lpn);
438 lpn = NULL;
439 }
440out:
441 lp->hold_null = lpn;
442 return error;
443}
444
/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs may call unhold_lvb() during a callback (from
   the context of a lock_dlm thread) which could cause a deadlock since the
   other lock_dlm thread could be engaged in recovery.

   Detaches the junk value block from the NL hold lock, flags it
   LFL_UNLOCK_DELETE and submits an asynchronous unlock for it; the return
   value of that submit is not checked.  Asserts that a hold lock exists. */

static void unhold_null_lock(struct gdlm_lock *lp)
{
	struct gdlm_lock *lpn = lp->hold_null;

	gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
		    (unsigned long long)lp->lockname.ln_number);
	lpn->lksb.sb_lvbptr = NULL;
	lpn->lvb = NULL;
	set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
	gdlm_do_unlock(lpn);
	lp->hold_null = NULL;
}
462
463/* Acquire a NL lock because gfs requires the value block to remain
464 intact on the resource while the lvb is "held" even if it's holding no locks
465 on the resource. */
466
467int gdlm_hold_lvb(void *lock, char **lvbp)
468{
469 struct gdlm_lock *lp = lock;
470 int error;
471
472 error = gdlm_add_lvb(lp);
473 if (error)
474 return error;
475
476 *lvbp = lp->lvb;
477
478 error = hold_null_lock(lp);
479 if (error)
480 gdlm_del_lvb(lp);
481
482 return error;
483}
484
/* Undo gdlm_hold_lvb(): release the NL hold lock, then free the lock's
   value block.  @lvb is not used. */
void gdlm_unhold_lvb(void *lock, char *lvb)
{
	struct gdlm_lock *held = lock;

	unhold_null_lock(held);
	gdlm_del_lvb(held);
}
492
493void gdlm_submit_delayed(struct gdlm_ls *ls)
494{
495 struct gdlm_lock *lp, *safe;
496
497 spin_lock(&ls->async_lock);
498 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
499 list_del_init(&lp->delay_list);
500 list_add_tail(&lp->delay_list, &ls->submit);
501 }
502 spin_unlock(&ls->async_lock);
503 wake_up(&ls->thread_wait);
504}
505
506int gdlm_release_all_locks(struct gdlm_ls *ls)
507{
508 struct gdlm_lock *lp, *safe;
509 int count = 0;
510
511 spin_lock(&ls->async_lock);
512 list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
513 list_del_init(&lp->all_list);
514
515 if (lp->lvb && lp->lvb != junk_lvb)
516 kfree(lp->lvb);
517 kfree(lp);
518 count++;
519 }
520 spin_unlock(&ls->async_lock);
521
522 return count;
523}
524
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
new file mode 100644
index 000000000000..33af707a4d3f
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -0,0 +1,187 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/string.h>
19#include <linux/list.h>
20#include <linux/socket.h>
21#include <linux/delay.h>
22#include <linux/kthread.h>
23#include <linux/kobject.h>
24#include <linux/fcntl.h>
25#include <linux/wait.h>
26#include <net/sock.h>
27
28#include <linux/dlm.h>
29#include <linux/lm_interface.h>
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000
40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128
42
/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
   We sprintf these numbers into a 24 byte string of hex values to make them
   human-readable (to make debugging simpler.)

   The name array has no room for a terminating NUL; namelen carries the
   length that is passed to the dlm. */

struct gdlm_strname {
	unsigned char name[GDLM_STRNAME_BYTES];
	unsigned short namelen;
};
51
/* Bit numbers for gdlm_ls.flags. */
enum {
	DFL_BLOCK_LOCKS = 0,	/* new lock requests are queued as delayed */
	DFL_SPECTATOR = 1,
	DFL_WITHDRAW = 2,
};
57
/* Per-mount lock space state for lock_dlm. */
struct gdlm_ls {
	u32 id;
	int jid;
	int first;
	int first_done;
	unsigned long flags;		/* DFL_ bits */
	struct kobject kobj;
	char clustername[GDLM_NAME_LEN];
	char fsname[GDLM_NAME_LEN];	/* printed in debug messages */
	int fsflags;
	dlm_lockspace_t *dlm_lockspace;	/* handle passed to dlm_lock/dlm_unlock */
	lm_callback_t fscb;
	struct gfs2_sbd *sdp;
	int recover_jid;
	int recover_jid_done;
	int recover_jid_status;
	spinlock_t async_lock;		/* taken around all list updates below */
	struct list_head complete;	/* locks with a finished request (clist) */
	struct list_head blocking;	/* locks with a pending bast (blist) */
	struct list_head delayed;	/* requests held back (delay_list) */
	struct list_head submit;	/* delayed requests ready to submit */
	struct list_head all_locks;	/* every lock in this lock space */
	u32 all_locks_count;		/* number of entries on all_locks */
	wait_queue_head_t wait_control;
	struct task_struct *thread1;
	struct task_struct *thread2;
	wait_queue_head_t thread_wait;	/* woken when work is queued */
	unsigned long drop_time;
	int drop_locks_count;
	int drop_locks_period;
};
89
/* Bit numbers for gdlm_lock.flags. */
enum {
	LFL_NOBLOCK = 0,	/* request may pass while DFL_BLOCK_LOCKS is set */
	LFL_NOCACHE = 1,
	LFL_DLM_UNLOCK = 2,	/* a dlm unlock has been submitted */
	LFL_DLM_CANCEL = 3,	/* a dlm cancel has been submitted */
	LFL_SYNC_LVB = 4,
	LFL_FORCE_PROMOTE = 5,	/* suppresses DLM_LKF_CONVDEADLK on converts */
	LFL_REREQUEST = 6,
	LFL_ACTIVE = 7,		/* a request is outstanding */
	LFL_INLOCK = 8,		/* set on the internal NL hold lock */
	LFL_CANCEL = 9,		/* request was cancelled while delayed */
	LFL_NOBAST = 10,	/* submit without a blocking callback */
	LFL_HEADQUE = 11,
	LFL_UNLOCK_DELETE = 12,	/* set on the NL hold lock before unlocking it */
};
105
/* lock_dlm's per-lock state; handed to GFS as an opaque pointer. */
struct gdlm_lock {
	struct gdlm_ls *ls;		/* owning lock space */
	struct lm_lockname lockname;	/* GFS lock type + number */
	char *lvb;			/* value block, or NULL if none attached */
	struct dlm_lksb lksb;		/* dlm status block for this lock */

	s16 cur;			/* current dlm mode; DLM_LOCK_IV when new */
	s16 req;			/* requested dlm mode */
	s16 prev_req;
	u32 lkf; /* dlm flags DLM_LKF_ */
	unsigned long flags; /* lock_dlm flags LFL_ */

	int bast_mode; /* protected by async_lock */
	struct completion ast_wait;	/* waited on for the NL hold lock */

	struct list_head clist; /* complete */
	struct list_head blist; /* blocking */
	struct list_head delay_list; /* delayed */
	struct list_head all_list; /* all locks for the fs */
	struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
};
127
/* Fatal assertion: when @assertion is false, print the stringified
   expression and a printf-style message at KERN_EMERG, then BUG(). */
#define gdlm_assert(assertion, fmt, args...) \
do { \
	if (unlikely(!(assertion))) { \
		printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
				  "lock_dlm: " fmt "\n", \
				  #assertion, ##args); \
		BUG(); \
	} \
} while (0)
137
/* Logging helpers: every message gets a "lock_dlm: " prefix and a trailing
   newline.  log_debug() expands to nothing unless LOCK_DLM_LOG_DEBUG is
   defined. */
#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
#ifdef LOCK_DLM_LOG_DEBUG
#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
#else
#define log_debug(fmt, arg...)
#endif
146
147/* sysfs.c */
148
149int gdlm_sysfs_init(void);
150void gdlm_sysfs_exit(void);
151int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
152void gdlm_kobject_release(struct gdlm_ls *);
153
154/* thread.c */
155
156int gdlm_init_threads(struct gdlm_ls *);
157void gdlm_release_threads(struct gdlm_ls *);
158
159/* lock.c */
160
161s16 gdlm_make_lmstate(s16);
162void gdlm_queue_delayed(struct gdlm_lock *);
163void gdlm_submit_delayed(struct gdlm_ls *);
164int gdlm_release_all_locks(struct gdlm_ls *);
165void gdlm_delete_lp(struct gdlm_lock *);
166unsigned int gdlm_do_lock(struct gdlm_lock *);
167
168int gdlm_get_lock(void *, struct lm_lockname *, void **);
169void gdlm_put_lock(void *);
170unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
171unsigned int gdlm_unlock(void *, unsigned int);
172void gdlm_cancel(void *);
173int gdlm_hold_lvb(void *, char **);
174void gdlm_unhold_lvb(void *, char *);
175
176/* plock.c */
177
178int gdlm_plock_init(void);
179void gdlm_plock_exit(void);
180int gdlm_plock(void *, struct lm_lockname *, struct file *, int,
181 struct file_lock *);
182int gdlm_plock_get(void *, struct lm_lockname *, struct file *,
183 struct file_lock *);
184int gdlm_punlock(void *, struct lm_lockname *, struct file *,
185 struct file_lock *);
186#endif
187
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
new file mode 100644
index 000000000000..2194b1d5b5ec
--- /dev/null
+++ b/fs/gfs2/locking/dlm/main.c
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops;
18
19static int __init init_lock_dlm(void)
20{
21 int error;
22
23 error = gfs2_register_lockproto(&gdlm_ops);
24 if (error) {
25 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
26 error);
27 return error;
28 }
29
30 error = gdlm_sysfs_init();
31 if (error) {
32 gfs2_unregister_lockproto(&gdlm_ops);
33 return error;
34 }
35
36 error = gdlm_plock_init();
37 if (error) {
38 gdlm_sysfs_exit();
39 gfs2_unregister_lockproto(&gdlm_ops);
40 return error;
41 }
42
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0;
49}
50
51static void __exit exit_lock_dlm(void)
52{
53 gdlm_plock_exit();
54 gdlm_sysfs_exit();
55 gfs2_unregister_lockproto(&gdlm_ops);
56}
57
58module_init(init_lock_dlm);
59module_exit(exit_lock_dlm);
60
61MODULE_DESCRIPTION("GFS DLM Locking Module");
62MODULE_AUTHOR("Red Hat, Inc.");
63MODULE_LICENSE("GPL");
64
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
new file mode 100644
index 000000000000..1f94dd35a943
--- /dev/null
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -0,0 +1,255 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12int gdlm_drop_count;
13int gdlm_drop_period;
14const struct lm_lockops gdlm_ops;
15
16
17static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
18 int flags, char *table_name)
19{
20 struct gdlm_ls *ls;
21 char buf[256], *p;
22
23 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
24 if (!ls)
25 return NULL;
26
27 ls->drop_locks_count = gdlm_drop_count;
28 ls->drop_locks_period = gdlm_drop_period;
29 ls->fscb = cb;
30 ls->sdp = sdp;
31 ls->fsflags = flags;
32 spin_lock_init(&ls->async_lock);
33 INIT_LIST_HEAD(&ls->complete);
34 INIT_LIST_HEAD(&ls->blocking);
35 INIT_LIST_HEAD(&ls->delayed);
36 INIT_LIST_HEAD(&ls->submit);
37 INIT_LIST_HEAD(&ls->all_locks);
38 init_waitqueue_head(&ls->thread_wait);
39 init_waitqueue_head(&ls->wait_control);
40 ls->thread1 = NULL;
41 ls->thread2 = NULL;
42 ls->drop_time = jiffies;
43 ls->jid = -1;
44
45 strncpy(buf, table_name, 256);
46 buf[255] = '\0';
47
48 p = strstr(buf, ":");
49 if (!p) {
50 log_info("invalid table_name \"%s\"", table_name);
51 kfree(ls);
52 return NULL;
53 }
54 *p = '\0';
55 p++;
56
57 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
58 strncpy(ls->fsname, p, GDLM_NAME_LEN);
59
60 return ls;
61}
62
63static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
64{
65 char data[256];
66 char *options, *x, *y;
67 int error = 0;
68
69 memset(data, 0, 256);
70 strncpy(data, data_arg, 255);
71
72 for (options = data; (x = strsep(&options, ":")); ) {
73 if (!*x)
74 continue;
75
76 y = strchr(x, '=');
77 if (y)
78 *y++ = 0;
79
80 if (!strcmp(x, "jid")) {
81 if (!y) {
82 log_error("need argument to jid");
83 error = -EINVAL;
84 break;
85 }
86 sscanf(y, "%u", &ls->jid);
87
88 } else if (!strcmp(x, "first")) {
89 if (!y) {
90 log_error("need argument to first");
91 error = -EINVAL;
92 break;
93 }
94 sscanf(y, "%u", &ls->first);
95
96 } else if (!strcmp(x, "id")) {
97 if (!y) {
98 log_error("need argument to id");
99 error = -EINVAL;
100 break;
101 }
102 sscanf(y, "%u", &ls->id);
103
104 } else if (!strcmp(x, "nodir")) {
105 if (!y) {
106 log_error("need argument to nodir");
107 error = -EINVAL;
108 break;
109 }
110 sscanf(y, "%u", nodir);
111
112 } else {
113 log_error("unkonwn option: %s", x);
114 error = -EINVAL;
115 break;
116 }
117 }
118
119 return error;
120}
121
122static int gdlm_mount(char *table_name, char *host_data,
123 lm_callback_t cb, void *cb_data,
124 unsigned int min_lvb_size, int flags,
125 struct lm_lockstruct *lockstruct,
126 struct kobject *fskobj)
127{
128 struct gdlm_ls *ls;
129 int error = -ENOMEM, nodir = 0;
130
131 if (min_lvb_size > GDLM_LVB_SIZE)
132 goto out;
133
134 ls = init_gdlm(cb, cb_data, flags, table_name);
135 if (!ls)
136 goto out;
137
138 error = make_args(ls, host_data, &nodir);
139 if (error)
140 goto out;
141
142 error = gdlm_init_threads(ls);
143 if (error)
144 goto out_free;
145
146 error = gdlm_kobject_setup(ls, fskobj);
147 if (error)
148 goto out_thread;
149
150 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
151 &ls->dlm_lockspace,
152 nodir ? DLM_LSFL_NODIR : 0,
153 GDLM_LVB_SIZE);
154 if (error) {
155 log_error("dlm_new_lockspace error %d", error);
156 goto out_kobj;
157 }
158
159 lockstruct->ls_jid = ls->jid;
160 lockstruct->ls_first = ls->first;
161 lockstruct->ls_lockspace = ls;
162 lockstruct->ls_ops = &gdlm_ops;
163 lockstruct->ls_flags = 0;
164 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
165 return 0;
166
167out_kobj:
168 gdlm_kobject_release(ls);
169out_thread:
170 gdlm_release_threads(ls);
171out_free:
172 kfree(ls);
173out:
174 return error;
175}
176
177static void gdlm_unmount(void *lockspace)
178{
179 struct gdlm_ls *ls = lockspace;
180 int rv;
181
182 log_debug("unmount flags %lx", ls->flags);
183
184 /* FIXME: serialize unmount and withdraw in case they
185 happen at once. Also, if unmount follows withdraw,
186 wait for withdraw to finish. */
187
188 if (test_bit(DFL_WITHDRAW, &ls->flags))
189 goto out;
190
191 gdlm_kobject_release(ls);
192 dlm_release_lockspace(ls->dlm_lockspace, 2);
193 gdlm_release_threads(ls);
194 rv = gdlm_release_all_locks(ls);
195 if (rv)
196 log_info("gdlm_unmount: %d stray locks freed", rv);
197out:
198 kfree(ls);
199}
200
201static void gdlm_recovery_done(void *lockspace, unsigned int jid,
202 unsigned int message)
203{
204 struct gdlm_ls *ls = lockspace;
205 ls->recover_jid_done = jid;
206 ls->recover_jid_status = message;
207 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
208}
209
210static void gdlm_others_may_mount(void *lockspace)
211{
212 struct gdlm_ls *ls = lockspace;
213 ls->first_done = 1;
214 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
215}
216
217/* Userspace gets the offline uevent, blocks new gfs locks on
218 other mounters, and lets us know (sets WITHDRAW flag). Then,
219 userspace leaves the mount group while we leave the lockspace. */
220
221static void gdlm_withdraw(void *lockspace)
222{
223 struct gdlm_ls *ls = lockspace;
224
225 kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
226
227 wait_event_interruptible(ls->wait_control,
228 test_bit(DFL_WITHDRAW, &ls->flags));
229
230 dlm_release_lockspace(ls->dlm_lockspace, 2);
231 gdlm_release_threads(ls);
232 gdlm_release_all_locks(ls);
233 gdlm_kobject_release(ls);
234}
235
236const struct lm_lockops gdlm_ops = {
237 .lm_proto_name = "lock_dlm",
238 .lm_mount = gdlm_mount,
239 .lm_others_may_mount = gdlm_others_may_mount,
240 .lm_unmount = gdlm_unmount,
241 .lm_withdraw = gdlm_withdraw,
242 .lm_get_lock = gdlm_get_lock,
243 .lm_put_lock = gdlm_put_lock,
244 .lm_lock = gdlm_lock,
245 .lm_unlock = gdlm_unlock,
246 .lm_plock = gdlm_plock,
247 .lm_punlock = gdlm_punlock,
248 .lm_plock_get = gdlm_plock_get,
249 .lm_cancel = gdlm_cancel,
250 .lm_hold_lvb = gdlm_hold_lvb,
251 .lm_unhold_lvb = gdlm_unhold_lvb,
252 .lm_recovery_done = gdlm_recovery_done,
253 .lm_owner = THIS_MODULE,
254};
255
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
new file mode 100644
index 000000000000..7365aec9511b
--- /dev/null
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -0,0 +1,301 @@
1/*
2 * Copyright (C) 2005 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License version 2.
7 */
8
9#include <linux/miscdevice.h>
10#include <linux/lock_dlm_plock.h>
11
12#include "lock_dlm.h"
13
14
15static spinlock_t ops_lock;
16static struct list_head send_list;
17static struct list_head recv_list;
18static wait_queue_head_t send_wq;
19static wait_queue_head_t recv_wq;
20
21struct plock_op {
22 struct list_head list;
23 int done;
24 struct gdlm_plock_info info;
25};
26
27static inline void set_version(struct gdlm_plock_info *info)
28{
29 info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
30 info->version[1] = GDLM_PLOCK_VERSION_MINOR;
31 info->version[2] = GDLM_PLOCK_VERSION_PATCH;
32}
33
34static int check_version(struct gdlm_plock_info *info)
35{
36 if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
37 (GDLM_PLOCK_VERSION_MINOR < info->version[1])) {
38 log_error("plock device version mismatch: "
39 "kernel (%u.%u.%u), user (%u.%u.%u)",
40 GDLM_PLOCK_VERSION_MAJOR,
41 GDLM_PLOCK_VERSION_MINOR,
42 GDLM_PLOCK_VERSION_PATCH,
43 info->version[0],
44 info->version[1],
45 info->version[2]);
46 return -EINVAL;
47 }
48 return 0;
49}
50
51static void send_op(struct plock_op *op)
52{
53 set_version(&op->info);
54 INIT_LIST_HEAD(&op->list);
55 spin_lock(&ops_lock);
56 list_add_tail(&op->list, &send_list);
57 spin_unlock(&ops_lock);
58 wake_up(&send_wq);
59}
60
61int gdlm_plock(void *lockspace, struct lm_lockname *name,
62 struct file *file, int cmd, struct file_lock *fl)
63{
64 struct gdlm_ls *ls = lockspace;
65 struct plock_op *op;
66 int rv;
67
68 op = kzalloc(sizeof(*op), GFP_KERNEL);
69 if (!op)
70 return -ENOMEM;
71
72 op->info.optype = GDLM_PLOCK_OP_LOCK;
73 op->info.pid = fl->fl_pid;
74 op->info.ex = (fl->fl_type == F_WRLCK);
75 op->info.wait = IS_SETLKW(cmd);
76 op->info.fsid = ls->id;
77 op->info.number = name->ln_number;
78 op->info.start = fl->fl_start;
79 op->info.end = fl->fl_end;
80 op->info.owner = (__u64)(long) fl->fl_owner;
81
82 send_op(op);
83 wait_event(recv_wq, (op->done != 0));
84
85 spin_lock(&ops_lock);
86 if (!list_empty(&op->list)) {
87 printk(KERN_INFO "plock op on list\n");
88 list_del(&op->list);
89 }
90 spin_unlock(&ops_lock);
91
92 rv = op->info.rv;
93
94 if (!rv) {
95 if (posix_lock_file_wait(file, fl) < 0)
96 log_error("gdlm_plock: vfs lock error %x,%llx",
97 name->ln_type,
98 (unsigned long long)name->ln_number);
99 }
100
101 kfree(op);
102 return rv;
103}
104
105int gdlm_punlock(void *lockspace, struct lm_lockname *name,
106 struct file *file, struct file_lock *fl)
107{
108 struct gdlm_ls *ls = lockspace;
109 struct plock_op *op;
110 int rv;
111
112 op = kzalloc(sizeof(*op), GFP_KERNEL);
113 if (!op)
114 return -ENOMEM;
115
116 if (posix_lock_file_wait(file, fl) < 0)
117 log_error("gdlm_punlock: vfs unlock error %x,%llx",
118 name->ln_type, (unsigned long long)name->ln_number);
119
120 op->info.optype = GDLM_PLOCK_OP_UNLOCK;
121 op->info.pid = fl->fl_pid;
122 op->info.fsid = ls->id;
123 op->info.number = name->ln_number;
124 op->info.start = fl->fl_start;
125 op->info.end = fl->fl_end;
126 op->info.owner = (__u64)(long) fl->fl_owner;
127
128 send_op(op);
129 wait_event(recv_wq, (op->done != 0));
130
131 spin_lock(&ops_lock);
132 if (!list_empty(&op->list)) {
133 printk(KERN_INFO "punlock op on list\n");
134 list_del(&op->list);
135 }
136 spin_unlock(&ops_lock);
137
138 rv = op->info.rv;
139
140 kfree(op);
141 return rv;
142}
143
144int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
145 struct file *file, struct file_lock *fl)
146{
147 struct gdlm_ls *ls = lockspace;
148 struct plock_op *op;
149 int rv;
150
151 op = kzalloc(sizeof(*op), GFP_KERNEL);
152 if (!op)
153 return -ENOMEM;
154
155 op->info.optype = GDLM_PLOCK_OP_GET;
156 op->info.pid = fl->fl_pid;
157 op->info.ex = (fl->fl_type == F_WRLCK);
158 op->info.fsid = ls->id;
159 op->info.number = name->ln_number;
160 op->info.start = fl->fl_start;
161 op->info.end = fl->fl_end;
162
163 send_op(op);
164 wait_event(recv_wq, (op->done != 0));
165
166 spin_lock(&ops_lock);
167 if (!list_empty(&op->list)) {
168 printk(KERN_INFO "plock_get op on list\n");
169 list_del(&op->list);
170 }
171 spin_unlock(&ops_lock);
172
173 rv = op->info.rv;
174
175 if (rv == 0)
176 fl->fl_type = F_UNLCK;
177 else if (rv > 0) {
178 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
179 fl->fl_pid = op->info.pid;
180 fl->fl_start = op->info.start;
181 fl->fl_end = op->info.end;
182 }
183
184 kfree(op);
185 return rv;
186}
187
188/* a read copies out one plock request from the send list */
189static ssize_t dev_read(struct file *file, char __user *u, size_t count,
190 loff_t *ppos)
191{
192 struct gdlm_plock_info info;
193 struct plock_op *op = NULL;
194
195 if (count < sizeof(info))
196 return -EINVAL;
197
198 spin_lock(&ops_lock);
199 if (!list_empty(&send_list)) {
200 op = list_entry(send_list.next, struct plock_op, list);
201 list_move(&op->list, &recv_list);
202 memcpy(&info, &op->info, sizeof(info));
203 }
204 spin_unlock(&ops_lock);
205
206 if (!op)
207 return -EAGAIN;
208
209 if (copy_to_user(u, &info, sizeof(info)))
210 return -EFAULT;
211 return sizeof(info);
212}
213
214/* a write copies in one plock result that should match a plock_op
215 on the recv list */
216static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
217 loff_t *ppos)
218{
219 struct gdlm_plock_info info;
220 struct plock_op *op;
221 int found = 0;
222
223 if (count != sizeof(info))
224 return -EINVAL;
225
226 if (copy_from_user(&info, u, sizeof(info)))
227 return -EFAULT;
228
229 if (check_version(&info))
230 return -EINVAL;
231
232 spin_lock(&ops_lock);
233 list_for_each_entry(op, &recv_list, list) {
234 if (op->info.fsid == info.fsid && op->info.number == info.number &&
235 op->info.owner == info.owner) {
236 list_del_init(&op->list);
237 found = 1;
238 op->done = 1;
239 memcpy(&op->info, &info, sizeof(info));
240 break;
241 }
242 }
243 spin_unlock(&ops_lock);
244
245 if (found)
246 wake_up(&recv_wq);
247 else
248 printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
249 (unsigned long long)info.number);
250 return count;
251}
252
253static unsigned int dev_poll(struct file *file, poll_table *wait)
254{
255 poll_wait(file, &send_wq, wait);
256
257 spin_lock(&ops_lock);
258 if (!list_empty(&send_list)) {
259 spin_unlock(&ops_lock);
260 return POLLIN | POLLRDNORM;
261 }
262 spin_unlock(&ops_lock);
263 return 0;
264}
265
266static struct file_operations dev_fops = {
267 .read = dev_read,
268 .write = dev_write,
269 .poll = dev_poll,
270 .owner = THIS_MODULE
271};
272
273static struct miscdevice plock_dev_misc = {
274 .minor = MISC_DYNAMIC_MINOR,
275 .name = GDLM_PLOCK_MISC_NAME,
276 .fops = &dev_fops
277};
278
279int gdlm_plock_init(void)
280{
281 int rv;
282
283 spin_lock_init(&ops_lock);
284 INIT_LIST_HEAD(&send_list);
285 INIT_LIST_HEAD(&recv_list);
286 init_waitqueue_head(&send_wq);
287 init_waitqueue_head(&recv_wq);
288
289 rv = misc_register(&plock_dev_misc);
290 if (rv)
291 printk(KERN_INFO "gdlm_plock_init: misc_register failed %d",
292 rv);
293 return rv;
294}
295
296void gdlm_plock_exit(void)
297{
298 if (misc_deregister(&plock_dev_misc) < 0)
299 printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed");
300}
301
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
new file mode 100644
index 000000000000..29ae06f94944
--- /dev/null
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -0,0 +1,226 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15extern struct lm_lockops gdlm_ops;
16
17static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
18{
19 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
20}
21
22static ssize_t block_show(struct gdlm_ls *ls, char *buf)
23{
24 ssize_t ret;
25 int val = 0;
26
27 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
28 val = 1;
29 ret = sprintf(buf, "%d\n", val);
30 return ret;
31}
32
33static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
34{
35 ssize_t ret = len;
36 int val;
37
38 val = simple_strtol(buf, NULL, 0);
39
40 if (val == 1)
41 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 else if (val == 0) {
43 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
44 gdlm_submit_delayed(ls);
45 } else {
46 ret = -EINVAL;
47 }
48 return ret;
49}
50
51static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
52{
53 ssize_t ret;
54 int val = 0;
55
56 if (test_bit(DFL_WITHDRAW, &ls->flags))
57 val = 1;
58 ret = sprintf(buf, "%d\n", val);
59 return ret;
60}
61
62static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
63{
64 ssize_t ret = len;
65 int val;
66
67 val = simple_strtol(buf, NULL, 0);
68
69 if (val == 1)
70 set_bit(DFL_WITHDRAW, &ls->flags);
71 else
72 ret = -EINVAL;
73 wake_up(&ls->wait_control);
74 return ret;
75}
76
77static ssize_t id_show(struct gdlm_ls *ls, char *buf)
78{
79 return sprintf(buf, "%u\n", ls->id);
80}
81
82static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
83{
84 return sprintf(buf, "%d\n", ls->jid);
85}
86
87static ssize_t first_show(struct gdlm_ls *ls, char *buf)
88{
89 return sprintf(buf, "%d\n", ls->first);
90}
91
92static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
93{
94 return sprintf(buf, "%d\n", ls->first_done);
95}
96
97static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
98{
99 return sprintf(buf, "%d\n", ls->recover_jid);
100}
101
102static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
103{
104 ls->recover_jid = simple_strtol(buf, NULL, 0);
105 ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
106 return len;
107}
108
109static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
110{
111 return sprintf(buf, "%d\n", ls->recover_jid_done);
112}
113
114static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
115{
116 return sprintf(buf, "%d\n", ls->recover_jid_status);
117}
118
119struct gdlm_attr {
120 struct attribute attr;
121 ssize_t (*show)(struct gdlm_ls *, char *);
122 ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
123};
124
125#define GDLM_ATTR(_name,_mode,_show,_store) \
126static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
127
128GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
129GDLM_ATTR(block, 0644, block_show, block_store);
130GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
131GDLM_ATTR(id, 0444, id_show, NULL);
132GDLM_ATTR(jid, 0444, jid_show, NULL);
133GDLM_ATTR(first, 0444, first_show, NULL);
134GDLM_ATTR(first_done, 0444, first_done_show, NULL);
135GDLM_ATTR(recover, 0644, recover_show, recover_store);
136GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
137GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
138
139static struct attribute *gdlm_attrs[] = {
140 &gdlm_attr_proto_name.attr,
141 &gdlm_attr_block.attr,
142 &gdlm_attr_withdraw.attr,
143 &gdlm_attr_id.attr,
144 &gdlm_attr_jid.attr,
145 &gdlm_attr_first.attr,
146 &gdlm_attr_first_done.attr,
147 &gdlm_attr_recover.attr,
148 &gdlm_attr_recover_done.attr,
149 &gdlm_attr_recover_status.attr,
150 NULL,
151};
152
153static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
154 char *buf)
155{
156 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
157 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
158 return a->show ? a->show(ls, buf) : 0;
159}
160
161static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
162 const char *buf, size_t len)
163{
164 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
165 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
166 return a->store ? a->store(ls, buf, len) : len;
167}
168
169static struct sysfs_ops gdlm_attr_ops = {
170 .show = gdlm_attr_show,
171 .store = gdlm_attr_store,
172};
173
174static struct kobj_type gdlm_ktype = {
175 .default_attrs = gdlm_attrs,
176 .sysfs_ops = &gdlm_attr_ops,
177};
178
179static struct kset gdlm_kset = {
180 .subsys = &kernel_subsys,
181 .kobj = {.name = "lock_dlm",},
182 .ktype = &gdlm_ktype,
183};
184
185int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
186{
187 int error;
188
189 error = kobject_set_name(&ls->kobj, "%s", "lock_module");
190 if (error) {
191 log_error("can't set kobj name %d", error);
192 return error;
193 }
194
195 ls->kobj.kset = &gdlm_kset;
196 ls->kobj.ktype = &gdlm_ktype;
197 ls->kobj.parent = fskobj;
198
199 error = kobject_register(&ls->kobj);
200 if (error)
201 log_error("can't register kobj %d", error);
202
203 return error;
204}
205
206void gdlm_kobject_release(struct gdlm_ls *ls)
207{
208 kobject_unregister(&ls->kobj);
209}
210
211int gdlm_sysfs_init(void)
212{
213 int error;
214
215 error = kset_register(&gdlm_kset);
216 if (error)
217 printk("lock_dlm: cannot register kset %d\n", error);
218
219 return error;
220}
221
222void gdlm_sysfs_exit(void)
223{
224 kset_unregister(&gdlm_kset);
225}
226
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
new file mode 100644
index 000000000000..9cf1f168eaf8
--- /dev/null
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -0,0 +1,359 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
13 thread gets to it. */
14
15static void queue_submit(struct gdlm_lock *lp)
16{
17 struct gdlm_ls *ls = lp->ls;
18
19 spin_lock(&ls->async_lock);
20 list_add_tail(&lp->delay_list, &ls->submit);
21 spin_unlock(&ls->async_lock);
22 wake_up(&ls->thread_wait);
23}
24
25static void process_blocking(struct gdlm_lock *lp, int bast_mode)
26{
27 struct gdlm_ls *ls = lp->ls;
28 unsigned int cb = 0;
29
30 switch (gdlm_make_lmstate(bast_mode)) {
31 case LM_ST_EXCLUSIVE:
32 cb = LM_CB_NEED_E;
33 break;
34 case LM_ST_DEFERRED:
35 cb = LM_CB_NEED_D;
36 break;
37 case LM_ST_SHARED:
38 cb = LM_CB_NEED_S;
39 break;
40 default:
41 gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
42 }
43
44 ls->fscb(ls->sdp, cb, &lp->lockname);
45}
46
47static void process_complete(struct gdlm_lock *lp)
48{
49 struct gdlm_ls *ls = lp->ls;
50 struct lm_async_cb acb;
51 s16 prev_mode = lp->cur;
52
53 memset(&acb, 0, sizeof(acb));
54
55 if (lp->lksb.sb_status == -DLM_ECANCEL) {
56 log_info("complete dlm cancel %x,%llx flags %lx",
57 lp->lockname.ln_type,
58 (unsigned long long)lp->lockname.ln_number,
59 lp->flags);
60
61 lp->req = lp->cur;
62 acb.lc_ret |= LM_OUT_CANCELED;
63 if (lp->cur == DLM_LOCK_IV)
64 lp->lksb.sb_lkid = 0;
65 goto out;
66 }
67
68 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
69 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
70 log_info("unlock sb_status %d %x,%llx flags %lx",
71 lp->lksb.sb_status, lp->lockname.ln_type,
72 (unsigned long long)lp->lockname.ln_number,
73 lp->flags);
74 return;
75 }
76
77 lp->cur = DLM_LOCK_IV;
78 lp->req = DLM_LOCK_IV;
79 lp->lksb.sb_lkid = 0;
80
81 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
82 gdlm_delete_lp(lp);
83 return;
84 }
85 goto out;
86 }
87
88 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
89 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
90
91 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
92 if (lp->req == DLM_LOCK_PR)
93 lp->req = DLM_LOCK_CW;
94 else if (lp->req == DLM_LOCK_CW)
95 lp->req = DLM_LOCK_PR;
96 }
97
98 /*
99 * A canceled lock request. The lock was just taken off the delayed
100 * list and was never even submitted to dlm.
101 */
102
103 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
104 log_info("complete internal cancel %x,%llx",
105 lp->lockname.ln_type,
106 (unsigned long long)lp->lockname.ln_number);
107 lp->req = lp->cur;
108 acb.lc_ret |= LM_OUT_CANCELED;
109 goto out;
110 }
111
112 /*
113 * An error occured.
114 */
115
116 if (lp->lksb.sb_status) {
117 /* a "normal" error */
118 if ((lp->lksb.sb_status == -EAGAIN) &&
119 (lp->lkf & DLM_LKF_NOQUEUE)) {
120 lp->req = lp->cur;
121 if (lp->cur == DLM_LOCK_IV)
122 lp->lksb.sb_lkid = 0;
123 goto out;
124 }
125
126 /* this could only happen with cancels I think */
127 log_info("ast sb_status %d %x,%llx flags %lx",
128 lp->lksb.sb_status, lp->lockname.ln_type,
129 (unsigned long long)lp->lockname.ln_number,
130 lp->flags);
131 return;
132 }
133
134 /*
135 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
136 */
137
138 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
139 complete(&lp->ast_wait);
140 return;
141 }
142
143 /*
144 * A lock has been demoted to NL because it initially completed during
145 * BLOCK_LOCKS. Now it must be requested in the originally requested
146 * mode.
147 */
148
149 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
150 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
151 lp->lockname.ln_type,
152 (unsigned long long)lp->lockname.ln_number);
153 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
154 lp->lockname.ln_type,
155 (unsigned long long)lp->lockname.ln_number);
156
157 lp->cur = DLM_LOCK_NL;
158 lp->req = lp->prev_req;
159 lp->prev_req = DLM_LOCK_IV;
160 lp->lkf &= ~DLM_LKF_CONVDEADLK;
161
162 set_bit(LFL_NOCACHE, &lp->flags);
163
164 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
165 !test_bit(LFL_NOBLOCK, &lp->flags))
166 gdlm_queue_delayed(lp);
167 else
168 queue_submit(lp);
169 return;
170 }
171
172 /*
173 * A request is granted during dlm recovery. It may be granted
174 * because the locks of a failed node were cleared. In that case,
175 * there may be inconsistent data beneath this lock and we must wait
176 * for recovery to complete to use it. When gfs recovery is done this
177 * granted lock will be converted to NL and then reacquired in this
178 * granted state.
179 */
180
181 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
182 !test_bit(LFL_NOBLOCK, &lp->flags) &&
183 lp->req != DLM_LOCK_NL) {
184
185 lp->cur = lp->req;
186 lp->prev_req = lp->req;
187 lp->req = DLM_LOCK_NL;
188 lp->lkf |= DLM_LKF_CONVERT;
189 lp->lkf &= ~DLM_LKF_CONVDEADLK;
190
191 log_debug("rereq %x,%llx id %x %d,%d",
192 lp->lockname.ln_type,
193 (unsigned long long)lp->lockname.ln_number,
194 lp->lksb.sb_lkid, lp->cur, lp->req);
195
196 set_bit(LFL_REREQUEST, &lp->flags);
197 queue_submit(lp);
198 return;
199 }
200
201 /*
202 * DLM demoted the lock to NL before it was granted so GFS must be
203 * told it cannot cache data for this lock.
204 */
205
206 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
207 set_bit(LFL_NOCACHE, &lp->flags);
208
209out:
210 /*
211 * This is an internal lock_dlm lock
212 */
213
214 if (test_bit(LFL_INLOCK, &lp->flags)) {
215 clear_bit(LFL_NOBLOCK, &lp->flags);
216 lp->cur = lp->req;
217 complete(&lp->ast_wait);
218 return;
219 }
220
221 /*
222 * Normal completion of a lock request. Tell GFS it now has the lock.
223 */
224
225 clear_bit(LFL_NOBLOCK, &lp->flags);
226 lp->cur = lp->req;
227
228 acb.lc_name = lp->lockname;
229 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
230
231 if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
232 (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
233 acb.lc_ret |= LM_OUT_CACHEABLE;
234
235 ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
236}
237
238static inline int no_work(struct gdlm_ls *ls, int blocking)
239{
240 int ret;
241
242 spin_lock(&ls->async_lock);
243 ret = list_empty(&ls->complete) && list_empty(&ls->submit);
244 if (ret && blocking)
245 ret = list_empty(&ls->blocking);
246 spin_unlock(&ls->async_lock);
247
248 return ret;
249}
250
251static inline int check_drop(struct gdlm_ls *ls)
252{
253 if (!ls->drop_locks_count)
254 return 0;
255
256 if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
257 ls->drop_time = jiffies;
258 if (ls->all_locks_count >= ls->drop_locks_count)
259 return 1;
260 }
261 return 0;
262}
263
264static int gdlm_thread(void *data)
265{
266 struct gdlm_ls *ls = (struct gdlm_ls *) data;
267 struct gdlm_lock *lp = NULL;
268 int blist = 0;
269 uint8_t complete, blocking, submit, drop;
270 DECLARE_WAITQUEUE(wait, current);
271
272 /* Only thread1 is allowed to do blocking callbacks since gfs
273 may wait for a completion callback within a blocking cb. */
274
275 if (current == ls->thread1)
276 blist = 1;
277
278 while (!kthread_should_stop()) {
279 set_current_state(TASK_INTERRUPTIBLE);
280 add_wait_queue(&ls->thread_wait, &wait);
281 if (no_work(ls, blist))
282 schedule();
283 remove_wait_queue(&ls->thread_wait, &wait);
284 set_current_state(TASK_RUNNING);
285
286 complete = blocking = submit = drop = 0;
287
288 spin_lock(&ls->async_lock);
289
290 if (blist && !list_empty(&ls->blocking)) {
291 lp = list_entry(ls->blocking.next, struct gdlm_lock,
292 blist);
293 list_del_init(&lp->blist);
294 blocking = lp->bast_mode;
295 lp->bast_mode = 0;
296 } else if (!list_empty(&ls->complete)) {
297 lp = list_entry(ls->complete.next, struct gdlm_lock,
298 clist);
299 list_del_init(&lp->clist);
300 complete = 1;
301 } else if (!list_empty(&ls->submit)) {
302 lp = list_entry(ls->submit.next, struct gdlm_lock,
303 delay_list);
304 list_del_init(&lp->delay_list);
305 submit = 1;
306 }
307
308 drop = check_drop(ls);
309 spin_unlock(&ls->async_lock);
310
311 if (complete)
312 process_complete(lp);
313
314 else if (blocking)
315 process_blocking(lp, blocking);
316
317 else if (submit)
318 gdlm_do_lock(lp);
319
320 if (drop)
321 ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL);
322
323 schedule();
324 }
325
326 return 0;
327}
328
329int gdlm_init_threads(struct gdlm_ls *ls)
330{
331 struct task_struct *p;
332 int error;
333
334 p = kthread_run(gdlm_thread, ls, "lock_dlm1");
335 error = IS_ERR(p);
336 if (error) {
337 log_error("can't start lock_dlm1 thread %d", error);
338 return error;
339 }
340 ls->thread1 = p;
341
342 p = kthread_run(gdlm_thread, ls, "lock_dlm2");
343 error = IS_ERR(p);
344 if (error) {
345 log_error("can't start lock_dlm2 thread %d", error);
346 kthread_stop(ls->thread1);
347 return error;
348 }
349 ls->thread2 = p;
350
351 return 0;
352}
353
354void gdlm_release_threads(struct gdlm_ls *ls)
355{
356 kthread_stop(ls->thread1);
357 kthread_stop(ls->thread2);
358}
359
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
new file mode 100644
index 000000000000..35e9730bc3a8
--- /dev/null
+++ b/fs/gfs2/locking/nolock/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o
2lock_nolock-y := main.o
3
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
new file mode 100644
index 000000000000..acfbc941f319
--- /dev/null
+++ b/fs/gfs2/locking/nolock/main.c
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/types.h>
15#include <linux/fs.h>
16#include <linux/smp_lock.h>
17#include <linux/lm_interface.h>
18
/*
 * Per-mount state of the nolock protocol.  The only thing remembered is the
 * size passed to kzalloc() when nolock_hold_lvb() allocates a lock value
 * block.
 */
struct nolock_lockspace {
	unsigned int nl_lvb_size;	/* LVB allocation size, set at mount */
};
22
23static const struct lm_lockops nolock_ops;
24
25static int nolock_mount(char *table_name, char *host_data,
26 lm_callback_t cb, void *cb_data,
27 unsigned int min_lvb_size, int flags,
28 struct lm_lockstruct *lockstruct,
29 struct kobject *fskobj)
30{
31 char *c;
32 unsigned int jid;
33 struct nolock_lockspace *nl;
34
35 c = strstr(host_data, "jid=");
36 if (!c)
37 jid = 0;
38 else {
39 c += 4;
40 sscanf(c, "%u", &jid);
41 }
42
43 nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
44 if (!nl)
45 return -ENOMEM;
46
47 nl->nl_lvb_size = min_lvb_size;
48
49 lockstruct->ls_jid = jid;
50 lockstruct->ls_first = 1;
51 lockstruct->ls_lvb_size = min_lvb_size;
52 lockstruct->ls_lockspace = nl;
53 lockstruct->ls_ops = &nolock_ops;
54 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
55
56 return 0;
57}
58
/**
 * nolock_others_may_mount - lm_others_may_mount hook
 * @lockspace: the lockspace (ignored)
 *
 * Intentionally empty: this protocol has nothing to do here.
 */

static void nolock_others_may_mount(void *lockspace)
{
	/* nothing to do */
}
62
/**
 * nolock_unmount - release the lockspace allocated by nolock_mount()
 * @lockspace: the struct nolock_lockspace to free
 */

static void nolock_unmount(void *lockspace)
{
	/* @lockspace is the nolock_lockspace itself, so free it directly */
	kfree(lockspace);
}
68
/**
 * nolock_withdraw - lm_withdraw hook
 * @lockspace: the lockspace (ignored)
 *
 * Intentionally empty.
 */

static void nolock_withdraw(void *lockspace)
{
	/* nothing to do */
}
72
/**
 * nolock_get_lock - hand out a lock handle for a lock name
 * @lockspace: the lockspace the lock lives in
 * @name: the name of the lock (ignored)
 * @lockp: receives the lock handle
 *
 * No per-lock object is created; the lockspace pointer itself is used as
 * the handle for every lock.
 *
 * Returns: 0 (always)
 */

static int nolock_get_lock(void *lockspace, struct lm_lockname *name,
			   void **lockp)
{
	void *handle = lockspace;

	*lockp = handle;

	return 0;
}
88
/**
 * nolock_put_lock - drop a lock handle
 * @lock: the handle returned by nolock_get_lock() (ignored)
 *
 * Intentionally empty: nolock_get_lock() allocates nothing per lock.
 */

static void nolock_put_lock(void *lock)
{
	/* nothing to release */
}
98
99/**
100 * nolock_lock - acquire a lock
101 * @lock: the lock to manipulate
102 * @cur_state: the current state
103 * @req_state: the requested state
104 * @flags: modifier flags
105 *
106 * Returns: A bitmap of LM_OUT_*
107 */
108
109static unsigned int nolock_lock(void *lock, unsigned int cur_state,
110 unsigned int req_state, unsigned int flags)
111{
112 return req_state | LM_OUT_CACHEABLE;
113}
114
/**
 * nolock_unlock - release a lock
 * @lock: the lock to manipulate (ignored)
 * @cur_state: the current state (ignored)
 *
 * Returns: 0 (always)
 */

static unsigned int nolock_unlock(void *lock, unsigned int cur_state)
{
	const unsigned int result = 0;

	return result;
}
127
/**
 * nolock_cancel - lm_cancel hook
 * @lock: the lock (ignored)
 *
 * Intentionally empty.
 */

static void nolock_cancel(void *lock)
{
	/* nothing to cancel */
}
131
132/**
133 * nolock_hold_lvb - hold on to a lock value block
134 * @lock: the lock the LVB is associated with
135 * @lvbp: return the lm_lvb_t here
136 *
137 * Returns: 0 on success, -EXXX on failure
138 */
139
140static int nolock_hold_lvb(void *lock, char **lvbp)
141{
142 struct nolock_lockspace *nl = lock;
143 int error = 0;
144
145 *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL);
146 if (!*lvbp)
147 error = -ENOMEM;
148
149 return error;
150}
151
/**
 * nolock_unhold_lvb - free a lock value block
 * @lock: the lock the LVB is associated with (ignored)
 * @lvb: the buffer to free
 */

static void nolock_unhold_lvb(void *lock, char *lvb)
{
	/* the buffer came from kzalloc() in nolock_hold_lvb() */
	kfree(lvb);
}
163
164static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
165 struct file *file, struct file_lock *fl)
166{
167 struct file_lock tmp;
168 int ret;
169
170 ret = posix_test_lock(file, fl, &tmp);
171 fl->fl_type = F_UNLCK;
172 if (ret)
173 memcpy(fl, &tmp, sizeof(struct file_lock));
174
175 return 0;
176}
177
/**
 * nolock_plock - apply a POSIX lock
 * @lockspace: the lockspace (ignored)
 * @name: the lock name (ignored)
 * @file: the file to lock
 * @cmd: the fcntl command (ignored)
 * @fl: the lock to apply
 *
 * Returns: the result of posix_lock_file_wait()
 */

static int nolock_plock(void *lockspace, struct lm_lockname *name,
			struct file *file, int cmd, struct file_lock *fl)
{
	return posix_lock_file_wait(file, fl);
}
185
/**
 * nolock_punlock - remove a POSIX lock
 * @lockspace: the lockspace (ignored)
 * @name: the lock name (ignored)
 * @file: the file to unlock
 * @fl: the lock description passed on to the VFS
 *
 * Returns: the result of posix_lock_file_wait()
 */

static int nolock_punlock(void *lockspace, struct lm_lockname *name,
			  struct file *file, struct file_lock *fl)
{
	return posix_lock_file_wait(file, fl);
}
193
/**
 * nolock_recovery_done - lm_recovery_done hook
 * @lockspace: the lockspace (ignored)
 * @jid: the journal id (ignored)
 * @message: the recovery result (ignored)
 *
 * Intentionally empty.
 */

static void nolock_recovery_done(void *lockspace, unsigned int jid,
				 unsigned int message)
{
	/* nothing to do */
}
198
199static const struct lm_lockops nolock_ops = {
200 .lm_proto_name = "lock_nolock",
201 .lm_mount = nolock_mount,
202 .lm_others_may_mount = nolock_others_may_mount,
203 .lm_unmount = nolock_unmount,
204 .lm_withdraw = nolock_withdraw,
205 .lm_get_lock = nolock_get_lock,
206 .lm_put_lock = nolock_put_lock,
207 .lm_lock = nolock_lock,
208 .lm_unlock = nolock_unlock,
209 .lm_cancel = nolock_cancel,
210 .lm_hold_lvb = nolock_hold_lvb,
211 .lm_unhold_lvb = nolock_unhold_lvb,
212 .lm_plock_get = nolock_plock_get,
213 .lm_plock = nolock_plock,
214 .lm_punlock = nolock_punlock,
215 .lm_recovery_done = nolock_recovery_done,
216 .lm_owner = THIS_MODULE,
217};
218
219static int __init init_nolock(void)
220{
221 int error;
222
223 error = gfs2_register_lockproto(&nolock_ops);
224 if (error) {
225 printk(KERN_WARNING
226 "lock_nolock: can't register protocol: %d\n", error);
227 return error;
228 }
229
230 printk(KERN_INFO
231 "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
232 return 0;
233}
234
235static void __exit exit_nolock(void)
236{
237 gfs2_unregister_lockproto(&nolock_ops);
238}
239
240module_init(init_nolock);
241module_exit(exit_nolock);
242
243MODULE_DESCRIPTION("GFS Nolock Locking Module");
244MODULE_AUTHOR("Red Hat, Inc.");
245MODULE_LICENSE("GPL");
246
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
new file mode 100644
index 000000000000..554fe5bd1b72
--- /dev/null
+++ b/fs/gfs2/log.c
@@ -0,0 +1,687 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "log.h"
24#include "lops.h"
25#include "meta_io.h"
26#include "util.h"
27#include "dir.h"
28
29#define PULL 1
30
31/**
32 * gfs2_struct2blk - compute stuff
33 * @sdp: the filesystem
34 * @nstruct: the number of structures
35 * @ssize: the size of the structures
36 *
37 * Compute the number of log descriptor blocks needed to hold a certain number
38 * of structures of a certain size.
39 *
40 * Returns: the number of blocks needed (minimum is always 1)
41 */
42
43unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
44 unsigned int ssize)
45{
46 unsigned int blks;
47 unsigned int first, second;
48
49 blks = 1;
50 first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
51
52 if (nstruct > first) {
53 second = (sdp->sd_sb.sb_bsize -
54 sizeof(struct gfs2_meta_header)) / ssize;
55 blks += DIV_ROUND_UP(nstruct - first, second);
56 }
57
58 return blks;
59}
60
61/**
62 * gfs2_ail1_start_one - Start I/O on a part of the AIL
63 * @sdp: the filesystem
64 * @ai: the part of the AIL
65 *
66 */
67
/*
 * Walks ai->ai_ail1_list in reverse.  Entries whose buffer is no longer busy
 * are moved to ai->ai_ail2_list.  The first busy buffer that is still dirty
 * is submitted with ll_rw_block() and the walk restarts from scratch.
 * Must be entered with sd_log_lock held (enforced by the BUG_ON); the lock
 * is dropped and retaken around the calls that report errors or do I/O.
 */
68static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
69{
70 struct gfs2_bufdata *bd, *s;
71 struct buffer_head *bh;
72 int retry;
73
74 BUG_ON(!spin_is_locked(&sdp->sd_log_lock));
75
/* Each pass ends either by exhausting the list or by submitting one buffer */
76 do {
77 retry = 0;
78
79 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
80 bd_ail_st_list) {
81 bh = bd->bd_bh;
82
83 gfs2_assert(sdp, bd->bd_ail == ai);
84
/* Not busy: report an I/O error if it is not uptodate, then move to ail2 */
85 if (!buffer_busy(bh)) {
86 if (!buffer_uptodate(bh)) {
/* NOTE(review): the lock is dropped mid-walk here without restarting; confirm the saved cursor stays valid */
87 gfs2_log_unlock(sdp);
88 gfs2_io_error_bh(sdp, bh);
89 gfs2_log_lock(sdp);
90 }
91 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
92 continue;
93 }
94
/* Busy but not dirty: leave it on the list and look at the next entry */
95 if (!buffer_dirty(bh))
96 continue;
97
/* Move to the front of ail1, which a reverse walk reaches last */
98 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
99
/* The lock is released across the wait and the write submission */
100 gfs2_log_unlock(sdp);
101 wait_on_buffer(bh);
102 ll_rw_block(WRITE, 1, &bh);
103 gfs2_log_lock(sdp);
104
/* The list may have changed while unlocked, so start the walk again */
105 retry = 1;
106 break;
107 }
108 } while (retry);
109}
110
111/**
112 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
113 * @sdp: the filesystem
114 * @ai: the AIL entry
115 *
116 */
117
118static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
119{
120 struct gfs2_bufdata *bd, *s;
121 struct buffer_head *bh;
122
123 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
124 bd_ail_st_list) {
125 bh = bd->bd_bh;
126
127 gfs2_assert(sdp, bd->bd_ail == ai);
128
129 if (buffer_busy(bh)) {
130 if (flags & DIO_ALL)
131 continue;
132 else
133 break;
134 }
135
136 if (!buffer_uptodate(bh))
137 gfs2_io_error_bh(sdp, bh);
138
139 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
140 }
141
142 return list_empty(&ai->ai_ail1_list);
143}
144
/*
 * gfs2_ail1_start - start I/O on the entries of sd_ail1_list
 * @sdp: the filesystem
 * @flags: with DIO_ALL every entry is processed; otherwise the loop may stop
 *         early, see the check at the top of the while loop
 *
 * Each call takes a new sync generation number and stamps it on every entry
 * it hands to gfs2_ail1_start_one(), so an entry is started at most once per
 * call even though the list walk is restarted after each entry.
 */
145void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
146{
147 struct list_head *head = &sdp->sd_ail1_list;
148 u64 sync_gen;
149 struct list_head *first;
150 struct gfs2_ail *first_ai, *ai, *tmp;
151 int done = 0;
152
153 gfs2_log_lock(sdp);
/* Nothing to start when the list is empty */
154 if (list_empty(head)) {
155 gfs2_log_unlock(sdp);
156 return;
157 }
158 sync_gen = sdp->sd_ail_sync_gen++;
159
/* The entry at the tail of the list is always started first */
160 first = head->prev;
161 first_ai = list_entry(first, struct gfs2_ail, ai_list);
162 first_ai->ai_sync_gen = sync_gen;
163 gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
164
/* With DIO_ALL the early-exit test below is disabled */
165 if (flags & DIO_ALL)
166 first = NULL;
167
168 while(!done) {
/* Stop once the tail entry has changed or its ail1 list has drained */
169 if (first && (head->prev != first ||
170 gfs2_ail1_empty_one(sdp, first_ai, 0)))
171 break;
172
/* Start the next entry not yet stamped; the walk restarts because the lock may have been dropped */
173 done = 1;
174 list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) {
175 if (ai->ai_sync_gen >= sync_gen)
176 continue;
177 ai->ai_sync_gen = sync_gen;
178 gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
179 done = 0;
180 break;
181 }
182 }
183
184 gfs2_log_unlock(sdp);
185}
186
187int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
188{
189 struct gfs2_ail *ai, *s;
190 int ret;
191
192 gfs2_log_lock(sdp);
193
194 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
195 if (gfs2_ail1_empty_one(sdp, ai, flags))
196 list_move(&ai->ai_list, &sdp->sd_ail2_list);
197 else if (!(flags & DIO_ALL))
198 break;
199 }
200
201 ret = list_empty(&sdp->sd_ail1_list);
202
203 gfs2_log_unlock(sdp);
204
205 return ret;
206}
207
208
209/**
210 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
211 * @sdp: the filesystem
212 * @ai: the AIL entry
213 *
214 */
215
216static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
217{
218 struct list_head *head = &ai->ai_ail2_list;
219 struct gfs2_bufdata *bd;
220
221 while (!list_empty(head)) {
222 bd = list_entry(head->prev, struct gfs2_bufdata,
223 bd_ail_st_list);
224 gfs2_assert(sdp, bd->bd_ail == ai);
225 bd->bd_ail = NULL;
226 list_del(&bd->bd_ail_st_list);
227 list_del(&bd->bd_ail_gl_list);
228 atomic_dec(&bd->bd_gl->gl_ail_count);
229 brelse(bd->bd_bh);
230 }
231}
232
233static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
234{
235 struct gfs2_ail *ai, *safe;
236 unsigned int old_tail = sdp->sd_log_tail;
237 int wrap = (new_tail < old_tail);
238 int a, b, rm;
239
240 gfs2_log_lock(sdp);
241
242 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
243 a = (old_tail <= ai->ai_first);
244 b = (ai->ai_first < new_tail);
245 rm = (wrap) ? (a || b) : (a && b);
246 if (!rm)
247 continue;
248
249 gfs2_ail2_empty_one(sdp, ai);
250 list_del(&ai->ai_list);
251 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
252 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
253 kfree(ai);
254 }
255
256 gfs2_log_unlock(sdp);
257}
258
259/**
260 * gfs2_log_reserve - Make a log reservation
261 * @sdp: The GFS2 superblock
262 * @blks: The number of blocks to reserve
263 *
264 * Returns: errno
265 */
266
267int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
268{
269 unsigned int try = 0;
270
271 if (gfs2_assert_warn(sdp, blks) ||
272 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
273 return -EINVAL;
274
275 mutex_lock(&sdp->sd_log_reserve_mutex);
276 gfs2_log_lock(sdp);
277 while(sdp->sd_log_blks_free <= blks) {
278 gfs2_log_unlock(sdp);
279 gfs2_ail1_empty(sdp, 0);
280 gfs2_log_flush(sdp, NULL);
281
282 if (try++)
283 gfs2_ail1_start(sdp, 0);
284 gfs2_log_lock(sdp);
285 }
286 sdp->sd_log_blks_free -= blks;
287 gfs2_log_unlock(sdp);
288 mutex_unlock(&sdp->sd_log_reserve_mutex);
289
290 down_read(&sdp->sd_log_flush_lock);
291
292 return 0;
293}
294
295/**
296 * gfs2_log_release - Release a given number of log blocks
297 * @sdp: The GFS2 superblock
298 * @blks: The number of blocks
299 *
300 */
301
302void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
303{
304
305 gfs2_log_lock(sdp);
306 sdp->sd_log_blks_free += blks;
307 gfs2_assert_withdraw(sdp,
308 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
309 gfs2_log_unlock(sdp);
310 up_read(&sdp->sd_log_flush_lock);
311}
312
313static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
314{
315 int error;
316 struct buffer_head bh_map;
317
318 error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, 0, &bh_map, 1);
319 if (error || !bh_map.b_blocknr)
320 printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn);
321 gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
322
323 return bh_map.b_blocknr;
324}
325
326/**
327 * log_distance - Compute distance between two journal blocks
328 * @sdp: The GFS2 superblock
329 * @newer: The most recent journal block of the pair
330 * @older: The older journal block of the pair
331 *
332 * Compute the distance (in the journal direction) between two
333 * blocks in the journal
334 *
335 * Returns: the distance in blocks
336 */
337
338static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
339 unsigned int older)
340{
341 int dist;
342
343 dist = newer - older;
344 if (dist < 0)
345 dist += sdp->sd_jdesc->jd_blocks;
346
347 return dist;
348}
349
350static unsigned int current_tail(struct gfs2_sbd *sdp)
351{
352 struct gfs2_ail *ai;
353 unsigned int tail;
354
355 gfs2_log_lock(sdp);
356
357 if (list_empty(&sdp->sd_ail1_list)) {
358 tail = sdp->sd_log_head;
359 } else {
360 ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list);
361 tail = ai->ai_first;
362 }
363
364 gfs2_log_unlock(sdp);
365
366 return tail;
367}
368
369static inline void log_incr_head(struct gfs2_sbd *sdp)
370{
371 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
372 gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head);
373
374 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
375 sdp->sd_log_flush_head = 0;
376 sdp->sd_log_flush_wrapped = 1;
377 }
378}
379
380/**
381 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
382 * @sdp: The GFS2 superblock
383 *
384 * Returns: the buffer_head
385 */
386
387struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
388{
389 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
390 struct gfs2_log_buf *lb;
391 struct buffer_head *bh;
392
393 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
394 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
395
396 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
397 lock_buffer(bh);
398 memset(bh->b_data, 0, bh->b_size);
399 set_buffer_uptodate(bh);
400 clear_buffer_dirty(bh);
401 unlock_buffer(bh);
402
403 log_incr_head(sdp);
404
405 return bh;
406}
407
408/**
409 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
410 * @sdp: the filesystem
411 * @data: the data the buffer_head should point to
412 *
413 * Returns: the log buffer descriptor
414 */
415
416struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
417 struct buffer_head *real)
418{
419 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
420 struct gfs2_log_buf *lb;
421 struct buffer_head *bh;
422
423 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
424 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
425 lb->lb_real = real;
426
427 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
428 atomic_set(&bh->b_count, 1);
429 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
430 set_bh_page(bh, real->b_page, bh_offset(real));
431 bh->b_blocknr = blkno;
432 bh->b_size = sdp->sd_sb.sb_bsize;
433 bh->b_bdev = sdp->sd_vfs->s_bdev;
434
435 log_incr_head(sdp);
436
437 return bh;
438}
439
440static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
441{
442 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
443
444 ail2_empty(sdp, new_tail);
445
446 gfs2_log_lock(sdp);
447 sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
448 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
449 gfs2_log_unlock(sdp);
450
451 sdp->sd_log_tail = new_tail;
452}
453
454/**
455 * log_write_header - Build a journal header block and write it out synchronously
456 * @sdp: The GFS2 superblock
457 *
458 * Returns: nothing; I/O errors are reported through gfs2_io_error_bh()
459 */
460
/*
 * @flags is stored in the header's lh_flags field.  @pull is handed on to
 * log_pull_tail() when the tail has moved; when the tail has not moved,
 * @pull is asserted to be zero.
 */
461static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
462{
463 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
464 struct buffer_head *bh;
465 struct gfs2_log_header *lh;
466 unsigned int tail;
467 u32 hash;
468
/* Get the buffer for the block at the flush head and clear it */
469 bh = sb_getblk(sdp->sd_vfs, blkno);
470 lock_buffer(bh);
471 memset(bh->b_data, 0, bh->b_size);
472 set_buffer_uptodate(bh);
473 clear_buffer_dirty(bh);
474 unlock_buffer(bh);
475
/* Retire finished AIL entries first so the tail recorded below is current */
476 gfs2_ail1_empty(sdp, 0);
477 tail = current_tail(sdp);
478
/* Fill in the on-disk header; multi-byte fields are stored big-endian */
479 lh = (struct gfs2_log_header *)bh->b_data;
480 memset(lh, 0, sizeof(struct gfs2_log_header));
481 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
482 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
483 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
484 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
485 lh->lh_flags = cpu_to_be32(flags);
486 lh->lh_tail = cpu_to_be32(tail);
487 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
/* The hash is computed while lh_hash is still zero from the memset above */
488 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
489 lh->lh_hash = cpu_to_be32(hash);
490
/* Write the header and wait for it */
491 set_buffer_dirty(bh);
492 if (sync_dirty_buffer(bh))
493 gfs2_io_error_bh(sdp, bh);
494 brelse(bh);
495
/* Only now is the in-core tail advanced to match the header */
496 if (sdp->sd_log_tail != tail)
497 log_pull_tail(sdp, tail, pull);
498 else
499 gfs2_assert_withdraw(sdp, !pull);
500
/* sd_log_idle records whether the tail equals the flush head before it is advanced */
501 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
502 log_incr_head(sdp);
503}
504
505static void log_flush_commit(struct gfs2_sbd *sdp)
506{
507 struct list_head *head = &sdp->sd_log_flush_list;
508 struct gfs2_log_buf *lb;
509 struct buffer_head *bh;
510
511 while (!list_empty(head)) {
512 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
513 list_del(&lb->lb_list);
514 bh = lb->lb_bh;
515
516 wait_on_buffer(bh);
517 if (!buffer_uptodate(bh))
518 gfs2_io_error_bh(sdp, bh);
519 if (lb->lb_real) {
520 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
521 schedule();
522 free_buffer_head(bh);
523 } else
524 brelse(bh);
525 kfree(lb);
526 }
527
528 log_write_header(sdp, 0, 0);
529}
530
531/**
532 * gfs2_log_flush - flush incore transaction(s)
533 * @sdp: the filesystem
534 * @gl: The glock structure to flush. If NULL, flush the whole incore log
535 *
536 */
537
/* The whole flush runs with sd_log_flush_lock held for writing. */
538void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
539{
540 struct gfs2_ail *ai;
541
542 down_write(&sdp->sd_log_flush_lock);
543
/* With a glock given, there is nothing to do unless it is queued on a log element list */
544 if (gl) {
545 gfs2_log_lock(sdp);
546 if (list_empty(&gl->gl_le.le_list)) {
547 gfs2_log_unlock(sdp);
548 up_write(&sdp->sd_log_flush_lock);
549 return;
550 }
551 gfs2_log_unlock(sdp);
552 }
553
/* New AIL entry for this flush; it is freed at the end if it stays empty */
554 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
555 INIT_LIST_HEAD(&ai->ai_ail1_list);
556 INIT_LIST_HEAD(&ai->ai_ail2_list);
557
/* Everything counted in the log must already have been committed */
558 gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
559 gfs2_assert_withdraw(sdp,
560 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
561
/* Start writing at the current head; the AIL entry remembers that position */
562 sdp->sd_log_flush_head = sdp->sd_log_head;
563 sdp->sd_log_flush_wrapped = 0;
564 ai->ai_first = sdp->sd_log_flush_head;
565
566 lops_before_commit(sdp);
/* Buffers queued: commit them. None queued: write a header only if the tail moved and the log is not idle */
567 if (!list_empty(&sdp->sd_log_flush_list))
568 log_flush_commit(sdp);
569 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
570 log_write_header(sdp, 0, PULL);
571 lops_after_commit(sdp, ai);
572 sdp->sd_log_head = sdp->sd_log_flush_head;
573
/* NOTE(review): sd_log_blks_free is changed here without the log lock, unlike elsewhere in this file; confirm that holding the flush lock for writing is sufficient */
574 sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
575
/* Reset the per-flush accounting */
576 sdp->sd_log_blks_reserved = 0;
577 sdp->sd_log_commited_buf = 0;
578 sdp->sd_log_num_hdrs = 0;
579 sdp->sd_log_commited_revoke = 0;
580
/* Keep the AIL entry only if it ended up with buffers on its ail1 list */
581 gfs2_log_lock(sdp);
582 if (!list_empty(&ai->ai_ail1_list)) {
583 list_add(&ai->ai_list, &sdp->sd_ail1_list);
584 ai = NULL;
585 }
586 gfs2_log_unlock(sdp);
587
588 sdp->sd_vfs->s_dirt = 0;
589 up_write(&sdp->sd_log_flush_lock);
590
/* ai is NULL here if the entry was queued above */
591 kfree(ai);
592}
593
/*
 * log_refund - fold a transaction into the log accounting and return the
 *              part of its reservation that is not needed
 * @sdp: the filesystem
 * @tr: the transaction being committed
 *
 * Runs entirely under the log lock.
 */
594static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
595{
596 unsigned int reserved = 0;
597 unsigned int old;
598
599 gfs2_log_lock(sdp);
600
/* Update the committed counts by the transaction's net additions; neither may go negative */
601 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
602 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
603 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
604 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
605
/* Blocks the committed state needs: one per buffer plus the blocks holding the revokes */
606 if (sdp->sd_log_commited_buf)
607 reserved += sdp->sd_log_commited_buf;
608 if (sdp->sd_log_commited_revoke)
609 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
610 sizeof(u64));
/* One extra block whenever anything is reserved (presumably the log header; TODO confirm) */
611 if (reserved)
612 reserved++;
613
/* Give back the transaction's reservation minus the growth in the overall reservation */
614 old = sdp->sd_log_blks_free;
615 sdp->sd_log_blks_free += tr->tr_reserved -
616 (reserved - sdp->sd_log_blks_reserved);
617
/* The free count must not shrink here and must stay within journal size plus pending headers */
618 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
619 gfs2_assert_withdraw(sdp,
620 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
621 sdp->sd_log_num_hdrs);
622
623 sdp->sd_log_blks_reserved = reserved;
624
625 gfs2_log_unlock(sdp);
626}
627
628/**
629 * gfs2_log_commit - Commit a transaction to the log
630 * @sdp: the filesystem
631 * @tr: the transaction
632 *
633 * Returns: errno
634 */
635
636void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
637{
638 log_refund(sdp, tr);
639 lops_incore_commit(sdp, tr);
640
641 sdp->sd_vfs->s_dirt = 1;
642 up_read(&sdp->sd_log_flush_lock);
643
644 gfs2_log_lock(sdp);
645 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
646 gfs2_log_unlock(sdp);
647 gfs2_log_flush(sdp, NULL);
648 } else {
649 gfs2_log_unlock(sdp);
650 }
651}
652
653/**
654 * gfs2_log_shutdown - write a shutdown header into a journal
655 * @sdp: the filesystem
656 *
657 */
658
/* Runs with sd_log_flush_lock held for writing. */
659void gfs2_log_shutdown(struct gfs2_sbd *sdp)
660{
661 down_write(&sdp->sd_log_flush_lock);
662
/* Nothing may be reserved, pending or outstanding when shutting down */
663 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
664 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
665 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
666 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
667 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
668 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
669 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
670 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
671 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
672
/* Position the flush head at the current head for the final header */
673 sdp->sd_log_flush_head = sdp->sd_log_head;
674 sdp->sd_log_flush_wrapped = 0;
675
/* Write a header carrying the unmount flag */
676 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
677
/* After the header: all blocks free, head equals tail, ail2 list empty (warnings only) */
678 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
679 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
680 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
681
/* Head and tail both move to the block after the header just written */
682 sdp->sd_log_head = sdp->sd_log_flush_head;
683 sdp->sd_log_tail = sdp->sd_log_head;
684
685 up_write(&sdp->sd_log_flush_lock);
686}
687
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
new file mode 100644
index 000000000000..7f5737d55612
--- /dev/null
+++ b/fs/gfs2/log.h
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LOG_DOT_H__
11#define __LOG_DOT_H__
12
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include "incore.h"
16
17/**
18 * gfs2_log_lock - acquire the right to mess with the log manager
19 * @sdp: the filesystem
20 *
21 */
22
23static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
24{
25 spin_lock(&sdp->sd_log_lock);
26}
27
28/**
29 * gfs2_log_unlock - release the right to mess with the log manager
30 * @sdp: the filesystem
31 *
32 */
33
34static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
35{
36 spin_unlock(&sdp->sd_log_lock);
37}
38
39static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
40 unsigned int value)
41{
42 if (++value == sdp->sd_jdesc->jd_blocks) {
43 value = 0;
44 }
45 sdp->sd_log_head = sdp->sd_log_tail = value;
46}
47
48unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
49 unsigned int ssize);
50
51void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
52int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
53
54int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
56
57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real);
60void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62
63void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64
65#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
new file mode 100644
index 000000000000..881e337b6a70
--- /dev/null
+++ b/fs/gfs2/lops.c
@@ -0,0 +1,809 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "glock.h"
21#include "log.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "recovery.h"
25#include "rgrp.h"
26#include "trans.h"
27#include "util.h"
28
29static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
30{
31 struct gfs2_glock *gl;
32 struct gfs2_trans *tr = current->journal_info;
33
34 tr->tr_touched = 1;
35
36 if (!list_empty(&le->le_list))
37 return;
38
39 gl = container_of(le, struct gfs2_glock, gl_le);
40 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
41 return;
42 gfs2_glock_hold(gl);
43 set_bit(GLF_DIRTY, &gl->gl_flags);
44
45 gfs2_log_lock(sdp);
46 sdp->sd_log_num_gl++;
47 list_add(&le->le_list, &sdp->sd_log_le_gl);
48 gfs2_log_unlock(sdp);
49}
50
51static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 struct list_head *head = &sdp->sd_log_le_gl;
54 struct gfs2_glock *gl;
55
56 while (!list_empty(head)) {
57 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
58 list_del_init(&gl->gl_le.le_list);
59 sdp->sd_log_num_gl--;
60
61 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
62 gfs2_glock_put(gl);
63 }
64 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
65}
66
67static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
68{
69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
70 struct gfs2_trans *tr;
71
72 if (!list_empty(&bd->bd_list_tr))
73 return;
74
75 tr = current->journal_info;
76 tr->tr_touched = 1;
77 tr->tr_num_buf++;
78 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
79
80 if (!list_empty(&le->le_list))
81 return;
82
83 gfs2_trans_add_gl(bd->bd_gl);
84
85 gfs2_meta_check(sdp, bd->bd_bh);
86 gfs2_pin(sdp, bd->bd_bh);
87
88 gfs2_log_lock(sdp);
89 sdp->sd_log_num_buf++;
90 list_add(&le->le_list, &sdp->sd_log_le_buf);
91 gfs2_log_unlock(sdp);
92
93 tr->tr_num_buf_new++;
94}
95
96static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
97{
98 struct list_head *head = &tr->tr_list_buf;
99 struct gfs2_bufdata *bd;
100
101 while (!list_empty(head)) {
102 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
103 list_del_init(&bd->bd_list_tr);
104 tr->tr_num_buf--;
105 }
106 gfs2_assert_warn(sdp, !tr->tr_num_buf);
107}
108
109static void buf_lo_before_commit(struct gfs2_sbd *sdp)
110{
111 struct buffer_head *bh;
112 struct gfs2_log_descriptor *ld;
113 struct gfs2_bufdata *bd1 = NULL, *bd2;
114 unsigned int total = sdp->sd_log_num_buf;
115 unsigned int offset = sizeof(struct gfs2_log_descriptor);
116 unsigned int limit;
117 unsigned int num;
118 unsigned n;
119 __be64 *ptr;
120
121 offset += sizeof(__be64) - 1;
122 offset &= ~(sizeof(__be64) - 1);
123 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
124 /* for 4k blocks, limit = 503 */
125
126 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
127 while(total) {
128 num = total;
129 if (total > limit)
130 num = limit;
131 bh = gfs2_log_get_buf(sdp);
132 sdp->sd_log_num_hdrs++;
133 ld = (struct gfs2_log_descriptor *)bh->b_data;
134 ptr = (__be64 *)(bh->b_data + offset);
135 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
136 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
137 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
138 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
139 ld->ld_length = cpu_to_be32(num + 1);
140 ld->ld_data1 = cpu_to_be32(num);
141 ld->ld_data2 = cpu_to_be32(0);
142 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
143
144 n = 0;
145 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
146 bd_le.le_list) {
147 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
148 if (++n >= num)
149 break;
150 }
151
152 set_buffer_dirty(bh);
153 ll_rw_block(WRITE, 1, &bh);
154
155 n = 0;
156 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
157 bd_le.le_list) {
158 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
159 set_buffer_dirty(bh);
160 ll_rw_block(WRITE, 1, &bh);
161 if (++n >= num)
162 break;
163 }
164
165 total -= num;
166 }
167}
168
169static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
170{
171 struct list_head *head = &sdp->sd_log_le_buf;
172 struct gfs2_bufdata *bd;
173
174 while (!list_empty(head)) {
175 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
176 list_del_init(&bd->bd_le.le_list);
177 sdp->sd_log_num_buf--;
178
179 gfs2_unpin(sdp, bd->bd_bh, ai);
180 }
181 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
182}
183
184static void buf_lo_before_scan(struct gfs2_jdesc *jd,
185 struct gfs2_log_header *head, int pass)
186{
187 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
188
189 if (pass != 0)
190 return;
191
192 sdp->sd_found_blocks = 0;
193 sdp->sd_replayed_blocks = 0;
194}
195
196static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
197 struct gfs2_log_descriptor *ld, __be64 *ptr,
198 int pass)
199{
200 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
201 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
202 struct gfs2_glock *gl = ip->i_gl;
203 unsigned int blks = be32_to_cpu(ld->ld_data1);
204 struct buffer_head *bh_log, *bh_ip;
205 u64 blkno;
206 int error = 0;
207
208 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
209 return 0;
210
211 gfs2_replay_incr_blk(sdp, &start);
212
213 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
214 blkno = be64_to_cpu(*ptr++);
215
216 sdp->sd_found_blocks++;
217
218 if (gfs2_revoke_check(sdp, blkno, start))
219 continue;
220
221 error = gfs2_replay_read_block(jd, start, &bh_log);
222 if (error)
223 return error;
224
225 bh_ip = gfs2_meta_new(gl, blkno);
226 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
227
228 if (gfs2_meta_check(sdp, bh_ip))
229 error = -EIO;
230 else
231 mark_buffer_dirty(bh_ip);
232
233 brelse(bh_log);
234 brelse(bh_ip);
235
236 if (error)
237 break;
238
239 sdp->sd_replayed_blocks++;
240 }
241
242 return error;
243}
244
245static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
246{
247 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
248 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
249
250 if (error) {
251 gfs2_meta_sync(ip->i_gl);
252 return;
253 }
254 if (pass != 1)
255 return;
256
257 gfs2_meta_sync(ip->i_gl);
258
259 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
260 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
261}
262
263static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
264{
265 struct gfs2_trans *tr;
266
267 tr = current->journal_info;
268 tr->tr_touched = 1;
269 tr->tr_num_revoke++;
270
271 gfs2_log_lock(sdp);
272 sdp->sd_log_num_revoke++;
273 list_add(&le->le_list, &sdp->sd_log_le_revoke);
274 gfs2_log_unlock(sdp);
275}
276
277static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
278{
279 struct gfs2_log_descriptor *ld;
280 struct gfs2_meta_header *mh;
281 struct buffer_head *bh;
282 unsigned int offset;
283 struct list_head *head = &sdp->sd_log_le_revoke;
284 struct gfs2_revoke *rv;
285
286 if (!sdp->sd_log_num_revoke)
287 return;
288
289 bh = gfs2_log_get_buf(sdp);
290 ld = (struct gfs2_log_descriptor *)bh->b_data;
291 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
292 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
293 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
294 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
295 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
296 sizeof(u64)));
297 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
298 ld->ld_data2 = cpu_to_be32(0);
299 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
300 offset = sizeof(struct gfs2_log_descriptor);
301
302 while (!list_empty(head)) {
303 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
304 list_del_init(&rv->rv_le.le_list);
305 sdp->sd_log_num_revoke--;
306
307 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
308 set_buffer_dirty(bh);
309 ll_rw_block(WRITE, 1, &bh);
310
311 bh = gfs2_log_get_buf(sdp);
312 mh = (struct gfs2_meta_header *)bh->b_data;
313 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
314 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
315 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
316 offset = sizeof(struct gfs2_meta_header);
317 }
318
319 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
320 kfree(rv);
321
322 offset += sizeof(u64);
323 }
324 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
325
326 set_buffer_dirty(bh);
327 ll_rw_block(WRITE, 1, &bh);
328}
329
330static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
331 struct gfs2_log_header *head, int pass)
332{
333 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
334
335 if (pass != 0)
336 return;
337
338 sdp->sd_found_revokes = 0;
339 sdp->sd_replay_tail = head->lh_tail;
340}
341
342static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
343 struct gfs2_log_descriptor *ld, __be64 *ptr,
344 int pass)
345{
346 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
347 unsigned int blks = be32_to_cpu(ld->ld_length);
348 unsigned int revokes = be32_to_cpu(ld->ld_data1);
349 struct buffer_head *bh;
350 unsigned int offset;
351 u64 blkno;
352 int first = 1;
353 int error;
354
355 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
356 return 0;
357
358 offset = sizeof(struct gfs2_log_descriptor);
359
360 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
361 error = gfs2_replay_read_block(jd, start, &bh);
362 if (error)
363 return error;
364
365 if (!first)
366 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
367
368 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
369 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
370
371 error = gfs2_revoke_add(sdp, blkno, start);
372 if (error < 0)
373 return error;
374 else if (error)
375 sdp->sd_found_revokes++;
376
377 if (!--revokes)
378 break;
379 offset += sizeof(u64);
380 }
381
382 brelse(bh);
383 offset = sizeof(struct gfs2_meta_header);
384 first = 0;
385 }
386
387 return 0;
388}
389
390static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
391{
392 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
393
394 if (error) {
395 gfs2_revoke_clean(sdp);
396 return;
397 }
398 if (pass != 1)
399 return;
400
401 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
402 jd->jd_jid, sdp->sd_found_revokes);
403
404 gfs2_revoke_clean(sdp);
405}
406
407static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
408{
409 struct gfs2_rgrpd *rgd;
410 struct gfs2_trans *tr = current->journal_info;
411
412 tr->tr_touched = 1;
413
414 if (!list_empty(&le->le_list))
415 return;
416
417 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
418 gfs2_rgrp_bh_hold(rgd);
419
420 gfs2_log_lock(sdp);
421 sdp->sd_log_num_rg++;
422 list_add(&le->le_list, &sdp->sd_log_le_rg);
423 gfs2_log_unlock(sdp);
424}
425
426static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
427{
428 struct list_head *head = &sdp->sd_log_le_rg;
429 struct gfs2_rgrpd *rgd;
430
431 while (!list_empty(head)) {
432 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
433 list_del_init(&rgd->rd_le.le_list);
434 sdp->sd_log_num_rg--;
435
436 gfs2_rgrp_repolish_clones(rgd);
437 gfs2_rgrp_bh_put(rgd);
438 }
439 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
440}
441
442/**
443 * databuf_lo_add - Add a databuf to the transaction.
444 *
445 * This is used in two distinct cases:
446 * i) In ordered write mode
447 * We put the data buffer on a list so that we can ensure that its
448 * synced to disk at the right time
449 * ii) In journaled data mode
450 * We need to journal the data block in the same way as metadata in
451 * the functions above. The difference is that here we have a tag
452 * which is two __be64's being the block number (as per meta data)
453 * and a flag which says whether the data block needs escaping or
454 * not. This means we need a new log entry for each 251 or so data
455 * blocks, which isn't an enormous overhead but twice as much as
456 * for normal metadata blocks.
457 */
458static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
459{
460 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
461 struct gfs2_trans *tr = current->journal_info;
462 struct address_space *mapping = bd->bd_bh->b_page->mapping;
463 struct gfs2_inode *ip = GFS2_I(mapping->host);
464
465 tr->tr_touched = 1;
466 if (list_empty(&bd->bd_list_tr) &&
467 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
468 tr->tr_num_buf++;
469 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
470 gfs2_pin(sdp, bd->bd_bh);
471 tr->tr_num_buf_new++;
472 }
473 gfs2_trans_add_gl(bd->bd_gl);
474 gfs2_log_lock(sdp);
475 if (list_empty(&le->le_list)) {
476 if (ip->i_di.di_flags & GFS2_DIF_JDATA)
477 sdp->sd_log_num_jdata++;
478 sdp->sd_log_num_databuf++;
479 list_add(&le->le_list, &sdp->sd_log_le_databuf);
480 }
481 gfs2_log_unlock(sdp);
482}
483
484static int gfs2_check_magic(struct buffer_head *bh)
485{
486 struct page *page = bh->b_page;
487 void *kaddr;
488 __be32 *ptr;
489 int rv = 0;
490
491 kaddr = kmap_atomic(page, KM_USER0);
492 ptr = kaddr + bh_offset(bh);
493 if (*ptr == cpu_to_be32(GFS2_MAGIC))
494 rv = 1;
495 kunmap_atomic(page, KM_USER0);
496
497 return rv;
498}
499
500/**
501 * databuf_lo_before_commit - Scan the data buffers, writing as we go
502 *
503 * Here we scan through the lists of buffers and make the assumption
504 * that any buffer thats been pinned is being journaled, and that
505 * any unpinned buffer is an ordered write data buffer and therefore
506 * will be written back rather than journaled.
507 */
508static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
509{
510 LIST_HEAD(started);
511 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
512 struct buffer_head *bh = NULL;
513 unsigned int offset = sizeof(struct gfs2_log_descriptor);
514 struct gfs2_log_descriptor *ld;
515 unsigned int limit;
516 unsigned int total_dbuf = sdp->sd_log_num_databuf;
517 unsigned int total_jdata = sdp->sd_log_num_jdata;
518 unsigned int num, n;
519 __be64 *ptr = NULL;
520
521 offset += 2*sizeof(__be64) - 1;
522 offset &= ~(2*sizeof(__be64) - 1);
523 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
524
525 /*
526 * Start writing ordered buffers, write journaled buffers
527 * into the log along with a header
528 */
529 gfs2_log_lock(sdp);
530 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
531 bd_le.le_list);
532 while(total_dbuf) {
533 num = total_jdata;
534 if (num > limit)
535 num = limit;
536 n = 0;
537 list_for_each_entry_safe_continue(bd1, bdt,
538 &sdp->sd_log_le_databuf,
539 bd_le.le_list) {
540 /* An ordered write buffer */
541 if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
542 list_move(&bd1->bd_le.le_list, &started);
543 if (bd1 == bd2) {
544 bd2 = NULL;
545 bd2 = list_prepare_entry(bd2,
546 &sdp->sd_log_le_databuf,
547 bd_le.le_list);
548 }
549 total_dbuf--;
550 if (bd1->bd_bh) {
551 get_bh(bd1->bd_bh);
552 if (buffer_dirty(bd1->bd_bh)) {
553 gfs2_log_unlock(sdp);
554 wait_on_buffer(bd1->bd_bh);
555 ll_rw_block(WRITE, 1,
556 &bd1->bd_bh);
557 gfs2_log_lock(sdp);
558 }
559 brelse(bd1->bd_bh);
560 continue;
561 }
562 continue;
563 } else if (bd1->bd_bh) { /* A journaled buffer */
564 int magic;
565 gfs2_log_unlock(sdp);
566 if (!bh) {
567 bh = gfs2_log_get_buf(sdp);
568 sdp->sd_log_num_hdrs++;
569 ld = (struct gfs2_log_descriptor *)
570 bh->b_data;
571 ptr = (__be64 *)(bh->b_data + offset);
572 ld->ld_header.mh_magic =
573 cpu_to_be32(GFS2_MAGIC);
574 ld->ld_header.mh_type =
575 cpu_to_be32(GFS2_METATYPE_LD);
576 ld->ld_header.mh_format =
577 cpu_to_be32(GFS2_FORMAT_LD);
578 ld->ld_type =
579 cpu_to_be32(GFS2_LOG_DESC_JDATA);
580 ld->ld_length = cpu_to_be32(num + 1);
581 ld->ld_data1 = cpu_to_be32(num);
582 ld->ld_data2 = cpu_to_be32(0);
583 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
584 }
585 magic = gfs2_check_magic(bd1->bd_bh);
586 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
587 *ptr++ = cpu_to_be64((__u64)magic);
588 clear_buffer_escaped(bd1->bd_bh);
589 if (unlikely(magic != 0))
590 set_buffer_escaped(bd1->bd_bh);
591 gfs2_log_lock(sdp);
592 if (n++ > num)
593 break;
594 } else if (!bd1->bd_bh) {
595 total_dbuf--;
596 sdp->sd_log_num_databuf--;
597 list_del_init(&bd1->bd_le.le_list);
598 if (bd1 == bd2) {
599 bd2 = NULL;
600 bd2 = list_prepare_entry(bd2,
601 &sdp->sd_log_le_databuf,
602 bd_le.le_list);
603 }
604 kmem_cache_free(gfs2_bufdata_cachep, bd1);
605 }
606 }
607 gfs2_log_unlock(sdp);
608 if (bh) {
609 set_buffer_dirty(bh);
610 ll_rw_block(WRITE, 1, &bh);
611 bh = NULL;
612 }
613 n = 0;
614 gfs2_log_lock(sdp);
615 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
616 bd_le.le_list) {
617 if (!bd2->bd_bh)
618 continue;
619 /* copy buffer if it needs escaping */
620 gfs2_log_unlock(sdp);
621 if (unlikely(buffer_escaped(bd2->bd_bh))) {
622 void *kaddr;
623 struct page *page = bd2->bd_bh->b_page;
624 bh = gfs2_log_get_buf(sdp);
625 kaddr = kmap_atomic(page, KM_USER0);
626 memcpy(bh->b_data,
627 kaddr + bh_offset(bd2->bd_bh),
628 sdp->sd_sb.sb_bsize);
629 kunmap_atomic(page, KM_USER0);
630 *(__be32 *)bh->b_data = 0;
631 } else {
632 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
633 }
634 set_buffer_dirty(bh);
635 ll_rw_block(WRITE, 1, &bh);
636 gfs2_log_lock(sdp);
637 if (++n >= num)
638 break;
639 }
640 bh = NULL;
641 total_dbuf -= num;
642 total_jdata -= num;
643 }
644 gfs2_log_unlock(sdp);
645
646 /* Wait on all ordered buffers */
647 while (!list_empty(&started)) {
648 gfs2_log_lock(sdp);
649 bd1 = list_entry(started.next, struct gfs2_bufdata,
650 bd_le.le_list);
651 list_del_init(&bd1->bd_le.le_list);
652 sdp->sd_log_num_databuf--;
653 bh = bd1->bd_bh;
654 if (bh) {
655 bh->b_private = NULL;
656 get_bh(bh);
657 gfs2_log_unlock(sdp);
658 wait_on_buffer(bh);
659 brelse(bh);
660 } else
661 gfs2_log_unlock(sdp);
662
663 kmem_cache_free(gfs2_bufdata_cachep, bd1);
664 }
665
666 /* We've removed all the ordered write bufs here, so only jdata left */
667 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
668}
669
670static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
671 struct gfs2_log_descriptor *ld,
672 __be64 *ptr, int pass)
673{
674 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
675 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
676 struct gfs2_glock *gl = ip->i_gl;
677 unsigned int blks = be32_to_cpu(ld->ld_data1);
678 struct buffer_head *bh_log, *bh_ip;
679 u64 blkno;
680 u64 esc;
681 int error = 0;
682
683 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
684 return 0;
685
686 gfs2_replay_incr_blk(sdp, &start);
687 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
688 blkno = be64_to_cpu(*ptr++);
689 esc = be64_to_cpu(*ptr++);
690
691 sdp->sd_found_blocks++;
692
693 if (gfs2_revoke_check(sdp, blkno, start))
694 continue;
695
696 error = gfs2_replay_read_block(jd, start, &bh_log);
697 if (error)
698 return error;
699
700 bh_ip = gfs2_meta_new(gl, blkno);
701 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
702
703 /* Unescape */
704 if (esc) {
705 __be32 *eptr = (__be32 *)bh_ip->b_data;
706 *eptr = cpu_to_be32(GFS2_MAGIC);
707 }
708 mark_buffer_dirty(bh_ip);
709
710 brelse(bh_log);
711 brelse(bh_ip);
712 if (error)
713 break;
714
715 sdp->sd_replayed_blocks++;
716 }
717
718 return error;
719}
720
721/* FIXME: sort out accounting for log blocks etc. */
722
723static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
724{
725 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
726 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
727
728 if (error) {
729 gfs2_meta_sync(ip->i_gl);
730 return;
731 }
732 if (pass != 1)
733 return;
734
735 /* data sync? */
736 gfs2_meta_sync(ip->i_gl);
737
738 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
739 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
740}
741
742static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
743{
744 struct list_head *head = &sdp->sd_log_le_databuf;
745 struct gfs2_bufdata *bd;
746
747 while (!list_empty(head)) {
748 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
749 list_del_init(&bd->bd_le.le_list);
750 sdp->sd_log_num_databuf--;
751 sdp->sd_log_num_jdata--;
752 gfs2_unpin(sdp, bd->bd_bh, ai);
753 }
754 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
755 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
756}
757
758
759const struct gfs2_log_operations gfs2_glock_lops = {
760 .lo_add = glock_lo_add,
761 .lo_after_commit = glock_lo_after_commit,
762 .lo_name = "glock",
763};
764
765const struct gfs2_log_operations gfs2_buf_lops = {
766 .lo_add = buf_lo_add,
767 .lo_incore_commit = buf_lo_incore_commit,
768 .lo_before_commit = buf_lo_before_commit,
769 .lo_after_commit = buf_lo_after_commit,
770 .lo_before_scan = buf_lo_before_scan,
771 .lo_scan_elements = buf_lo_scan_elements,
772 .lo_after_scan = buf_lo_after_scan,
773 .lo_name = "buf",
774};
775
776const struct gfs2_log_operations gfs2_revoke_lops = {
777 .lo_add = revoke_lo_add,
778 .lo_before_commit = revoke_lo_before_commit,
779 .lo_before_scan = revoke_lo_before_scan,
780 .lo_scan_elements = revoke_lo_scan_elements,
781 .lo_after_scan = revoke_lo_after_scan,
782 .lo_name = "revoke",
783};
784
785const struct gfs2_log_operations gfs2_rg_lops = {
786 .lo_add = rg_lo_add,
787 .lo_after_commit = rg_lo_after_commit,
788 .lo_name = "rg",
789};
790
791const struct gfs2_log_operations gfs2_databuf_lops = {
792 .lo_add = databuf_lo_add,
793 .lo_incore_commit = buf_lo_incore_commit,
794 .lo_before_commit = databuf_lo_before_commit,
795 .lo_after_commit = databuf_lo_after_commit,
796 .lo_scan_elements = databuf_lo_scan_elements,
797 .lo_after_scan = databuf_lo_after_scan,
798 .lo_name = "databuf",
799};
800
801const struct gfs2_log_operations *gfs2_log_ops[] = {
802 &gfs2_glock_lops,
803 &gfs2_buf_lops,
804 &gfs2_revoke_lops,
805 &gfs2_rg_lops,
806 &gfs2_databuf_lops,
807 NULL,
808};
809
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
new file mode 100644
index 000000000000..5839c05ae6be
--- /dev/null
+++ b/fs/gfs2/lops.h
@@ -0,0 +1,99 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LOPS_DOT_H__
11#define __LOPS_DOT_H__
12
13#include <linux/list.h>
14#include "incore.h"
15
16extern const struct gfs2_log_operations gfs2_glock_lops;
17extern const struct gfs2_log_operations gfs2_buf_lops;
18extern const struct gfs2_log_operations gfs2_revoke_lops;
19extern const struct gfs2_log_operations gfs2_rg_lops;
20extern const struct gfs2_log_operations gfs2_databuf_lops;
21
22extern const struct gfs2_log_operations *gfs2_log_ops[];
23
24static inline void lops_init_le(struct gfs2_log_element *le,
25 const struct gfs2_log_operations *lops)
26{
27 INIT_LIST_HEAD(&le->le_list);
28 le->le_ops = lops;
29}
30
31static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
32{
33 if (le->le_ops->lo_add)
34 le->le_ops->lo_add(sdp, le);
35}
36
37static inline void lops_incore_commit(struct gfs2_sbd *sdp,
38 struct gfs2_trans *tr)
39{
40 int x;
41 for (x = 0; gfs2_log_ops[x]; x++)
42 if (gfs2_log_ops[x]->lo_incore_commit)
43 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
44}
45
46static inline void lops_before_commit(struct gfs2_sbd *sdp)
47{
48 int x;
49 for (x = 0; gfs2_log_ops[x]; x++)
50 if (gfs2_log_ops[x]->lo_before_commit)
51 gfs2_log_ops[x]->lo_before_commit(sdp);
52}
53
54static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
55{
56 int x;
57 for (x = 0; gfs2_log_ops[x]; x++)
58 if (gfs2_log_ops[x]->lo_after_commit)
59 gfs2_log_ops[x]->lo_after_commit(sdp, ai);
60}
61
62static inline void lops_before_scan(struct gfs2_jdesc *jd,
63 struct gfs2_log_header *head,
64 unsigned int pass)
65{
66 int x;
67 for (x = 0; gfs2_log_ops[x]; x++)
68 if (gfs2_log_ops[x]->lo_before_scan)
69 gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
70}
71
72static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
73 struct gfs2_log_descriptor *ld,
74 __be64 *ptr,
75 unsigned int pass)
76{
77 int x, error;
78 for (x = 0; gfs2_log_ops[x]; x++)
79 if (gfs2_log_ops[x]->lo_scan_elements) {
80 error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
81 ld, ptr, pass);
82 if (error)
83 return error;
84 }
85
86 return 0;
87}
88
89static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
90 unsigned int pass)
91{
92 int x;
93 for (x = 0; gfs2_log_ops[x]; x++)
94 if (gfs2_log_ops[x]->lo_before_scan)
95 gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
96}
97
98#endif /* __LOPS_DOT_H__ */
99
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
new file mode 100644
index 000000000000..21508a13bb78
--- /dev/null
+++ b/fs/gfs2/main.c
@@ -0,0 +1,150 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19#include <asm/atomic.h>
20
21#include "gfs2.h"
22#include "incore.h"
23#include "ops_fstype.h"
24#include "sys.h"
25#include "util.h"
26#include "glock.h"
27
28static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
29{
30 struct gfs2_inode *ip = foo;
31 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
32 SLAB_CTOR_CONSTRUCTOR) {
33 inode_init_once(&ip->i_inode);
34 spin_lock_init(&ip->i_spin);
35 init_rwsem(&ip->i_rw_mutex);
36 memset(ip->i_cache, 0, sizeof(ip->i_cache));
37 }
38}
39
40static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
41{
42 struct gfs2_glock *gl = foo;
43 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
44 SLAB_CTOR_CONSTRUCTOR) {
45 INIT_HLIST_NODE(&gl->gl_list);
46 spin_lock_init(&gl->gl_spin);
47 INIT_LIST_HEAD(&gl->gl_holders);
48 INIT_LIST_HEAD(&gl->gl_waiters1);
49 INIT_LIST_HEAD(&gl->gl_waiters2);
50 INIT_LIST_HEAD(&gl->gl_waiters3);
51 gl->gl_lvb = NULL;
52 atomic_set(&gl->gl_lvb_count, 0);
53 INIT_LIST_HEAD(&gl->gl_reclaim);
54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0);
56 }
57}
58
59/**
60 * init_gfs2_fs - Register GFS2 as a filesystem
61 *
62 * Returns: 0 on success, error code on failure
63 */
64
65static int __init init_gfs2_fs(void)
66{
67 int error;
68
69 error = gfs2_sys_init();
70 if (error)
71 return error;
72
73 error = gfs2_glock_init();
74 if (error)
75 goto fail;
76
77 error = -ENOMEM;
78 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
79 sizeof(struct gfs2_glock),
80 0, 0,
81 gfs2_init_glock_once, NULL);
82 if (!gfs2_glock_cachep)
83 goto fail;
84
85 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
86 sizeof(struct gfs2_inode),
87 0, (SLAB_RECLAIM_ACCOUNT|
88 SLAB_PANIC|SLAB_MEM_SPREAD),
89 gfs2_init_inode_once, NULL);
90 if (!gfs2_inode_cachep)
91 goto fail;
92
93 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
94 sizeof(struct gfs2_bufdata),
95 0, 0, NULL, NULL);
96 if (!gfs2_bufdata_cachep)
97 goto fail;
98
99 error = register_filesystem(&gfs2_fs_type);
100 if (error)
101 goto fail;
102
103 error = register_filesystem(&gfs2meta_fs_type);
104 if (error)
105 goto fail_unregister;
106
107 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
108
109 return 0;
110
111fail_unregister:
112 unregister_filesystem(&gfs2_fs_type);
113fail:
114 if (gfs2_bufdata_cachep)
115 kmem_cache_destroy(gfs2_bufdata_cachep);
116
117 if (gfs2_inode_cachep)
118 kmem_cache_destroy(gfs2_inode_cachep);
119
120 if (gfs2_glock_cachep)
121 kmem_cache_destroy(gfs2_glock_cachep);
122
123 gfs2_sys_uninit();
124 return error;
125}
126
127/**
128 * exit_gfs2_fs - Unregister the file system
129 *
130 */
131
132static void __exit exit_gfs2_fs(void)
133{
134 unregister_filesystem(&gfs2_fs_type);
135 unregister_filesystem(&gfs2meta_fs_type);
136
137 kmem_cache_destroy(gfs2_bufdata_cachep);
138 kmem_cache_destroy(gfs2_inode_cachep);
139 kmem_cache_destroy(gfs2_glock_cachep);
140
141 gfs2_sys_uninit();
142}
143
144MODULE_DESCRIPTION("Global File System");
145MODULE_AUTHOR("Red Hat, Inc.");
146MODULE_LICENSE("GPL");
147
148module_init(init_gfs2_fs);
149module_exit(exit_gfs2_fs);
150
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
new file mode 100644
index 000000000000..3912d6a4b1e6
--- /dev/null
+++ b/fs/gfs2/meta_io.c
@@ -0,0 +1,590 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/writeback.h>
18#include <linux/swap.h>
19#include <linux/delay.h>
20#include <linux/bio.h>
21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h>
23
24#include "gfs2.h"
25#include "incore.h"
26#include "glock.h"
27#include "glops.h"
28#include "inode.h"
29#include "log.h"
30#include "lops.h"
31#include "meta_io.h"
32#include "rgrp.h"
33#include "trans.h"
34#include "util.h"
35#include "ops_address.h"
36
37static int aspace_get_block(struct inode *inode, sector_t lblock,
38 struct buffer_head *bh_result, int create)
39{
40 gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
41 return -EOPNOTSUPP;
42}
43
44static int gfs2_aspace_writepage(struct page *page,
45 struct writeback_control *wbc)
46{
47 return block_write_full_page(page, aspace_get_block, wbc);
48}
49
50static const struct address_space_operations aspace_aops = {
51 .writepage = gfs2_aspace_writepage,
52 .releasepage = gfs2_releasepage,
53};
54
55/**
56 * gfs2_aspace_get - Create and initialize a struct inode structure
57 * @sdp: the filesystem the aspace is in
58 *
59 * Right now a struct inode is just a struct inode. Maybe Linux
60 * will supply a more lightweight address space construct (that works)
61 * in the future.
62 *
63 * Make sure pages/buffers in this aspace aren't in high memory.
64 *
65 * Returns: the aspace
66 */
67
68struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
69{
70 struct inode *aspace;
71
72 aspace = new_inode(sdp->sd_vfs);
73 if (aspace) {
74 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
75 aspace->i_mapping->a_ops = &aspace_aops;
76 aspace->i_size = ~0ULL;
77 aspace->i_private = NULL;
78 insert_inode_hash(aspace);
79 }
80 return aspace;
81}
82
/**
 * gfs2_aspace_put - Release an inode obtained from gfs2_aspace_get()
 * @aspace: the address-space inode
 *
 * Removes the inode from the inode hash and drops a reference with iput().
 */

void gfs2_aspace_put(struct inode *aspace)
{
	remove_inode_hash(aspace);
	iput(aspace);
}
88
89/**
90 * gfs2_meta_inval - Invalidate all buffers associated with a glock
91 * @gl: the glock
92 *
93 */
94
95void gfs2_meta_inval(struct gfs2_glock *gl)
96{
97 struct gfs2_sbd *sdp = gl->gl_sbd;
98 struct inode *aspace = gl->gl_aspace;
99 struct address_space *mapping = gl->gl_aspace->i_mapping;
100
101 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
102
103 atomic_inc(&aspace->i_writecount);
104 truncate_inode_pages(mapping, 0);
105 atomic_dec(&aspace->i_writecount);
106
107 gfs2_assert_withdraw(sdp, !mapping->nrpages);
108}
109
110/**
111 * gfs2_meta_sync - Sync all buffers associated with a glock
112 * @gl: The glock
113 *
114 */
115
116void gfs2_meta_sync(struct gfs2_glock *gl)
117{
118 struct address_space *mapping = gl->gl_aspace->i_mapping;
119 int error;
120
121 filemap_fdatawrite(mapping);
122 error = filemap_fdatawait(mapping);
123
124 if (error)
125 gfs2_io_error(gl->gl_sbd);
126}
127
128/**
129 * getbuf - Get a buffer with a given address space
130 * @sdp: the filesystem
131 * @aspace: the address space
132 * @blkno: the block number (filesystem scope)
133 * @create: 1 if the buffer should be created
134 *
135 * Returns: the buffer
136 */
137
138static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
139 u64 blkno, int create)
140{
141 struct page *page;
142 struct buffer_head *bh;
143 unsigned int shift;
144 unsigned long index;
145 unsigned int bufnum;
146
147 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
148 index = blkno >> shift; /* convert block to page */
149 bufnum = blkno - (index << shift); /* block buf index within page */
150
151 if (create) {
152 for (;;) {
153 page = grab_cache_page(aspace->i_mapping, index);
154 if (page)
155 break;
156 yield();
157 }
158 } else {
159 page = find_lock_page(aspace->i_mapping, index);
160 if (!page)
161 return NULL;
162 }
163
164 if (!page_has_buffers(page))
165 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
166
167 /* Locate header for our buffer within our page */
168 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
169 /* Do nothing */;
170 get_bh(bh);
171
172 if (!buffer_mapped(bh))
173 map_bh(bh, sdp->sd_vfs, blkno);
174
175 unlock_page(page);
176 mark_page_accessed(page);
177 page_cache_release(page);
178
179 return bh;
180}
181
182static void meta_prep_new(struct buffer_head *bh)
183{
184 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
185
186 lock_buffer(bh);
187 clear_buffer_dirty(bh);
188 set_buffer_uptodate(bh);
189 unlock_buffer(bh);
190
191 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
192}
193
194/**
195 * gfs2_meta_new - Get a block
196 * @gl: The glock associated with this block
197 * @blkno: The block number
198 *
199 * Returns: The buffer
200 */
201
202struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
203{
204 struct buffer_head *bh;
205 bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
206 meta_prep_new(bh);
207 return bh;
208}
209
210/**
211 * gfs2_meta_read - Read a block from disk
212 * @gl: The glock covering the block
213 * @blkno: The block number
214 * @flags: flags
215 * @bhp: the place where the buffer is returned (NULL on failure)
216 *
217 * Returns: errno
218 */
219
220int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
221 struct buffer_head **bhp)
222{
223 *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
224 if (!buffer_uptodate(*bhp))
225 ll_rw_block(READ_META, 1, bhp);
226 if (flags & DIO_WAIT) {
227 int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
228 if (error) {
229 brelse(*bhp);
230 return error;
231 }
232 }
233
234 return 0;
235}
236
237/**
238 * gfs2_meta_wait - Reread a block from disk
239 * @sdp: the filesystem
240 * @bh: The block to wait for
241 *
242 * Returns: errno
243 */
244
245int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
246{
247 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
248 return -EIO;
249
250 wait_on_buffer(bh);
251
252 if (!buffer_uptodate(bh)) {
253 struct gfs2_trans *tr = current->journal_info;
254 if (tr && tr->tr_touched)
255 gfs2_io_error_bh(sdp, bh);
256 return -EIO;
257 }
258 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
259 return -EIO;
260
261 return 0;
262}
263
264/**
265 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
266 * @gl: the glock the buffer belongs to
267 * @bh: The buffer to be attached to
268 * @meta: Flag to indicate whether its metadata or not
269 */
270
271void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
272 int meta)
273{
274 struct gfs2_bufdata *bd;
275
276 if (meta)
277 lock_page(bh->b_page);
278
279 if (bh->b_private) {
280 if (meta)
281 unlock_page(bh->b_page);
282 return;
283 }
284
285 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
286 memset(bd, 0, sizeof(struct gfs2_bufdata));
287 bd->bd_bh = bh;
288 bd->bd_gl = gl;
289
290 INIT_LIST_HEAD(&bd->bd_list_tr);
291 if (meta)
292 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
293 else
294 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
295 bh->b_private = bd;
296
297 if (meta)
298 unlock_page(bh->b_page);
299}
300
301/**
302 * gfs2_pin - Pin a buffer in memory
303 * @sdp: the filesystem the buffer belongs to
304 * @bh: The buffer to be pinned
305 *
306 */
307
308void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
309{
310 struct gfs2_bufdata *bd = bh->b_private;
311
312 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
313
314 if (test_set_buffer_pinned(bh))
315 gfs2_assert_withdraw(sdp, 0);
316
317 wait_on_buffer(bh);
318
319 /* If this buffer is in the AIL and it has already been written
320 to in-place disk block, remove it from the AIL. */
321
322 gfs2_log_lock(sdp);
323 if (bd->bd_ail && !buffer_in_io(bh))
324 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
325 gfs2_log_unlock(sdp);
326
327 clear_buffer_dirty(bh);
328 wait_on_buffer(bh);
329
330 if (!buffer_uptodate(bh))
331 gfs2_io_error_bh(sdp, bh);
332
333 get_bh(bh);
334}
335
336/**
337 * gfs2_unpin - Unpin a buffer
338 * @sdp: the filesystem the buffer belongs to
339 * @bh: The buffer to unpin
340 * @ai:
341 *
342 */
343
344void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
345 struct gfs2_ail *ai)
346{
347 struct gfs2_bufdata *bd = bh->b_private;
348
349 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
350
351 if (!buffer_pinned(bh))
352 gfs2_assert_withdraw(sdp, 0);
353
354 mark_buffer_dirty(bh);
355 clear_buffer_pinned(bh);
356
357 gfs2_log_lock(sdp);
358 if (bd->bd_ail) {
359 list_del(&bd->bd_ail_st_list);
360 brelse(bh);
361 } else {
362 struct gfs2_glock *gl = bd->bd_gl;
363 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
364 atomic_inc(&gl->gl_ail_count);
365 }
366 bd->bd_ail = ai;
367 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
368 gfs2_log_unlock(sdp);
369}
370
371/**
372 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
373 * @ip: the inode who owns the buffers
374 * @bstart: the first buffer in the run
375 * @blen: the number of buffers in the run
376 *
 * Walks blocks @bstart .. @bstart + @blen - 1.  Blocks with no page in the
 * inode glock's address space are skipped (getbuf() is called with
 * NO_CREATE).  For each cached buffer: a pinned buffer is unpinned and
 * removed from the log's buffer accounting, a buffer in the AIL is removed
 * from it and a revoke is added for its block, and finally the buffer is
 * marked neither dirty nor up to date.
377 */
378
379void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
380{
381 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
382 struct inode *aspace = ip->i_gl->gl_aspace;
383 struct buffer_head *bh;
384
385 while (blen) {
/* Lookup only; a non-NULL result carries a reference dropped at the end */
386 bh = getbuf(sdp, aspace, bstart, NO_CREATE);
387 if (bh) {
388 struct gfs2_bufdata *bd = bh->b_private;
389
/*
 * Pinned case.  NOTE(review): bd and tr are dereferenced here without a
 * NULL check, so a pinned buffer is assumed to have bufdata attached and
 * the caller is assumed to be inside a transaction - confirm.
 */
390 if (test_clear_buffer_pinned(bh)) {
391 struct gfs2_trans *tr = current->journal_info;
392 gfs2_log_lock(sdp);
393 list_del_init(&bd->bd_le.le_list);
394 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
395 sdp->sd_log_num_buf--;
396 gfs2_log_unlock(sdp);
397 tr->tr_num_buf_rm++;
/* presumably drops the reference gfs2_pin() took with get_bh() */
398 brelse(bh);
399 }
400 if (bd) {
401 gfs2_log_lock(sdp);
402 if (bd->bd_ail) {
/* Save the block number, then detach the buffer from both AIL lists */
403 u64 blkno = bh->b_blocknr;
404 bd->bd_ail = NULL;
405 list_del(&bd->bd_ail_st_list);
406 list_del(&bd->bd_ail_gl_list);
407 atomic_dec(&bd->bd_gl->gl_ail_count);
408 brelse(bh);
/* The revoke is added only after the log lock has been released */
409 gfs2_log_unlock(sdp);
410 gfs2_trans_add_revoke(sdp, blkno);
411 } else
412 gfs2_log_unlock(sdp);
413 }
414
/* Discard the contents: clear dirty and uptodate under the buffer lock */
415 lock_buffer(bh);
416 clear_buffer_dirty(bh);
417 clear_buffer_uptodate(bh);
418 unlock_buffer(bh);
419
/* Drop the reference obtained from getbuf() above */
420 brelse(bh);
421 }
422
423 bstart++;
424 blen--;
425 }
426}
427
428/**
429 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
430 * @ip: The GFS2 inode
431 *
432 * This releases buffers that are in the most-recently-used array of
433 * blocks used for indirect block addressing for this inode.
434 */
435
436void gfs2_meta_cache_flush(struct gfs2_inode *ip)
437{
438 struct buffer_head **bh_slot;
439 unsigned int x;
440
441 spin_lock(&ip->i_spin);
442
443 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
444 bh_slot = &ip->i_cache[x];
445 if (!*bh_slot)
446 break;
447 brelse(*bh_slot);
448 *bh_slot = NULL;
449 }
450
451 spin_unlock(&ip->i_spin);
452}
453
454/**
455 * gfs2_meta_indirect_buffer - Get a metadata buffer
456 * @ip: The GFS2 inode
457 * @height: The level of this buf in the metadata (indir addr) tree (if any)
458 * @num: The block number (device relative) of the buffer
459 * @new: Non-zero if we may create a new buffer
460 * @bhp: the buffer is returned here
461 *
462 * Try to use the gfs2_inode's MRU metadata tree cache.
463 *
464 * Returns: errno
465 */
466
467int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
468 int new, struct buffer_head **bhp)
469{
470 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
471 struct gfs2_glock *gl = ip->i_gl;
472 struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
473 int in_cache = 0;
474
475 spin_lock(&ip->i_spin);
476 if (*bh_slot && (*bh_slot)->b_blocknr == num) {
477 bh = *bh_slot;
478 get_bh(bh);
479 in_cache = 1;
480 }
481 spin_unlock(&ip->i_spin);
482
483 if (!bh)
484 bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);
485
486 if (!bh)
487 return -ENOBUFS;
488
489 if (new) {
490 if (gfs2_assert_warn(sdp, height))
491 goto err;
492 meta_prep_new(bh);
493 gfs2_trans_add_bh(ip->i_gl, bh, 1);
494 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
495 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
496 } else {
497 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
498 if (!buffer_uptodate(bh)) {
499 ll_rw_block(READ_META, 1, &bh);
500 if (gfs2_meta_wait(sdp, bh))
501 goto err;
502 }
503 if (gfs2_metatype_check(sdp, bh, mtype))
504 goto err;
505 }
506
507 if (!in_cache) {
508 spin_lock(&ip->i_spin);
509 if (*bh_slot)
510 brelse(*bh_slot);
511 *bh_slot = bh;
512 get_bh(bh);
513 spin_unlock(&ip->i_spin);
514 }
515
516 *bhp = bh;
517 return 0;
518err:
519 brelse(bh);
520 return -EIO;
521}
522
523/**
524 * gfs2_meta_ra - start readahead on an extent of a file
525 * @gl: the glock the blocks belong to
526 * @dblock: the starting disk block
527 * @extlen: the number of blocks in the extent
528 *
529 * returns: the first buffer in the extent
530 */
531
532struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
533{
534 struct gfs2_sbd *sdp = gl->gl_sbd;
535 struct inode *aspace = gl->gl_aspace;
536 struct buffer_head *first_bh, *bh;
537 u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
538 sdp->sd_sb.sb_bsize_shift;
539
540 BUG_ON(!extlen);
541
542 if (max_ra < 1)
543 max_ra = 1;
544 if (extlen > max_ra)
545 extlen = max_ra;
546
547 first_bh = getbuf(sdp, aspace, dblock, CREATE);
548
549 if (buffer_uptodate(first_bh))
550 goto out;
551 if (!buffer_locked(first_bh))
552 ll_rw_block(READ_META, 1, &first_bh);
553
554 dblock++;
555 extlen--;
556
557 while (extlen) {
558 bh = getbuf(sdp, aspace, dblock, CREATE);
559
560 if (!buffer_uptodate(bh) && !buffer_locked(bh))
561 ll_rw_block(READA, 1, &bh);
562 brelse(bh);
563 dblock++;
564 extlen--;
565 if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
566 goto out;
567 }
568
569 wait_on_buffer(first_bh);
570out:
571 return first_bh;
572}
573
574/**
575 * gfs2_meta_syncfs - sync all the buffers in a filesystem
576 * @sdp: the filesystem
577 *
578 */
579
580void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
581{
582 gfs2_log_flush(sdp, NULL);
583 for (;;) {
584 gfs2_ail1_start(sdp, DIO_ALL);
585 if (gfs2_ail1_empty(sdp, DIO_ALL))
586 break;
587 msleep(10);
588 }
589}
590
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
new file mode 100644
index 000000000000..3ec939e20dff
--- /dev/null
+++ b/fs/gfs2/meta_io.h
@@ -0,0 +1,78 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __DIO_DOT_H__
11#define __DIO_DOT_H__
12
13#include <linux/buffer_head.h>
14#include <linux/string.h>
15#include "incore.h"
16
17static inline void gfs2_buffer_clear(struct buffer_head *bh)
18{
19 memset(bh->b_data, 0, bh->b_size);
20}
21
22static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
23{
24 BUG_ON(head > bh->b_size);
25 memset(bh->b_data + head, 0, bh->b_size - head);
26}
27
28static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
29 int to_head,
30 struct buffer_head *from_bh,
31 int from_head)
32{
33 BUG_ON(from_head < to_head);
34 memcpy(to_bh->b_data + to_head, from_bh->b_data + from_head,
35 from_bh->b_size - from_head);
36 memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
37 0, from_head - to_head);
38}
39
40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
41void gfs2_aspace_put(struct inode *aspace);
42
43void gfs2_meta_inval(struct gfs2_glock *gl);
44void gfs2_meta_sync(struct gfs2_glock *gl);
45
46struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
47int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
48 int flags, struct buffer_head **bhp);
49int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
50
51void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
52 int meta);
53void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
54void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
55 struct gfs2_ail *ai);
56
57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
58
59void gfs2_meta_cache_flush(struct gfs2_inode *ip);
60int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
61 int new, struct buffer_head **bhp);
62
63static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
64 struct buffer_head **bhp)
65{
66 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
67}
68
69struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
70void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
71
/* Non-zero when any of BH_Dirty, BH_Lock or BH_Pinned is set in bh->b_state */
72#define buffer_busy(bh) \
73((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
/* Non-zero when BH_Dirty or BH_Lock is set in bh->b_state (pinned state is ignored) */
74#define buffer_in_io(bh) \
75((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
76
77#endif /* __DIO_DOT_H__ */
78
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
new file mode 100644
index 000000000000..ef3092e29607
--- /dev/null
+++ b/fs/gfs2/mount.c
@@ -0,0 +1,214 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "mount.h"
21#include "sys.h"
22#include "util.h"
23
24/**
25 * gfs2_mount_args - Parse mount options
26 * @sdp:
27 * @data:
28 *
29 * Return: errno
30 */
31
32int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
33{
34 struct gfs2_args *args = &sdp->sd_args;
35 char *data = data_arg;
36 char *options, *o, *v;
37 int error = 0;
38
39 if (!remount) {
40 /* If someone preloaded options, use those instead */
41 spin_lock(&gfs2_sys_margs_lock);
42 if (gfs2_sys_margs) {
43 data = gfs2_sys_margs;
44 gfs2_sys_margs = NULL;
45 }
46 spin_unlock(&gfs2_sys_margs_lock);
47
48 /* Set some defaults */
49 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
50 args->ar_quota = GFS2_QUOTA_DEFAULT;
51 args->ar_data = GFS2_DATA_DEFAULT;
52 }
53
54 /* Split the options into tokens with the "," character and
55 process them */
56
57 for (options = data; (o = strsep(&options, ",")); ) {
58 if (!*o)
59 continue;
60
61 v = strchr(o, '=');
62 if (v)
63 *v++ = 0;
64
65 if (!strcmp(o, "lockproto")) {
66 if (!v)
67 goto need_value;
68 if (remount && strcmp(v, args->ar_lockproto))
69 goto cant_remount;
70 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
71 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
72 }
73
74 else if (!strcmp(o, "locktable")) {
75 if (!v)
76 goto need_value;
77 if (remount && strcmp(v, args->ar_locktable))
78 goto cant_remount;
79 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
80 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
81 }
82
83 else if (!strcmp(o, "hostdata")) {
84 if (!v)
85 goto need_value;
86 if (remount && strcmp(v, args->ar_hostdata))
87 goto cant_remount;
88 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
89 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
90 }
91
92 else if (!strcmp(o, "spectator")) {
93 if (remount && !args->ar_spectator)
94 goto cant_remount;
95 args->ar_spectator = 1;
96 sdp->sd_vfs->s_flags |= MS_RDONLY;
97 }
98
99 else if (!strcmp(o, "ignore_local_fs")) {
100 if (remount && !args->ar_ignore_local_fs)
101 goto cant_remount;
102 args->ar_ignore_local_fs = 1;
103 }
104
105 else if (!strcmp(o, "localflocks")) {
106 if (remount && !args->ar_localflocks)
107 goto cant_remount;
108 args->ar_localflocks = 1;
109 }
110
111 else if (!strcmp(o, "localcaching")) {
112 if (remount && !args->ar_localcaching)
113 goto cant_remount;
114 args->ar_localcaching = 1;
115 }
116
117 else if (!strcmp(o, "debug"))
118 args->ar_debug = 1;
119
120 else if (!strcmp(o, "nodebug"))
121 args->ar_debug = 0;
122
123 else if (!strcmp(o, "upgrade")) {
124 if (remount && !args->ar_upgrade)
125 goto cant_remount;
126 args->ar_upgrade = 1;
127 }
128
129 else if (!strcmp(o, "num_glockd")) {
130 unsigned int x;
131 if (!v)
132 goto need_value;
133 sscanf(v, "%u", &x);
134 if (remount && x != args->ar_num_glockd)
135 goto cant_remount;
136 if (!x || x > GFS2_GLOCKD_MAX) {
137 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
138 GFS2_GLOCKD_MAX, x);
139 error = -EINVAL;
140 break;
141 }
142 args->ar_num_glockd = x;
143 }
144
145 else if (!strcmp(o, "acl")) {
146 args->ar_posix_acl = 1;
147 sdp->sd_vfs->s_flags |= MS_POSIXACL;
148 }
149
150 else if (!strcmp(o, "noacl")) {
151 args->ar_posix_acl = 0;
152 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
153 }
154
155 else if (!strcmp(o, "quota")) {
156 if (!v)
157 goto need_value;
158 if (!strcmp(v, "off"))
159 args->ar_quota = GFS2_QUOTA_OFF;
160 else if (!strcmp(v, "account"))
161 args->ar_quota = GFS2_QUOTA_ACCOUNT;
162 else if (!strcmp(v, "on"))
163 args->ar_quota = GFS2_QUOTA_ON;
164 else {
165 fs_info(sdp, "invalid value for quota\n");
166 error = -EINVAL;
167 break;
168 }
169 }
170
171 else if (!strcmp(o, "suiddir"))
172 args->ar_suiddir = 1;
173
174 else if (!strcmp(o, "nosuiddir"))
175 args->ar_suiddir = 0;
176
177 else if (!strcmp(o, "data")) {
178 if (!v)
179 goto need_value;
180 if (!strcmp(v, "writeback"))
181 args->ar_data = GFS2_DATA_WRITEBACK;
182 else if (!strcmp(v, "ordered"))
183 args->ar_data = GFS2_DATA_ORDERED;
184 else {
185 fs_info(sdp, "invalid value for data\n");
186 error = -EINVAL;
187 break;
188 }
189 }
190
191 else {
192 fs_info(sdp, "unknown option: %s\n", o);
193 error = -EINVAL;
194 break;
195 }
196 }
197
198 if (error)
199 fs_info(sdp, "invalid mount option(s)\n");
200
201 if (data != data_arg)
202 kfree(data);
203
204 return error;
205
206need_value:
207 fs_info(sdp, "need value for option %s\n", o);
208 return -EINVAL;
209
210cant_remount:
211 fs_info(sdp, "can't remount with option %s\n", o);
212 return -EINVAL;
213}
214
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
new file mode 100644
index 000000000000..401288acfdf3
--- /dev/null
+++ b/fs/gfs2/mount.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13struct gfs2_sbd;
14
15int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
16
17#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
new file mode 100644
index 000000000000..1025960b0e6e
--- /dev/null
+++ b/fs/gfs2/ondisk.c
@@ -0,0 +1,308 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15
16#include "gfs2.h"
17#include <linux/gfs2_ondisk.h>
18
/*
 * pv - print one member of a structure as " member = value"
 * @ptr: pointer to the structure
 * @member: member name (also used as the printed label)
 * @fmt: printk conversion for the member's type
 *
 * Expands to a single expression with no trailing semicolon, so it is
 * used like an ordinary function call.
 */
#define pv(ptr, member, fmt) \
	printk(KERN_INFO " "#member" = "fmt"\n", (ptr)->member)
21
22/*
23 * gfs2_xxx_in - read in an xxx struct
24 * first arg: the cpu-order structure
25 * buf: the disk-order buffer
26 *
27 * gfs2_xxx_out - write out an xxx struct
28 * first arg: the cpu-order structure
29 * buf: the disk-order buffer
30 *
31 * gfs2_xxx_print - print out an xxx struct
32 * first arg: the cpu-order structure
33 */
34
35void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
36{
37 const struct gfs2_inum *str = buf;
38
39 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
40 no->no_addr = be64_to_cpu(str->no_addr);
41}
42
43void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
44{
45 struct gfs2_inum *str = buf;
46
47 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
48 str->no_addr = cpu_to_be64(no->no_addr);
49}
50
51static void gfs2_inum_print(const struct gfs2_inum *no)
52{
53 printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
54 printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
55}
56
57static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
58{
59 const struct gfs2_meta_header *str = buf;
60
61 mh->mh_magic = be32_to_cpu(str->mh_magic);
62 mh->mh_type = be32_to_cpu(str->mh_type);
63 mh->mh_format = be32_to_cpu(str->mh_format);
64}
65
66static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
67{
68 struct gfs2_meta_header *str = buf;
69
70 str->mh_magic = cpu_to_be32(mh->mh_magic);
71 str->mh_type = cpu_to_be32(mh->mh_type);
72 str->mh_format = cpu_to_be32(mh->mh_format);
73}
74
75static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
76{
77 pv(mh, mh_magic, "0x%.8X");
78 pv(mh, mh_type, "%u");
79 pv(mh, mh_format, "%u");
80}
81
82void gfs2_sb_in(struct gfs2_sb *sb, const void *buf)
83{
84 const struct gfs2_sb *str = buf;
85
86 gfs2_meta_header_in(&sb->sb_header, buf);
87
88 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
89 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
90 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
91 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
92
93 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
94 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
95
96 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
97 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
98}
99
100void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
101{
102 const struct gfs2_rindex *str = buf;
103
104 ri->ri_addr = be64_to_cpu(str->ri_addr);
105 ri->ri_length = be32_to_cpu(str->ri_length);
106 ri->ri_data0 = be64_to_cpu(str->ri_data0);
107 ri->ri_data = be32_to_cpu(str->ri_data);
108 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
109
110}
111
112void gfs2_rindex_print(const struct gfs2_rindex *ri)
113{
114 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
115 pv(ri, ri_length, "%u");
116
117 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
118 pv(ri, ri_data, "%u");
119
120 pv(ri, ri_bitbytes, "%u");
121}
122
123void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
124{
125 const struct gfs2_rgrp *str = buf;
126
127 gfs2_meta_header_in(&rg->rg_header, buf);
128 rg->rg_flags = be32_to_cpu(str->rg_flags);
129 rg->rg_free = be32_to_cpu(str->rg_free);
130 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
131 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
132}
133
134void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf)
135{
136 struct gfs2_rgrp *str = buf;
137
138 gfs2_meta_header_out(&rg->rg_header, buf);
139 str->rg_flags = cpu_to_be32(rg->rg_flags);
140 str->rg_free = cpu_to_be32(rg->rg_free);
141 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
142 str->__pad = cpu_to_be32(0);
143 str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
144 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
145}
146
147void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
148{
149 const struct gfs2_quota *str = buf;
150
151 qu->qu_limit = be64_to_cpu(str->qu_limit);
152 qu->qu_warn = be64_to_cpu(str->qu_warn);
153 qu->qu_value = be64_to_cpu(str->qu_value);
154}
155
156void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
157{
158 const struct gfs2_dinode *str = buf;
159
160 gfs2_meta_header_in(&di->di_header, buf);
161 gfs2_inum_in(&di->di_num, &str->di_num);
162
163 di->di_mode = be32_to_cpu(str->di_mode);
164 di->di_uid = be32_to_cpu(str->di_uid);
165 di->di_gid = be32_to_cpu(str->di_gid);
166 di->di_nlink = be32_to_cpu(str->di_nlink);
167 di->di_size = be64_to_cpu(str->di_size);
168 di->di_blocks = be64_to_cpu(str->di_blocks);
169 di->di_atime = be64_to_cpu(str->di_atime);
170 di->di_mtime = be64_to_cpu(str->di_mtime);
171 di->di_ctime = be64_to_cpu(str->di_ctime);
172 di->di_major = be32_to_cpu(str->di_major);
173 di->di_minor = be32_to_cpu(str->di_minor);
174
175 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
176 di->di_goal_data = be64_to_cpu(str->di_goal_data);
177 di->di_generation = be64_to_cpu(str->di_generation);
178
179 di->di_flags = be32_to_cpu(str->di_flags);
180 di->di_payload_format = be32_to_cpu(str->di_payload_format);
181 di->di_height = be16_to_cpu(str->di_height);
182
183 di->di_depth = be16_to_cpu(str->di_depth);
184 di->di_entries = be32_to_cpu(str->di_entries);
185
186 di->di_eattr = be64_to_cpu(str->di_eattr);
187
188}
189
190void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
191{
192 struct gfs2_dinode *str = buf;
193
194 gfs2_meta_header_out(&di->di_header, buf);
195 gfs2_inum_out(&di->di_num, (char *)&str->di_num);
196
197 str->di_mode = cpu_to_be32(di->di_mode);
198 str->di_uid = cpu_to_be32(di->di_uid);
199 str->di_gid = cpu_to_be32(di->di_gid);
200 str->di_nlink = cpu_to_be32(di->di_nlink);
201 str->di_size = cpu_to_be64(di->di_size);
202 str->di_blocks = cpu_to_be64(di->di_blocks);
203 str->di_atime = cpu_to_be64(di->di_atime);
204 str->di_mtime = cpu_to_be64(di->di_mtime);
205 str->di_ctime = cpu_to_be64(di->di_ctime);
206 str->di_major = cpu_to_be32(di->di_major);
207 str->di_minor = cpu_to_be32(di->di_minor);
208
209 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
210 str->di_goal_data = cpu_to_be64(di->di_goal_data);
211 str->di_generation = cpu_to_be64(di->di_generation);
212
213 str->di_flags = cpu_to_be32(di->di_flags);
214 str->di_payload_format = cpu_to_be32(di->di_payload_format);
215 str->di_height = cpu_to_be16(di->di_height);
216
217 str->di_depth = cpu_to_be16(di->di_depth);
218 str->di_entries = cpu_to_be32(di->di_entries);
219
220 str->di_eattr = cpu_to_be64(di->di_eattr);
221
222}
223
224void gfs2_dinode_print(const struct gfs2_dinode *di)
225{
226 gfs2_meta_header_print(&di->di_header);
227 gfs2_inum_print(&di->di_num);
228
229 pv(di, di_mode, "0%o");
230 pv(di, di_uid, "%u");
231 pv(di, di_gid, "%u");
232 pv(di, di_nlink, "%u");
233 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
234 printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
235 printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime);
236 printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime);
237 printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime);
238 pv(di, di_major, "%u");
239 pv(di, di_minor, "%u");
240
241 printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
242 printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
243
244 pv(di, di_flags, "0x%.8X");
245 pv(di, di_payload_format, "%u");
246 pv(di, di_height, "%u");
247
248 pv(di, di_depth, "%u");
249 pv(di, di_entries, "%u");
250
251 printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
252}
253
254void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
255{
256 const struct gfs2_log_header *str = buf;
257
258 gfs2_meta_header_in(&lh->lh_header, buf);
259 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
260 lh->lh_flags = be32_to_cpu(str->lh_flags);
261 lh->lh_tail = be32_to_cpu(str->lh_tail);
262 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
263 lh->lh_hash = be32_to_cpu(str->lh_hash);
264}
265
266void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
267{
268 const struct gfs2_inum_range *str = buf;
269
270 ir->ir_start = be64_to_cpu(str->ir_start);
271 ir->ir_length = be64_to_cpu(str->ir_length);
272}
273
274void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf)
275{
276 struct gfs2_inum_range *str = buf;
277
278 str->ir_start = cpu_to_be64(ir->ir_start);
279 str->ir_length = cpu_to_be64(ir->ir_length);
280}
281
282void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
283{
284 const struct gfs2_statfs_change *str = buf;
285
286 sc->sc_total = be64_to_cpu(str->sc_total);
287 sc->sc_free = be64_to_cpu(str->sc_free);
288 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
289}
290
291void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf)
292{
293 struct gfs2_statfs_change *str = buf;
294
295 str->sc_total = cpu_to_be64(sc->sc_total);
296 str->sc_free = cpu_to_be64(sc->sc_free);
297 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
298}
299
300void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf)
301{
302 const struct gfs2_quota_change *str = buf;
303
304 qc->qc_change = be64_to_cpu(str->qc_change);
305 qc->qc_flags = be32_to_cpu(str->qc_flags);
306 qc->qc_id = be32_to_cpu(str->qc_id);
307}
308
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
new file mode 100644
index 000000000000..4fb743f4e4a4
--- /dev/null
+++ b/fs/gfs2/ops_address.c
@@ -0,0 +1,790 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/pagevec.h>
17#include <linux/mpage.h>
18#include <linux/fs.h>
19#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h>
21
22#include "gfs2.h"
23#include "incore.h"
24#include "bmap.h"
25#include "glock.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "ops_address.h"
30#include "quota.h"
31#include "trans.h"
32#include "rgrp.h"
33#include "ops_file.h"
34#include "util.h"
35#include "glops.h"
36
37
38static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
39 unsigned int from, unsigned int to)
40{
41 struct buffer_head *head = page_buffers(page);
42 unsigned int bsize = head->b_size;
43 struct buffer_head *bh;
44 unsigned int start, end;
45
46 for (bh = head, start = 0; bh != head || !start;
47 bh = bh->b_this_page, start = end) {
48 end = start + bsize;
49 if (end <= from || start >= to)
50 continue;
51 gfs2_trans_add_bh(ip->i_gl, bh, 0);
52 }
53}
54
55/**
56 * gfs2_get_block - Fills in a buffer head with details about a block
57 * @inode: The inode
58 * @lblock: The block number to look up
59 * @bh_result: The buffer head to return the result in
60 * @create: Non-zero if we may add block to the file
61 *
62 * Returns: errno
63 */
64
65int gfs2_get_block(struct inode *inode, sector_t lblock,
66 struct buffer_head *bh_result, int create)
67{
68 return gfs2_block_map(inode, lblock, create, bh_result, 32);
69}
70
71/**
72 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
73 * @inode: The inode
74 * @lblock: The block number to look up
75 * @bh_result: The buffer head to return the result in
76 * @create: Non-zero if we may add block to the file
77 *
78 * Returns: errno
79 */
80
81static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
82 struct buffer_head *bh_result, int create)
83{
84 int error;
85
86 error = gfs2_block_map(inode, lblock, 0, bh_result, 1);
87 if (error)
88 return error;
89 if (bh_result->b_blocknr == 0)
90 return -EIO;
91 return 0;
92}
93
94static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
95 struct buffer_head *bh_result, int create)
96{
97 return gfs2_block_map(inode, lblock, 0, bh_result, 32);
98}
99
100/**
101 * gfs2_writepage - Write complete page
102 * @page: Page to write
103 *
104 * Returns: errno
105 *
106 * Some of this is copied from block_write_full_page() although we still
107 * call it to do most of the work.
108 */
109
110static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
111{
112 struct inode *inode = page->mapping->host;
113 struct gfs2_inode *ip = GFS2_I(inode);
114 struct gfs2_sbd *sdp = GFS2_SB(inode);
115 loff_t i_size = i_size_read(inode);
116 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
117 unsigned offset;
118 int error;
119 int done_trans = 0;
120
121 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
122 unlock_page(page);
123 return -EIO;
124 }
125 if (current->journal_info)
126 goto out_ignore;
127
128 /* Is the page fully outside i_size? (truncate in progress) */
129 offset = i_size & (PAGE_CACHE_SIZE-1);
130 if (page->index > end_index || (page->index == end_index && !offset)) {
131 page->mapping->a_ops->invalidatepage(page, 0);
132 unlock_page(page);
133 return 0; /* don't care */
134 }
135
136 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
137 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
138 if (error)
139 goto out_ignore;
140 if (!page_has_buffers(page)) {
141 create_empty_buffers(page, inode->i_sb->s_blocksize,
142 (1 << BH_Dirty)|(1 << BH_Uptodate));
143 }
144 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
145 done_trans = 1;
146 }
147 error = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
148 if (done_trans)
149 gfs2_trans_end(sdp);
150 gfs2_meta_cache_flush(ip);
151 return error;
152
153out_ignore:
154 redirty_page_for_writepage(wbc, page);
155 unlock_page(page);
156 return 0;
157}
158
159static int zero_readpage(struct page *page)
160{
161 void *kaddr;
162
163 kaddr = kmap_atomic(page, KM_USER0);
164 memset(kaddr, 0, PAGE_CACHE_SIZE);
165 kunmap_atomic(page, KM_USER0);
166
167 SetPageUptodate(page);
168
169 return 0;
170}
171
172/**
173 * stuffed_readpage - Fill in a Linux page with stuffed file data
174 * @ip: the inode
175 * @page: the page
176 *
177 * Returns: errno
178 */
179
180static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
181{
182 struct buffer_head *dibh;
183 void *kaddr;
184 int error;
185
186 /* Only the first page of a stuffed file might contain data */
187 if (unlikely(page->index))
188 return zero_readpage(page);
189
190 error = gfs2_meta_inode_buffer(ip, &dibh);
191 if (error)
192 return error;
193
194 kaddr = kmap_atomic(page, KM_USER0);
195 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
196 ip->i_di.di_size);
197 memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
198 kunmap_atomic(page, KM_USER0);
199
200 brelse(dibh);
201
202 SetPageUptodate(page);
203
204 return 0;
205}
206
207
208/**
209 * gfs2_readpage - readpage with locking
210 * @file: The file to read a page for. N.B. This may be NULL if we are
211 * reading an internal file.
212 * @page: The page to read
213 *
214 * Returns: errno
215 */
216
217static int gfs2_readpage(struct file *file, struct page *page)
218{
219 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
220 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
221 struct gfs2_file *gf = NULL;
222 struct gfs2_holder gh;
223 int error;
224 int do_unlock = 0;
225
226 if (likely(file != &gfs2_internal_file_sentinel)) {
227 if (file) {
228 gf = file->private_data;
229 if (test_bit(GFF_EXLOCK, &gf->f_flags))
230 /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
231 goto skip_lock;
232 }
233 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
234 do_unlock = 1;
235 error = gfs2_glock_nq_m_atime(1, &gh);
236 if (unlikely(error))
237 goto out_unlock;
238 }
239
240skip_lock:
241 if (gfs2_is_stuffed(ip)) {
242 error = stuffed_readpage(ip, page);
243 unlock_page(page);
244 } else
245 error = mpage_readpage(page, gfs2_get_block);
246
247 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
248 error = -EIO;
249
250 if (do_unlock) {
251 gfs2_glock_dq_m(1, &gh);
252 gfs2_holder_uninit(&gh);
253 }
254out:
255 return error;
256out_unlock:
257 unlock_page(page);
258 if (do_unlock)
259 gfs2_holder_uninit(&gh);
260 goto out;
261}
262
263/**
264 * gfs2_readpages - Read a bunch of pages at once
265 *
266 * Some notes:
267 * 1. This is only for readahead, so we can simply ignore any things
268 * which are slightly inconvenient (such as locking conflicts between
269 * the page lock and the glock) and return having done no I/O. Its
270 * obviously not something we'd want to do on too regular a basis.
271 * Any I/O we ignore at this time will be done via readpage later.
272 * 2. We have to handle stuffed files here too.
273 * 3. mpage_readpages() does most of the heavy lifting in the common case.
274 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
275 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
276 * well as read-ahead.
277 */
278static int gfs2_readpages(struct file *file, struct address_space *mapping,
279 struct list_head *pages, unsigned nr_pages)
280{
281 struct inode *inode = mapping->host;
282 struct gfs2_inode *ip = GFS2_I(inode);
283 struct gfs2_sbd *sdp = GFS2_SB(inode);
284 struct gfs2_holder gh;
285 unsigned page_idx;
286 int ret;
287 int do_unlock = 0;
288
289 if (likely(file != &gfs2_internal_file_sentinel)) {
290 if (file) {
291 struct gfs2_file *gf = file->private_data;
292 if (test_bit(GFF_EXLOCK, &gf->f_flags))
293 goto skip_lock;
294 }
295 gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
296 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
297 do_unlock = 1;
298 ret = gfs2_glock_nq_m_atime(1, &gh);
299 if (ret == GLR_TRYFAILED)
300 goto out_noerror;
301 if (unlikely(ret))
302 goto out_unlock;
303 }
304skip_lock:
305 if (gfs2_is_stuffed(ip)) {
306 struct pagevec lru_pvec;
307 pagevec_init(&lru_pvec, 0);
308 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
309 struct page *page = list_entry(pages->prev, struct page, lru);
310 prefetchw(&page->flags);
311 list_del(&page->lru);
312 if (!add_to_page_cache(page, mapping,
313 page->index, GFP_KERNEL)) {
314 ret = stuffed_readpage(ip, page);
315 unlock_page(page);
316 if (!pagevec_add(&lru_pvec, page))
317 __pagevec_lru_add(&lru_pvec);
318 } else {
319 page_cache_release(page);
320 }
321 }
322 pagevec_lru_add(&lru_pvec);
323 ret = 0;
324 } else {
325 /* What we really want to do .... */
326 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
327 }
328
329 if (do_unlock) {
330 gfs2_glock_dq_m(1, &gh);
331 gfs2_holder_uninit(&gh);
332 }
333out:
334 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
335 ret = -EIO;
336 return ret;
337out_noerror:
338 ret = 0;
339out_unlock:
340 /* unlock all pages, we can't do any I/O right now */
341 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
342 struct page *page = list_entry(pages->prev, struct page, lru);
343 list_del(&page->lru);
344 unlock_page(page);
345 page_cache_release(page);
346 }
347 if (do_unlock)
348 gfs2_holder_uninit(&gh);
349 goto out;
350}
351
352/**
353 * gfs2_prepare_write - Prepare to write a page to a file
354 * @file: The file to write to
355 * @page: The page which is to be prepared for writing
356 * @from: From (byte range within page)
357 * @to: To (byte range within page)
358 *
359 * Returns: errno
360 */
361
362static int gfs2_prepare_write(struct file *file, struct page *page,
363 unsigned from, unsigned to)
364{
365 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
366 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
367 unsigned int data_blocks, ind_blocks, rblocks;
368 int alloc_required;
369 int error = 0;
370 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
371 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
372 struct gfs2_alloc *al;
373
374 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
375 error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
376 if (error)
377 goto out_uninit;
378
379 gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
380
381 error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
382 if (error)
383 goto out_unlock;
384
385
386 if (alloc_required) {
387 al = gfs2_alloc_get(ip);
388
389 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
390 if (error)
391 goto out_alloc_put;
392
393 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
394 if (error)
395 goto out_qunlock;
396
397 al->al_requested = data_blocks + ind_blocks;
398 error = gfs2_inplace_reserve(ip);
399 if (error)
400 goto out_qunlock;
401 }
402
403 rblocks = RES_DINODE + ind_blocks;
404 if (gfs2_is_jdata(ip))
405 rblocks += data_blocks ? data_blocks : 1;
406 if (ind_blocks || data_blocks)
407 rblocks += RES_STATFS + RES_QUOTA;
408
409 error = gfs2_trans_begin(sdp, rblocks, 0);
410 if (error)
411 goto out;
412
413 if (gfs2_is_stuffed(ip)) {
414 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
415 error = gfs2_unstuff_dinode(ip, page);
416 if (error == 0)
417 goto prepare_write;
418 } else if (!PageUptodate(page))
419 error = stuffed_readpage(ip, page);
420 goto out;
421 }
422
423prepare_write:
424 error = block_prepare_write(page, from, to, gfs2_get_block);
425
426out:
427 if (error) {
428 gfs2_trans_end(sdp);
429 if (alloc_required) {
430 gfs2_inplace_release(ip);
431out_qunlock:
432 gfs2_quota_unlock(ip);
433out_alloc_put:
434 gfs2_alloc_put(ip);
435 }
436out_unlock:
437 gfs2_glock_dq_m(1, &ip->i_gh);
438out_uninit:
439 gfs2_holder_uninit(&ip->i_gh);
440 }
441
442 return error;
443}
444
445/**
446 * gfs2_commit_write - Commit write to a file
447 * @file: The file to write to
448 * @page: The page containing the data
449 * @from: From (byte range within page)
450 * @to: To (byte range within page)
451 *
452 * Returns: errno
453 */
454
455static int gfs2_commit_write(struct file *file, struct page *page,
456 unsigned from, unsigned to)
457{
458 struct inode *inode = page->mapping->host;
459 struct gfs2_inode *ip = GFS2_I(inode);
460 struct gfs2_sbd *sdp = GFS2_SB(inode);
461 int error = -EOPNOTSUPP;
462 struct buffer_head *dibh;
463 struct gfs2_alloc *al = &ip->i_alloc;
464 struct gfs2_dinode *di;
465
466 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
467 goto fail_nounlock;
468
469 error = gfs2_meta_inode_buffer(ip, &dibh);
470 if (error)
471 goto fail_endtrans;
472
473 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
474 di = (struct gfs2_dinode *)dibh->b_data;
475
476 if (gfs2_is_stuffed(ip)) {
477 u64 file_size;
478 void *kaddr;
479
480 file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to;
481
482 kaddr = kmap_atomic(page, KM_USER0);
483 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
484 kaddr + from, to - from);
485 kunmap_atomic(page, KM_USER0);
486
487 SetPageUptodate(page);
488
489 if (inode->i_size < file_size)
490 i_size_write(inode, file_size);
491 } else {
492 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
493 gfs2_is_jdata(ip))
494 gfs2_page_add_databufs(ip, page, from, to);
495 error = generic_commit_write(file, page, from, to);
496 if (error)
497 goto fail;
498 }
499
500 if (ip->i_di.di_size < inode->i_size) {
501 ip->i_di.di_size = inode->i_size;
502 di->di_size = cpu_to_be64(inode->i_size);
503 }
504
505 di->di_mode = cpu_to_be32(inode->i_mode);
506 di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
507 di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
508 di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
509
510 brelse(dibh);
511 gfs2_trans_end(sdp);
512 if (al->al_requested) {
513 gfs2_inplace_release(ip);
514 gfs2_quota_unlock(ip);
515 gfs2_alloc_put(ip);
516 }
517 gfs2_glock_dq_m(1, &ip->i_gh);
518 gfs2_holder_uninit(&ip->i_gh);
519 return 0;
520
521fail:
522 brelse(dibh);
523fail_endtrans:
524 gfs2_trans_end(sdp);
525 if (al->al_requested) {
526 gfs2_inplace_release(ip);
527 gfs2_quota_unlock(ip);
528 gfs2_alloc_put(ip);
529 }
530 gfs2_glock_dq_m(1, &ip->i_gh);
531 gfs2_holder_uninit(&ip->i_gh);
532fail_nounlock:
533 ClearPageUptodate(page);
534 return error;
535}
536
537/**
538 * gfs2_bmap - Block map function
539 * @mapping: Address space info
540 * @lblock: The block to map
541 *
542 * Returns: The disk address for the block or 0 on hole or error
543 */
544
545static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
546{
547 struct gfs2_inode *ip = GFS2_I(mapping->host);
548 struct gfs2_holder i_gh;
549 sector_t dblock = 0;
550 int error;
551
552 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
553 if (error)
554 return 0;
555
556 if (!gfs2_is_stuffed(ip))
557 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);
558
559 gfs2_glock_dq_uninit(&i_gh);
560
561 return dblock;
562}
563
564static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
565{
566 struct gfs2_bufdata *bd;
567
568 gfs2_log_lock(sdp);
569 bd = bh->b_private;
570 if (bd) {
571 bd->bd_bh = NULL;
572 bh->b_private = NULL;
573 }
574 gfs2_log_unlock(sdp);
575
576 lock_buffer(bh);
577 clear_buffer_dirty(bh);
578 bh->b_bdev = NULL;
579 clear_buffer_mapped(bh);
580 clear_buffer_req(bh);
581 clear_buffer_new(bh);
582 clear_buffer_delay(bh);
583 unlock_buffer(bh);
584}
585
586static void gfs2_invalidatepage(struct page *page, unsigned long offset)
587{
588 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
589 struct buffer_head *head, *bh, *next;
590 unsigned int curr_off = 0;
591
592 BUG_ON(!PageLocked(page));
593 if (!page_has_buffers(page))
594 return;
595
596 bh = head = page_buffers(page);
597 do {
598 unsigned int next_off = curr_off + bh->b_size;
599 next = bh->b_this_page;
600
601 if (offset <= curr_off)
602 discard_buffer(sdp, bh);
603
604 curr_off = next_off;
605 bh = next;
606 } while (bh != head);
607
608 if (!offset)
609 try_to_release_page(page, 0);
610
611 return;
612}
613
614static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
615 const struct iovec *iov, loff_t offset,
616 unsigned long nr_segs)
617{
618 struct file *file = iocb->ki_filp;
619 struct inode *inode = file->f_mapping->host;
620 struct gfs2_inode *ip = GFS2_I(inode);
621 struct gfs2_holder gh;
622 int rv;
623
624 if (rw == READ)
625 mutex_lock(&inode->i_mutex);
626 /*
627 * Shared lock, even if its a write, since we do no allocation
628 * on this path. All we need change is atime.
629 */
630 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
631 rv = gfs2_glock_nq_m_atime(1, &gh);
632 if (rv)
633 goto out;
634
635 if (offset > i_size_read(inode))
636 goto out;
637
638 /*
639 * Should we return an error here? I can't see that O_DIRECT for
640 * a journaled file makes any sense. For now we'll silently fall
641 * back to buffered I/O, likewise we do the same for stuffed
642 * files since they are (a) small and (b) unaligned.
643 */
644 if (gfs2_is_jdata(ip))
645 goto out;
646
647 if (gfs2_is_stuffed(ip))
648 goto out;
649
650 rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
651 inode->i_sb->s_bdev,
652 iov, offset, nr_segs,
653 gfs2_get_block_direct, NULL);
654out:
655 gfs2_glock_dq_m(1, &gh);
656 gfs2_holder_uninit(&gh);
657 if (rw == READ)
658 mutex_unlock(&inode->i_mutex);
659
660 return rv;
661}
662
663/**
664 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
665 * @bh: the buffer we're stuck on
666 *
667 */
668
669static void stuck_releasepage(struct buffer_head *bh)
670{
671 struct inode *inode = bh->b_page->mapping->host;
672 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
673 struct gfs2_bufdata *bd = bh->b_private;
674 struct gfs2_glock *gl;
675static unsigned limit = 0;
676
677 if (limit > 3)
678 return;
679 limit++;
680
681 fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
682 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
683 (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
684 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
685 fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
686
687 if (!bd)
688 return;
689
690 gl = bd->bd_gl;
691
692 fs_warn(sdp, "gl = (%u, %llu)\n",
693 gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
694
695 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
696 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
697 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
698
699 if (gl->gl_ops == &gfs2_inode_glops) {
700 struct gfs2_inode *ip = gl->gl_object;
701 unsigned int x;
702
703 if (!ip)
704 return;
705
706 fs_warn(sdp, "ip = %llu %llu\n",
707 (unsigned long long)ip->i_num.no_formal_ino,
708 (unsigned long long)ip->i_num.no_addr);
709
710 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
711 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
712 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
713 }
714}
715
716/**
717 * gfs2_releasepage - free the metadata associated with a page
718 * @page: the page that's being released
719 * @gfp_mask: passed from Linux VFS, ignored by us
720 *
721 * Call try_to_free_buffers() if the buffers in this page can be
722 * released.
723 *
724 * Returns: 0
725 */
726
727int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
728{
729 struct inode *aspace = page->mapping->host;
730 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
731 struct buffer_head *bh, *head;
732 struct gfs2_bufdata *bd;
733 unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
734
735 if (!page_has_buffers(page))
736 goto out;
737
738 head = bh = page_buffers(page);
739 do {
740 while (atomic_read(&bh->b_count)) {
741 if (!atomic_read(&aspace->i_writecount))
742 return 0;
743
744 if (time_after_eq(jiffies, t)) {
745 stuck_releasepage(bh);
746 /* should we withdraw here? */
747 return 0;
748 }
749
750 yield();
751 }
752
753 gfs2_assert_warn(sdp, !buffer_pinned(bh));
754 gfs2_assert_warn(sdp, !buffer_dirty(bh));
755
756 gfs2_log_lock(sdp);
757 bd = bh->b_private;
758 if (bd) {
759 gfs2_assert_warn(sdp, bd->bd_bh == bh);
760 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
761 gfs2_assert_warn(sdp, !bd->bd_ail);
762 bd->bd_bh = NULL;
763 if (!list_empty(&bd->bd_le.le_list))
764 bd = NULL;
765 bh->b_private = NULL;
766 }
767 gfs2_log_unlock(sdp);
768 if (bd)
769 kmem_cache_free(gfs2_bufdata_cachep, bd);
770
771 bh = bh->b_this_page;
772 } while (bh != head);
773
774out:
775 return try_to_free_buffers(page);
776}
777
778const struct address_space_operations gfs2_file_aops = {
779 .writepage = gfs2_writepage,
780 .readpage = gfs2_readpage,
781 .readpages = gfs2_readpages,
782 .sync_page = block_sync_page,
783 .prepare_write = gfs2_prepare_write,
784 .commit_write = gfs2_commit_write,
785 .bmap = gfs2_bmap,
786 .invalidatepage = gfs2_invalidatepage,
787 .releasepage = gfs2_releasepage,
788 .direct_IO = gfs2_direct_IO,
789};
790
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
new file mode 100644
index 000000000000..35aaee4aa7e1
--- /dev/null
+++ b/fs/gfs2/ops_address.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_ADDRESS_DOT_H__
11#define __OPS_ADDRESS_DOT_H__
12
13#include <linux/fs.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16
17extern const struct address_space_operations gfs2_file_aops;
18extern int gfs2_get_block(struct inode *inode, sector_t lblock,
19 struct buffer_head *bh_result, int create);
20extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
21
22#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
new file mode 100644
index 000000000000..00041b1b8025
--- /dev/null
+++ b/fs/gfs2/ops_dentry.c
@@ -0,0 +1,119 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/smp_lock.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/crc32.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "dir.h"
23#include "glock.h"
24#include "ops_dentry.h"
25#include "util.h"
26
27/**
28 * gfs2_drevalidate - Check directory lookup consistency
29 * @dentry: the mapping to check
30 * @nd:
31 *
32 * Check to make sure the lookup necessary to arrive at this inode from its
33 * parent is still good.
34 *
35 * Returns: 1 if the dentry is ok, 0 if it isn't
36 */
37
38static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
39{
40 struct dentry *parent = dget_parent(dentry);
41 struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
42 struct gfs2_inode *dip = GFS2_I(parent->d_inode);
43 struct inode *inode = dentry->d_inode;
44 struct gfs2_holder d_gh;
45 struct gfs2_inode *ip;
46 struct gfs2_inum inum;
47 unsigned int type;
48 int error;
49
50 if (inode && is_bad_inode(inode))
51 goto invalid;
52
53 if (sdp->sd_args.ar_localcaching)
54 goto valid;
55
56 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
57 if (error)
58 goto fail;
59
60 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
61 switch (error) {
62 case 0:
63 if (!inode)
64 goto invalid_gunlock;
65 break;
66 case -ENOENT:
67 if (!inode)
68 goto valid_gunlock;
69 goto invalid_gunlock;
70 default:
71 goto fail_gunlock;
72 }
73
74 ip = GFS2_I(inode);
75
76 if (!gfs2_inum_equal(&ip->i_num, &inum))
77 goto invalid_gunlock;
78
79 if (IF2DT(ip->i_di.di_mode) != type) {
80 gfs2_consist_inode(dip);
81 goto fail_gunlock;
82 }
83
84valid_gunlock:
85 gfs2_glock_dq_uninit(&d_gh);
86valid:
87 dput(parent);
88 return 1;
89
90invalid_gunlock:
91 gfs2_glock_dq_uninit(&d_gh);
92invalid:
93 if (inode && S_ISDIR(inode->i_mode)) {
94 if (have_submounts(dentry))
95 goto valid;
96 shrink_dcache_parent(dentry);
97 }
98 d_drop(dentry);
99 dput(parent);
100 return 0;
101
102fail_gunlock:
103 gfs2_glock_dq_uninit(&d_gh);
104fail:
105 dput(parent);
106 return 0;
107}
108
109static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
110{
111 str->hash = gfs2_disk_hash(str->name, str->len);
112 return 0;
113}
114
115struct dentry_operations gfs2_dops = {
116 .d_revalidate = gfs2_drevalidate,
117 .d_hash = gfs2_dhash,
118};
119
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
new file mode 100644
index 000000000000..5caa3db4d3f5
--- /dev/null
+++ b/fs/gfs2/ops_dentry.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_DENTRY_DOT_H__
11#define __OPS_DENTRY_DOT_H__
12
13#include <linux/dcache.h>
14
15extern struct dentry_operations gfs2_dops;
16
17#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
new file mode 100644
index 000000000000..86127d93bd35
--- /dev/null
+++ b/fs/gfs2/ops_export.c
@@ -0,0 +1,298 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "dir.h"
22#include "glock.h"
23#include "glops.h"
24#include "inode.h"
25#include "ops_export.h"
26#include "rgrp.h"
27#include "util.h"
28
29static struct dentry *gfs2_decode_fh(struct super_block *sb,
30 __u32 *fh,
31 int fh_len,
32 int fh_type,
33 int (*acceptable)(void *context,
34 struct dentry *dentry),
35 void *context)
36{
37 struct gfs2_fh_obj fh_obj;
38 struct gfs2_inum *this, parent;
39
40 if (fh_type != fh_len)
41 return NULL;
42
43 this = &fh_obj.this;
44 fh_obj.imode = DT_UNKNOWN;
45 memset(&parent, 0, sizeof(struct gfs2_inum));
46
47 switch (fh_type) {
48 case GFS2_LARGE_FH_SIZE:
49 parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
50 parent.no_formal_ino |= be32_to_cpu(fh[5]);
51 parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
52 parent.no_addr |= be32_to_cpu(fh[7]);
53 fh_obj.imode = be32_to_cpu(fh[8]);
54 case GFS2_SMALL_FH_SIZE:
55 this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
56 this->no_formal_ino |= be32_to_cpu(fh[1]);
57 this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
58 this->no_addr |= be32_to_cpu(fh[3]);
59 break;
60 default:
61 return NULL;
62 }
63
64 return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
65 acceptable, context);
66}
67
68static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
69 int connectable)
70{
71 struct inode *inode = dentry->d_inode;
72 struct super_block *sb = inode->i_sb;
73 struct gfs2_inode *ip = GFS2_I(inode);
74
75 if (*len < GFS2_SMALL_FH_SIZE ||
76 (connectable && *len < GFS2_LARGE_FH_SIZE))
77 return 255;
78
79 fh[0] = ip->i_num.no_formal_ino >> 32;
80 fh[0] = cpu_to_be32(fh[0]);
81 fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
82 fh[1] = cpu_to_be32(fh[1]);
83 fh[2] = ip->i_num.no_addr >> 32;
84 fh[2] = cpu_to_be32(fh[2]);
85 fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
86 fh[3] = cpu_to_be32(fh[3]);
87 *len = GFS2_SMALL_FH_SIZE;
88
89 if (!connectable || inode == sb->s_root->d_inode)
90 return *len;
91
92 spin_lock(&dentry->d_lock);
93 inode = dentry->d_parent->d_inode;
94 ip = GFS2_I(inode);
95 igrab(inode);
96 spin_unlock(&dentry->d_lock);
97
98 fh[4] = ip->i_num.no_formal_ino >> 32;
99 fh[4] = cpu_to_be32(fh[4]);
100 fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
101 fh[5] = cpu_to_be32(fh[5]);
102 fh[6] = ip->i_num.no_addr >> 32;
103 fh[6] = cpu_to_be32(fh[6]);
104 fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
105 fh[7] = cpu_to_be32(fh[7]);
106
107 fh[8] = cpu_to_be32(inode->i_mode);
108 fh[9] = 0; /* pad to double word */
109 *len = GFS2_LARGE_FH_SIZE;
110
111 iput(inode);
112
113 return *len;
114}
115
116struct get_name_filldir {
117 struct gfs2_inum inum;
118 char *name;
119};
120
121static int get_name_filldir(void *opaque, const char *name, unsigned int length,
122 u64 offset, struct gfs2_inum *inum,
123 unsigned int type)
124{
125 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
126
127 if (!gfs2_inum_equal(inum, &gnfd->inum))
128 return 0;
129
130 memcpy(gnfd->name, name, length);
131 gnfd->name[length] = 0;
132
133 return 1;
134}
135
136static int gfs2_get_name(struct dentry *parent, char *name,
137 struct dentry *child)
138{
139 struct inode *dir = parent->d_inode;
140 struct inode *inode = child->d_inode;
141 struct gfs2_inode *dip, *ip;
142 struct get_name_filldir gnfd;
143 struct gfs2_holder gh;
144 u64 offset = 0;
145 int error;
146
147 if (!dir)
148 return -EINVAL;
149
150 if (!S_ISDIR(dir->i_mode) || !inode)
151 return -EINVAL;
152
153 dip = GFS2_I(dir);
154 ip = GFS2_I(inode);
155
156 *name = 0;
157 gnfd.inum = ip->i_num;
158 gnfd.name = name;
159
160 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
161 if (error)
162 return error;
163
164 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
165
166 gfs2_glock_dq_uninit(&gh);
167
168 if (!error && !*name)
169 error = -ENOENT;
170
171 return error;
172}
173
174static struct dentry *gfs2_get_parent(struct dentry *child)
175{
176 struct qstr dotdot;
177 struct inode *inode;
178 struct dentry *dentry;
179
180 gfs2_str2qstr(&dotdot, "..");
181 inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
182
183 if (!inode)
184 return ERR_PTR(-ENOENT);
185 /*
186 * In case of an error, @inode carries the error value, and we
187 * have to return that as a(n invalid) pointer to dentry.
188 */
189 if (IS_ERR(inode))
190 return ERR_PTR(PTR_ERR(inode));
191
192 dentry = d_alloc_anon(inode);
193 if (!dentry) {
194 iput(inode);
195 return ERR_PTR(-ENOMEM);
196 }
197
198 return dentry;
199}
200
201static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
202{
203 struct gfs2_sbd *sdp = sb->s_fs_info;
204 struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
205 struct gfs2_inum *inum = &fh_obj->this;
206 struct gfs2_holder i_gh, ri_gh, rgd_gh;
207 struct gfs2_rgrpd *rgd;
208 struct inode *inode;
209 struct dentry *dentry;
210 int error;
211
212 /* System files? */
213
214 inode = gfs2_ilookup(sb, inum);
215 if (inode) {
216 if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
217 iput(inode);
218 return ERR_PTR(-ESTALE);
219 }
220 goto out_inode;
221 }
222
223 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
224 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
225 &i_gh);
226 if (error)
227 return ERR_PTR(error);
228
229 error = gfs2_rindex_hold(sdp, &ri_gh);
230 if (error)
231 goto fail;
232
233 error = -EINVAL;
234 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
235 if (!rgd)
236 goto fail_rindex;
237
238 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
239 if (error)
240 goto fail_rindex;
241
242 error = -ESTALE;
243 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
244 goto fail_rgd;
245
246 gfs2_glock_dq_uninit(&rgd_gh);
247 gfs2_glock_dq_uninit(&ri_gh);
248
249 inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
250 if (!inode)
251 goto fail;
252 if (IS_ERR(inode)) {
253 error = PTR_ERR(inode);
254 goto fail;
255 }
256
257 error = gfs2_inode_refresh(GFS2_I(inode));
258 if (error) {
259 iput(inode);
260 goto fail;
261 }
262
263 error = -EIO;
264 if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
265 iput(inode);
266 goto fail;
267 }
268
269 gfs2_glock_dq_uninit(&i_gh);
270
271out_inode:
272 dentry = d_alloc_anon(inode);
273 if (!dentry) {
274 iput(inode);
275 return ERR_PTR(-ENOMEM);
276 }
277
278 return dentry;
279
280fail_rgd:
281 gfs2_glock_dq_uninit(&rgd_gh);
282
283fail_rindex:
284 gfs2_glock_dq_uninit(&ri_gh);
285
286fail:
287 gfs2_glock_dq_uninit(&i_gh);
288 return ERR_PTR(error);
289}
290
291struct export_operations gfs2_export_ops = {
292 .decode_fh = gfs2_decode_fh,
293 .encode_fh = gfs2_encode_fh,
294 .get_name = gfs2_get_name,
295 .get_parent = gfs2_get_parent,
296 .get_dentry = gfs2_get_dentry,
297};
298
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
new file mode 100644
index 000000000000..09aca5046fb1
--- /dev/null
+++ b/fs/gfs2/ops_export.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13#define GFS2_SMALL_FH_SIZE 4
14#define GFS2_LARGE_FH_SIZE 10
15
16extern struct export_operations gfs2_export_ops;
17struct gfs2_fh_obj {
18 struct gfs2_inum this;
19 __u32 imode;
20};
21
22#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
new file mode 100644
index 000000000000..3064f133bf3c
--- /dev/null
+++ b/fs/gfs2/ops_file.c
@@ -0,0 +1,661 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/uio.h>
17#include <linux/blkdev.h>
18#include <linux/mm.h>
19#include <linux/smp_lock.h>
20#include <linux/fs.h>
21#include <linux/gfs2_ondisk.h>
22#include <linux/ext2_fs.h>
23#include <linux/crc32.h>
24#include <linux/lm_interface.h>
25#include <asm/uaccess.h>
26
27#include "gfs2.h"
28#include "incore.h"
29#include "bmap.h"
30#include "dir.h"
31#include "glock.h"
32#include "glops.h"
33#include "inode.h"
34#include "lm.h"
35#include "log.h"
36#include "meta_io.h"
37#include "ops_file.h"
38#include "ops_vm.h"
39#include "quota.h"
40#include "rgrp.h"
41#include "trans.h"
42#include "util.h"
43#include "eaops.h"
44
45/* For regular, non-NFS */
46struct filldir_reg {
47 struct gfs2_sbd *fdr_sbd;
48 int fdr_prefetch;
49
50 filldir_t fdr_filldir;
51 void *fdr_opaque;
52};
53
54/*
55 * Most fields left uninitialised to catch anybody who tries to
56 * use them. f_flags set to prevent file_accessed() from touching
57 * any other part of this. Its use is purely as a flag so that we
58 * know (in readpage()) whether or not do to locking.
59 */
60struct file gfs2_internal_file_sentinel = {
61 .f_flags = O_NOATIME|O_RDONLY,
62};
63
64static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
65 unsigned long offset, unsigned long size)
66{
67 char *kaddr;
68 unsigned long count = desc->count;
69
70 if (size > count)
71 size = count;
72
73 kaddr = kmap(page);
74 memcpy(desc->arg.buf, kaddr + offset, size);
75 kunmap(page);
76
77 desc->count = count - size;
78 desc->written += size;
79 desc->arg.buf += size;
80 return size;
81}
82
83int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
84 char *buf, loff_t *pos, unsigned size)
85{
86 struct inode *inode = &ip->i_inode;
87 read_descriptor_t desc;
88 desc.written = 0;
89 desc.arg.buf = buf;
90 desc.count = size;
91 desc.error = 0;
92 do_generic_mapping_read(inode->i_mapping, ra_state,
93 &gfs2_internal_file_sentinel, pos, &desc,
94 gfs2_read_actor);
95 return desc.written ? desc.written : desc.error;
96}
97
98/**
99 * gfs2_llseek - seek to a location in a file
100 * @file: the file
101 * @offset: the offset
102 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
103 *
104 * SEEK_END requires the glock for the file because it references the
105 * file's size.
106 *
107 * Returns: The new offset, or errno
108 */
109
110static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
111{
112 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
113 struct gfs2_holder i_gh;
114 loff_t error;
115
116 if (origin == 2) {
117 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
118 &i_gh);
119 if (!error) {
120 error = remote_llseek(file, offset, origin);
121 gfs2_glock_dq_uninit(&i_gh);
122 }
123 } else
124 error = remote_llseek(file, offset, origin);
125
126 return error;
127}
128
129/**
130 * filldir_func - Report a directory entry to the caller of gfs2_dir_read()
131 * @opaque: opaque data used by the function
132 * @name: the name of the directory entry
133 * @length: the length of the name
134 * @offset: the entry's offset in the directory
135 * @inum: the inode number the entry points to
136 * @type: the type of inode the entry points to
137 *
138 * Returns: 0 on success, 1 if buffer full
139 */
140
141static int filldir_func(void *opaque, const char *name, unsigned int length,
142 u64 offset, struct gfs2_inum *inum,
143 unsigned int type)
144{
145 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
146 struct gfs2_sbd *sdp = fdr->fdr_sbd;
147 int error;
148
149 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
150 inum->no_addr, type);
151 if (error)
152 return 1;
153
154 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
155 gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
156 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
157 gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
158 LM_ST_SHARED, LM_FLAG_TRY);
159 }
160
161 return 0;
162}
163
164/**
165 * gfs2_readdir - Read directory entries from a directory
166 * @file: The directory to read from
167 * @dirent: Buffer for dirents
168 * @filldir: Function used to do the copying
169 *
170 * Returns: errno
171 */
172
173static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
174{
175 struct inode *dir = file->f_mapping->host;
176 struct gfs2_inode *dip = GFS2_I(dir);
177 struct filldir_reg fdr;
178 struct gfs2_holder d_gh;
179 u64 offset = file->f_pos;
180 int error;
181
182 fdr.fdr_sbd = GFS2_SB(dir);
183 fdr.fdr_prefetch = 1;
184 fdr.fdr_filldir = filldir;
185 fdr.fdr_opaque = dirent;
186
187 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
188 error = gfs2_glock_nq_atime(&d_gh);
189 if (error) {
190 gfs2_holder_uninit(&d_gh);
191 return error;
192 }
193
194 error = gfs2_dir_read(dir, &offset, &fdr, filldir_func);
195
196 gfs2_glock_dq_uninit(&d_gh);
197
198 file->f_pos = offset;
199
200 return error;
201}
202
203/**
204 * fsflags_cvt
205 * @table: A table of 32 u32 flags
206 * @val: a 32 bit value to convert
207 *
208 * This function can be used to convert between fsflags values and
209 * GFS2's own flags values.
210 *
211 * Returns: the converted flags
212 */
213static u32 fsflags_cvt(const u32 *table, u32 val)
214{
215 u32 res = 0;
216 while(val) {
217 if (val & 1)
218 res |= *table;
219 table++;
220 val >>= 1;
221 }
222 return res;
223}
224
225static const u32 fsflags_to_gfs2[32] = {
226 [3] = GFS2_DIF_SYNC,
227 [4] = GFS2_DIF_IMMUTABLE,
228 [5] = GFS2_DIF_APPENDONLY,
229 [7] = GFS2_DIF_NOATIME,
230 [12] = GFS2_DIF_EXHASH,
231 [14] = GFS2_DIF_JDATA,
232 [20] = GFS2_DIF_DIRECTIO,
233};
234
235static const u32 gfs2_to_fsflags[32] = {
236 [gfs2fl_Sync] = FS_SYNC_FL,
237 [gfs2fl_Immutable] = FS_IMMUTABLE_FL,
238 [gfs2fl_AppendOnly] = FS_APPEND_FL,
239 [gfs2fl_NoAtime] = FS_NOATIME_FL,
240 [gfs2fl_ExHash] = FS_INDEX_FL,
241 [gfs2fl_Jdata] = FS_JOURNAL_DATA_FL,
242 [gfs2fl_Directio] = FS_DIRECTIO_FL,
243 [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL,
244 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
245};
246
247static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
248{
249 struct inode *inode = filp->f_dentry->d_inode;
250 struct gfs2_inode *ip = GFS2_I(inode);
251 struct gfs2_holder gh;
252 int error;
253 u32 fsflags;
254
255 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
256 error = gfs2_glock_nq_m_atime(1, &gh);
257 if (error)
258 return error;
259
260 fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
261 if (put_user(fsflags, ptr))
262 error = -EFAULT;
263
264 gfs2_glock_dq_m(1, &gh);
265 gfs2_holder_uninit(&gh);
266 return error;
267}
268
/*
 * Flags that can be set by user space. do_gfs2_set_flags() rejects a
 * request with -EINVAL if it would change any flag outside this set.
 */
#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA |			\
			     GFS2_DIF_DIRECTIO |		\
			     GFS2_DIF_IMMUTABLE |		\
			     GFS2_DIF_APPENDONLY |		\
			     GFS2_DIF_NOATIME |			\
			     GFS2_DIF_SYNC |			\
			     GFS2_DIF_SYSTEM |			\
			     GFS2_DIF_INHERIT_DIRECTIO |	\
			     GFS2_DIF_INHERIT_JDATA)
279
280/**
281 * gfs2_set_flags - set flags on an inode
282 * @inode: The inode
283 * @flags: The flags to set
284 * @mask: Indicates which flags are valid
285 *
286 */
287static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
288{
289 struct inode *inode = filp->f_dentry->d_inode;
290 struct gfs2_inode *ip = GFS2_I(inode);
291 struct gfs2_sbd *sdp = GFS2_SB(inode);
292 struct buffer_head *bh;
293 struct gfs2_holder gh;
294 int error;
295 u32 new_flags, flags;
296
297 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
298 if (error)
299 return error;
300
301 flags = ip->i_di.di_flags;
302 new_flags = (flags & ~mask) | (reqflags & mask);
303 if ((new_flags ^ flags) == 0)
304 goto out;
305
306 if (S_ISDIR(inode->i_mode)) {
307 if ((new_flags ^ flags) & GFS2_DIF_JDATA)
308 new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA);
309 if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO)
310 new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO);
311 }
312
313 error = -EINVAL;
314 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
315 goto out;
316
317 error = -EPERM;
318 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
319 goto out;
320 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
321 goto out;
322 if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
323 !capable(CAP_LINUX_IMMUTABLE))
324 goto out;
325 if (!IS_IMMUTABLE(inode)) {
326 error = permission(inode, MAY_WRITE, NULL);
327 if (error)
328 goto out;
329 }
330
331 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
332 if (error)
333 goto out;
334 error = gfs2_meta_inode_buffer(ip, &bh);
335 if (error)
336 goto out_trans_end;
337 gfs2_trans_add_bh(ip->i_gl, bh, 1);
338 ip->i_di.di_flags = new_flags;
339 gfs2_dinode_out(&ip->i_di, bh->b_data);
340 brelse(bh);
341out_trans_end:
342 gfs2_trans_end(sdp);
343out:
344 gfs2_glock_dq_uninit(&gh);
345 return error;
346}
347
348static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
349{
350 u32 fsflags, gfsflags;
351 if (get_user(fsflags, ptr))
352 return -EFAULT;
353 gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
354 return do_gfs2_set_flags(filp, gfsflags, ~0);
355}
356
357static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
358{
359 switch(cmd) {
360 case FS_IOC_GETFLAGS:
361 return gfs2_get_flags(filp, (u32 __user *)arg);
362 case FS_IOC_SETFLAGS:
363 return gfs2_set_flags(filp, (u32 __user *)arg);
364 }
365 return -ENOTTY;
366}
367
368
369/**
370 * gfs2_mmap -
371 * @file: The file to map
372 * @vma: The VMA which described the mapping
373 *
374 * Returns: 0 or error code
375 */
376
377static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
378{
379 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
380 struct gfs2_holder i_gh;
381 int error;
382
383 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
384 error = gfs2_glock_nq_atime(&i_gh);
385 if (error) {
386 gfs2_holder_uninit(&i_gh);
387 return error;
388 }
389
390 /* This is VM_MAYWRITE instead of VM_WRITE because a call
391 to mprotect() can turn on VM_WRITE later. */
392
393 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
394 (VM_MAYSHARE | VM_MAYWRITE))
395 vma->vm_ops = &gfs2_vm_ops_sharewrite;
396 else
397 vma->vm_ops = &gfs2_vm_ops_private;
398
399 gfs2_glock_dq_uninit(&i_gh);
400
401 return error;
402}
403
404/**
405 * gfs2_open - open a file
406 * @inode: the inode to open
407 * @file: the struct file for this opening
408 *
409 * Returns: errno
410 */
411
412static int gfs2_open(struct inode *inode, struct file *file)
413{
414 struct gfs2_inode *ip = GFS2_I(inode);
415 struct gfs2_holder i_gh;
416 struct gfs2_file *fp;
417 int error;
418
419 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
420 if (!fp)
421 return -ENOMEM;
422
423 mutex_init(&fp->f_fl_mutex);
424
425 gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
426 file->private_data = fp;
427
428 if (S_ISREG(ip->i_di.di_mode)) {
429 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
430 &i_gh);
431 if (error)
432 goto fail;
433
434 if (!(file->f_flags & O_LARGEFILE) &&
435 ip->i_di.di_size > MAX_NON_LFS) {
436 error = -EFBIG;
437 goto fail_gunlock;
438 }
439
440 /* Listen to the Direct I/O flag */
441
442 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
443 file->f_flags |= O_DIRECT;
444
445 gfs2_glock_dq_uninit(&i_gh);
446 }
447
448 return 0;
449
450fail_gunlock:
451 gfs2_glock_dq_uninit(&i_gh);
452fail:
453 file->private_data = NULL;
454 kfree(fp);
455 return error;
456}
457
458/**
459 * gfs2_close - called to close a struct file
460 * @inode: the inode the struct file belongs to
461 * @file: the struct file being closed
462 *
463 * Returns: errno
464 */
465
466static int gfs2_close(struct inode *inode, struct file *file)
467{
468 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
469 struct gfs2_file *fp;
470
471 fp = file->private_data;
472 file->private_data = NULL;
473
474 if (gfs2_assert_warn(sdp, fp))
475 return -EIO;
476
477 kfree(fp);
478
479 return 0;
480}
481
482/**
483 * gfs2_fsync - sync the dirty data for a file (across the cluster)
484 * @file: the file that points to the dentry (we ignore this)
485 * @dentry: the dentry that points to the inode to sync
486 *
487 * Returns: errno
488 */
489
490static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
491{
492 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
493
494 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
495
496 return 0;
497}
498
499/**
500 * gfs2_lock - acquire/release a posix lock on a file
501 * @file: the file pointer
502 * @cmd: either modify or retrieve lock state, possibly wait
503 * @fl: type and range of lock
504 *
505 * Returns: errno
506 */
507
508static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
509{
510 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
511 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
512 struct lm_lockname name =
513 { .ln_number = ip->i_num.no_addr,
514 .ln_type = LM_TYPE_PLOCK };
515
516 if (!(fl->fl_flags & FL_POSIX))
517 return -ENOLCK;
518 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
519 return -ENOLCK;
520
521 if (sdp->sd_args.ar_localflocks) {
522 if (IS_GETLK(cmd)) {
523 struct file_lock tmp;
524 int ret;
525 ret = posix_test_lock(file, fl, &tmp);
526 fl->fl_type = F_UNLCK;
527 if (ret)
528 memcpy(fl, &tmp, sizeof(struct file_lock));
529 return 0;
530 } else {
531 return posix_lock_file_wait(file, fl);
532 }
533 }
534
535 if (IS_GETLK(cmd))
536 return gfs2_lm_plock_get(sdp, &name, file, fl);
537 else if (fl->fl_type == F_UNLCK)
538 return gfs2_lm_punlock(sdp, &name, file, fl);
539 else
540 return gfs2_lm_plock(sdp, &name, file, cmd, fl);
541}
542
543static int do_flock(struct file *file, int cmd, struct file_lock *fl)
544{
545 struct gfs2_file *fp = file->private_data;
546 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
547 struct gfs2_inode *ip = GFS2_I(file->f_dentry->d_inode);
548 struct gfs2_glock *gl;
549 unsigned int state;
550 int flags;
551 int error = 0;
552
553 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
554 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
555
556 mutex_lock(&fp->f_fl_mutex);
557
558 gl = fl_gh->gh_gl;
559 if (gl) {
560 if (fl_gh->gh_state == state)
561 goto out;
562 gfs2_glock_hold(gl);
563 flock_lock_file_wait(file,
564 &(struct file_lock){.fl_type = F_UNLCK});
565 gfs2_glock_dq_uninit(fl_gh);
566 } else {
567 error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
568 ip->i_num.no_addr, &gfs2_flock_glops,
569 CREATE, &gl);
570 if (error)
571 goto out;
572 }
573
574 gfs2_holder_init(gl, state, flags, fl_gh);
575 gfs2_glock_put(gl);
576
577 error = gfs2_glock_nq(fl_gh);
578 if (error) {
579 gfs2_holder_uninit(fl_gh);
580 if (error == GLR_TRYFAILED)
581 error = -EAGAIN;
582 } else {
583 error = flock_lock_file_wait(file, fl);
584 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
585 }
586
587out:
588 mutex_unlock(&fp->f_fl_mutex);
589 return error;
590}
591
592static void do_unflock(struct file *file, struct file_lock *fl)
593{
594 struct gfs2_file *fp = file->private_data;
595 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
596
597 mutex_lock(&fp->f_fl_mutex);
598 flock_lock_file_wait(file, fl);
599 if (fl_gh->gh_gl)
600 gfs2_glock_dq_uninit(fl_gh);
601 mutex_unlock(&fp->f_fl_mutex);
602}
603
604/**
605 * gfs2_flock - acquire/release a flock lock on a file
606 * @file: the file pointer
607 * @cmd: either modify or retrieve lock state, possibly wait
608 * @fl: type and range of lock
609 *
610 * Returns: errno
611 */
612
613static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
614{
615 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
616 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
617
618 if (!(fl->fl_flags & FL_FLOCK))
619 return -ENOLCK;
620 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
621 return -ENOLCK;
622
623 if (sdp->sd_args.ar_localflocks)
624 return flock_lock_file_wait(file, fl);
625
626 if (fl->fl_type == F_UNLCK) {
627 do_unflock(file, fl);
628 return 0;
629 } else {
630 return do_flock(file, cmd, fl);
631 }
632}
633
634const struct file_operations gfs2_file_fops = {
635 .llseek = gfs2_llseek,
636 .read = do_sync_read,
637 .aio_read = generic_file_aio_read,
638 .write = do_sync_write,
639 .aio_write = generic_file_aio_write,
640 .unlocked_ioctl = gfs2_ioctl,
641 .mmap = gfs2_mmap,
642 .open = gfs2_open,
643 .release = gfs2_close,
644 .fsync = gfs2_fsync,
645 .lock = gfs2_lock,
646 .sendfile = generic_file_sendfile,
647 .flock = gfs2_flock,
648 .splice_read = generic_file_splice_read,
649 .splice_write = generic_file_splice_write,
650};
651
652const struct file_operations gfs2_dir_fops = {
653 .readdir = gfs2_readdir,
654 .unlocked_ioctl = gfs2_ioctl,
655 .open = gfs2_open,
656 .release = gfs2_close,
657 .fsync = gfs2_fsync,
658 .lock = gfs2_lock,
659 .flock = gfs2_flock,
660};
661
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
new file mode 100644
index 000000000000..ce319f89ec8e
--- /dev/null
+++ b/fs/gfs2/ops_file.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12
13#include <linux/fs.h>
14struct gfs2_inode;
15
16extern struct file gfs2_internal_file_sentinel;
17extern int gfs2_internal_read(struct gfs2_inode *ip,
18 struct file_ra_state *ra_state,
19 char *buf, loff_t *pos, unsigned size);
20
21extern const struct file_operations gfs2_file_fops;
22extern const struct file_operations gfs2_dir_fops;
23
24#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
new file mode 100644
index 000000000000..178b33911843
--- /dev/null
+++ b/fs/gfs2/ops_fstype.c
@@ -0,0 +1,928 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/blkdev.h>
16#include <linux/kthread.h>
17#include <linux/namei.h>
18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h>
21
22#include "gfs2.h"
23#include "incore.h"
24#include "daemon.h"
25#include "glock.h"
26#include "glops.h"
27#include "inode.h"
28#include "lm.h"
29#include "mount.h"
30#include "ops_export.h"
31#include "ops_fstype.h"
32#include "ops_super.h"
33#include "recovery.h"
34#include "rgrp.h"
35#include "super.h"
36#include "sys.h"
37#include "util.h"
38
/* Values for the "undo" argument of the init_*() helpers in this file */
#define DO	0	/* carry out the setup step */
#define UNDO	1	/* tear the step down again */

/* Dentry operations; init_sb() attaches them to the root dentry */
extern struct dentry_operations gfs2_dops;
43
44static struct gfs2_sbd *init_sbd(struct super_block *sb)
45{
46 struct gfs2_sbd *sdp;
47
48 sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
49 if (!sdp)
50 return NULL;
51
52 sb->s_fs_info = sdp;
53 sdp->sd_vfs = sb;
54
55 gfs2_tune_init(&sdp->sd_tune);
56
57 INIT_LIST_HEAD(&sdp->sd_reclaim_list);
58 spin_lock_init(&sdp->sd_reclaim_lock);
59 init_waitqueue_head(&sdp->sd_reclaim_wq);
60
61 mutex_init(&sdp->sd_inum_mutex);
62 spin_lock_init(&sdp->sd_statfs_spin);
63 mutex_init(&sdp->sd_statfs_mutex);
64
65 spin_lock_init(&sdp->sd_rindex_spin);
66 mutex_init(&sdp->sd_rindex_mutex);
67 INIT_LIST_HEAD(&sdp->sd_rindex_list);
68 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
69 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
70
71 INIT_LIST_HEAD(&sdp->sd_jindex_list);
72 spin_lock_init(&sdp->sd_jindex_spin);
73 mutex_init(&sdp->sd_jindex_mutex);
74
75 INIT_LIST_HEAD(&sdp->sd_quota_list);
76 spin_lock_init(&sdp->sd_quota_spin);
77 mutex_init(&sdp->sd_quota_mutex);
78
79 spin_lock_init(&sdp->sd_log_lock);
80
81 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
82 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
83 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
84 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
85 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
86
87 mutex_init(&sdp->sd_log_reserve_mutex);
88 INIT_LIST_HEAD(&sdp->sd_ail1_list);
89 INIT_LIST_HEAD(&sdp->sd_ail2_list);
90
91 init_rwsem(&sdp->sd_log_flush_lock);
92 INIT_LIST_HEAD(&sdp->sd_log_flush_list);
93
94 INIT_LIST_HEAD(&sdp->sd_revoke_list);
95
96 mutex_init(&sdp->sd_freeze_lock);
97
98 return sdp;
99}
100
101static void init_vfs(struct super_block *sb, unsigned noatime)
102{
103 struct gfs2_sbd *sdp = sb->s_fs_info;
104
105 sb->s_magic = GFS2_MAGIC;
106 sb->s_op = &gfs2_super_ops;
107 sb->s_export_op = &gfs2_export_ops;
108 sb->s_maxbytes = MAX_LFS_FILESIZE;
109
110 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
111 set_bit(noatime, &sdp->sd_flags);
112
113 /* Don't let the VFS update atimes. GFS2 handles this itself. */
114 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
115}
116
117static int init_names(struct gfs2_sbd *sdp, int silent)
118{
119 struct page *page;
120 char *proto, *table;
121 int error = 0;
122
123 proto = sdp->sd_args.ar_lockproto;
124 table = sdp->sd_args.ar_locktable;
125
126 /* Try to autodetect */
127
128 if (!proto[0] || !table[0]) {
129 struct gfs2_sb *sb;
130 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
131 if (!page)
132 return -ENOBUFS;
133 sb = kmap(page);
134 gfs2_sb_in(&sdp->sd_sb, sb);
135 kunmap(page);
136 __free_page(page);
137
138 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
139 if (error)
140 goto out;
141
142 if (!proto[0])
143 proto = sdp->sd_sb.sb_lockproto;
144 if (!table[0])
145 table = sdp->sd_sb.sb_locktable;
146 }
147
148 if (!table[0])
149 table = sdp->sd_vfs->s_id;
150
151 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
152 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
153
154out:
155 return error;
156}
157
158static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
159 int undo)
160{
161 struct task_struct *p;
162 int error = 0;
163
164 if (undo)
165 goto fail_trans;
166
167 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
168 error = IS_ERR(p);
169 if (error) {
170 fs_err(sdp, "can't start scand thread: %d\n", error);
171 return error;
172 }
173 sdp->sd_scand_process = p;
174
175 for (sdp->sd_glockd_num = 0;
176 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
177 sdp->sd_glockd_num++) {
178 p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
179 error = IS_ERR(p);
180 if (error) {
181 fs_err(sdp, "can't start glockd thread: %d\n", error);
182 goto fail;
183 }
184 sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
185 }
186
187 error = gfs2_glock_nq_num(sdp,
188 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
189 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
190 mount_gh);
191 if (error) {
192 fs_err(sdp, "can't acquire mount glock: %d\n", error);
193 goto fail;
194 }
195
196 error = gfs2_glock_nq_num(sdp,
197 GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
198 LM_ST_SHARED,
199 LM_FLAG_NOEXP | GL_EXACT,
200 &sdp->sd_live_gh);
201 if (error) {
202 fs_err(sdp, "can't acquire live glock: %d\n", error);
203 goto fail_mount;
204 }
205
206 error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
207 CREATE, &sdp->sd_rename_gl);
208 if (error) {
209 fs_err(sdp, "can't create rename glock: %d\n", error);
210 goto fail_live;
211 }
212
213 error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
214 CREATE, &sdp->sd_trans_gl);
215 if (error) {
216 fs_err(sdp, "can't create transaction glock: %d\n", error);
217 goto fail_rename;
218 }
219 set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
220
221 return 0;
222
223fail_trans:
224 gfs2_glock_put(sdp->sd_trans_gl);
225fail_rename:
226 gfs2_glock_put(sdp->sd_rename_gl);
227fail_live:
228 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
229fail_mount:
230 gfs2_glock_dq_uninit(mount_gh);
231fail:
232 while (sdp->sd_glockd_num--)
233 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
234
235 kthread_stop(sdp->sd_scand_process);
236 return error;
237}
238
239static struct inode *gfs2_lookup_root(struct super_block *sb,
240 struct gfs2_inum *inum)
241{
242 return gfs2_inode_lookup(sb, inum, DT_DIR);
243}
244
245static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
246{
247 struct super_block *sb = sdp->sd_vfs;
248 struct gfs2_holder sb_gh;
249 struct gfs2_inum *inum;
250 struct inode *inode;
251 int error = 0;
252
253 if (undo) {
254 if (sb->s_root) {
255 dput(sb->s_root);
256 sb->s_root = NULL;
257 }
258 return 0;
259 }
260
261 error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
262 LM_ST_SHARED, 0, &sb_gh);
263 if (error) {
264 fs_err(sdp, "can't acquire superblock glock: %d\n", error);
265 return error;
266 }
267
268 error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
269 if (error) {
270 fs_err(sdp, "can't read superblock: %d\n", error);
271 goto out;
272 }
273
274 /* Set up the buffer cache and SB for real */
275 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
276 error = -EINVAL;
277 fs_err(sdp, "FS block size (%u) is too small for device "
278 "block size (%u)\n",
279 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
280 goto out;
281 }
282 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
283 error = -EINVAL;
284 fs_err(sdp, "FS block size (%u) is too big for machine "
285 "page size (%u)\n",
286 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
287 goto out;
288 }
289 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
290
291 /* Get the root inode */
292 inum = &sdp->sd_sb.sb_root_dir;
293 if (sb->s_type == &gfs2meta_fs_type)
294 inum = &sdp->sd_sb.sb_master_dir;
295 inode = gfs2_lookup_root(sb, inum);
296 if (IS_ERR(inode)) {
297 error = PTR_ERR(inode);
298 fs_err(sdp, "can't read in root inode: %d\n", error);
299 goto out;
300 }
301
302 sb->s_root = d_alloc_root(inode);
303 if (!sb->s_root) {
304 fs_err(sdp, "can't get root dentry\n");
305 error = -ENOMEM;
306 iput(inode);
307 }
308 sb->s_root->d_op = &gfs2_dops;
309out:
310 gfs2_glock_dq_uninit(&sb_gh);
311 return error;
312}
313
314static int init_journal(struct gfs2_sbd *sdp, int undo)
315{
316 struct gfs2_holder ji_gh;
317 struct task_struct *p;
318 struct gfs2_inode *ip;
319 int jindex = 1;
320 int error = 0;
321
322 if (undo) {
323 jindex = 0;
324 goto fail_recoverd;
325 }
326
327 sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
328 if (IS_ERR(sdp->sd_jindex)) {
329 fs_err(sdp, "can't lookup journal index: %d\n", error);
330 return PTR_ERR(sdp->sd_jindex);
331 }
332 ip = GFS2_I(sdp->sd_jindex);
333 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
334
335 /* Load in the journal index special file */
336
337 error = gfs2_jindex_hold(sdp, &ji_gh);
338 if (error) {
339 fs_err(sdp, "can't read journal index: %d\n", error);
340 goto fail;
341 }
342
343 error = -EINVAL;
344 if (!gfs2_jindex_size(sdp)) {
345 fs_err(sdp, "no journals!\n");
346 goto fail_jindex;
347 }
348
349 if (sdp->sd_args.ar_spectator) {
350 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
351 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
352 } else {
353 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
354 fs_err(sdp, "can't mount journal #%u\n",
355 sdp->sd_lockstruct.ls_jid);
356 fs_err(sdp, "there are only %u journals (0 - %u)\n",
357 gfs2_jindex_size(sdp),
358 gfs2_jindex_size(sdp) - 1);
359 goto fail_jindex;
360 }
361 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
362
363 error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
364 &gfs2_journal_glops,
365 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
366 &sdp->sd_journal_gh);
367 if (error) {
368 fs_err(sdp, "can't acquire journal glock: %d\n", error);
369 goto fail_jindex;
370 }
371
372 ip = GFS2_I(sdp->sd_jdesc->jd_inode);
373 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
374 LM_FLAG_NOEXP | GL_EXACT,
375 &sdp->sd_jinode_gh);
376 if (error) {
377 fs_err(sdp, "can't acquire journal inode glock: %d\n",
378 error);
379 goto fail_journal_gh;
380 }
381
382 error = gfs2_jdesc_check(sdp->sd_jdesc);
383 if (error) {
384 fs_err(sdp, "my journal (%u) is bad: %d\n",
385 sdp->sd_jdesc->jd_jid, error);
386 goto fail_jinode_gh;
387 }
388 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
389 }
390
391 if (sdp->sd_lockstruct.ls_first) {
392 unsigned int x;
393 for (x = 0; x < sdp->sd_journals; x++) {
394 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x));
395 if (error) {
396 fs_err(sdp, "error recovering journal %u: %d\n",
397 x, error);
398 goto fail_jinode_gh;
399 }
400 }
401
402 gfs2_lm_others_may_mount(sdp);
403 } else if (!sdp->sd_args.ar_spectator) {
404 error = gfs2_recover_journal(sdp->sd_jdesc);
405 if (error) {
406 fs_err(sdp, "error recovering my journal: %d\n", error);
407 goto fail_jinode_gh;
408 }
409 }
410
411 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
412 gfs2_glock_dq_uninit(&ji_gh);
413 jindex = 0;
414
415 p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
416 error = IS_ERR(p);
417 if (error) {
418 fs_err(sdp, "can't start recoverd thread: %d\n", error);
419 goto fail_jinode_gh;
420 }
421 sdp->sd_recoverd_process = p;
422
423 return 0;
424
425fail_recoverd:
426 kthread_stop(sdp->sd_recoverd_process);
427fail_jinode_gh:
428 if (!sdp->sd_args.ar_spectator)
429 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
430fail_journal_gh:
431 if (!sdp->sd_args.ar_spectator)
432 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
433fail_jindex:
434 gfs2_jindex_free(sdp);
435 if (jindex)
436 gfs2_glock_dq_uninit(&ji_gh);
437fail:
438 iput(sdp->sd_jindex);
439 return error;
440}
441
442
443static int init_inodes(struct gfs2_sbd *sdp, int undo)
444{
445 int error = 0;
446 struct gfs2_inode *ip;
447 struct inode *inode;
448
449 if (undo)
450 goto fail_qinode;
451
452 inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
453 if (IS_ERR(inode)) {
454 error = PTR_ERR(inode);
455 fs_err(sdp, "can't read in master directory: %d\n", error);
456 goto fail;
457 }
458 sdp->sd_master_dir = inode;
459
460 error = init_journal(sdp, undo);
461 if (error)
462 goto fail_master;
463
464 /* Read in the master inode number inode */
465 sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
466 if (IS_ERR(sdp->sd_inum_inode)) {
467 error = PTR_ERR(sdp->sd_inum_inode);
468 fs_err(sdp, "can't read in inum inode: %d\n", error);
469 goto fail_journal;
470 }
471
472
473 /* Read in the master statfs inode */
474 sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
475 if (IS_ERR(sdp->sd_statfs_inode)) {
476 error = PTR_ERR(sdp->sd_statfs_inode);
477 fs_err(sdp, "can't read in statfs inode: %d\n", error);
478 goto fail_inum;
479 }
480
481 /* Read in the resource index inode */
482 sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
483 if (IS_ERR(sdp->sd_rindex)) {
484 error = PTR_ERR(sdp->sd_rindex);
485 fs_err(sdp, "can't get resource index inode: %d\n", error);
486 goto fail_statfs;
487 }
488 ip = GFS2_I(sdp->sd_rindex);
489 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
490 sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1;
491
492 /* Read in the quota inode */
493 sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
494 if (IS_ERR(sdp->sd_quota_inode)) {
495 error = PTR_ERR(sdp->sd_quota_inode);
496 fs_err(sdp, "can't get quota file inode: %d\n", error);
497 goto fail_rindex;
498 }
499 return 0;
500
501fail_qinode:
502 iput(sdp->sd_quota_inode);
503fail_rindex:
504 gfs2_clear_rgrpd(sdp);
505 iput(sdp->sd_rindex);
506fail_statfs:
507 iput(sdp->sd_statfs_inode);
508fail_inum:
509 iput(sdp->sd_inum_inode);
510fail_journal:
511 init_journal(sdp, UNDO);
512fail_master:
513 iput(sdp->sd_master_dir);
514fail:
515 return error;
516}
517
518static int init_per_node(struct gfs2_sbd *sdp, int undo)
519{
520 struct inode *pn = NULL;
521 char buf[30];
522 int error = 0;
523 struct gfs2_inode *ip;
524
525 if (sdp->sd_args.ar_spectator)
526 return 0;
527
528 if (undo)
529 goto fail_qc_gh;
530
531 pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
532 if (IS_ERR(pn)) {
533 error = PTR_ERR(pn);
534 fs_err(sdp, "can't find per_node directory: %d\n", error);
535 return error;
536 }
537
538 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
539 sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
540 if (IS_ERR(sdp->sd_ir_inode)) {
541 error = PTR_ERR(sdp->sd_ir_inode);
542 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
543 goto fail;
544 }
545
546 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
547 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
548 if (IS_ERR(sdp->sd_sc_inode)) {
549 error = PTR_ERR(sdp->sd_sc_inode);
550 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
551 goto fail_ir_i;
552 }
553
554 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
555 sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
556 if (IS_ERR(sdp->sd_qc_inode)) {
557 error = PTR_ERR(sdp->sd_qc_inode);
558 fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
559 goto fail_ut_i;
560 }
561
562 iput(pn);
563 pn = NULL;
564
565 ip = GFS2_I(sdp->sd_ir_inode);
566 error = gfs2_glock_nq_init(ip->i_gl,
567 LM_ST_EXCLUSIVE, 0,
568 &sdp->sd_ir_gh);
569 if (error) {
570 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
571 goto fail_qc_i;
572 }
573
574 ip = GFS2_I(sdp->sd_sc_inode);
575 error = gfs2_glock_nq_init(ip->i_gl,
576 LM_ST_EXCLUSIVE, 0,
577 &sdp->sd_sc_gh);
578 if (error) {
579 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
580 goto fail_ir_gh;
581 }
582
583 ip = GFS2_I(sdp->sd_qc_inode);
584 error = gfs2_glock_nq_init(ip->i_gl,
585 LM_ST_EXCLUSIVE, 0,
586 &sdp->sd_qc_gh);
587 if (error) {
588 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
589 goto fail_ut_gh;
590 }
591
592 return 0;
593
594fail_qc_gh:
595 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
596fail_ut_gh:
597 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
598fail_ir_gh:
599 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
600fail_qc_i:
601 iput(sdp->sd_qc_inode);
602fail_ut_i:
603 iput(sdp->sd_sc_inode);
604fail_ir_i:
605 iput(sdp->sd_ir_inode);
606fail:
607 if (pn)
608 iput(pn);
609 return error;
610}
611
612static int init_threads(struct gfs2_sbd *sdp, int undo)
613{
614 struct task_struct *p;
615 int error = 0;
616
617 if (undo)
618 goto fail_quotad;
619
620 sdp->sd_log_flush_time = jiffies;
621 sdp->sd_jindex_refresh_time = jiffies;
622
623 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
624 error = IS_ERR(p);
625 if (error) {
626 fs_err(sdp, "can't start logd thread: %d\n", error);
627 return error;
628 }
629 sdp->sd_logd_process = p;
630
631 sdp->sd_statfs_sync_time = jiffies;
632 sdp->sd_quota_sync_time = jiffies;
633
634 p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
635 error = IS_ERR(p);
636 if (error) {
637 fs_err(sdp, "can't start quotad thread: %d\n", error);
638 goto fail;
639 }
640 sdp->sd_quotad_process = p;
641
642 return 0;
643
644
645fail_quotad:
646 kthread_stop(sdp->sd_quotad_process);
647fail:
648 kthread_stop(sdp->sd_logd_process);
649 return error;
650}
651
652/**
653 * fill_super - Read in superblock
654 * @sb: The VFS superblock
655 * @data: Mount options
656 * @silent: Don't complain if it's not a GFS2 filesystem
657 *
658 * Returns: errno
659 */
660
661static int fill_super(struct super_block *sb, void *data, int silent)
662{
663 struct gfs2_sbd *sdp;
664 struct gfs2_holder mount_gh;
665 int error;
666
667 sdp = init_sbd(sb);
668 if (!sdp) {
669 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
670 return -ENOMEM;
671 }
672
673 error = gfs2_mount_args(sdp, (char *)data, 0);
674 if (error) {
675 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
676 goto fail;
677 }
678
679 init_vfs(sb, SDF_NOATIME);
680
681 /* Set up the buffer cache and fill in some fake block size values
682 to allow us to read-in the on-disk superblock. */
683 sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
684 sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
685 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
686 GFS2_BASIC_BLOCK_SHIFT;
687 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
688
689 error = init_names(sdp, silent);
690 if (error)
691 goto fail;
692
693 error = gfs2_sys_fs_add(sdp);
694 if (error)
695 goto fail;
696
697 error = gfs2_lm_mount(sdp, silent);
698 if (error)
699 goto fail_sys;
700
701 error = init_locking(sdp, &mount_gh, DO);
702 if (error)
703 goto fail_lm;
704
705 error = init_sb(sdp, silent, DO);
706 if (error)
707 goto fail_locking;
708
709 error = init_inodes(sdp, DO);
710 if (error)
711 goto fail_sb;
712
713 error = init_per_node(sdp, DO);
714 if (error)
715 goto fail_inodes;
716
717 error = gfs2_statfs_init(sdp);
718 if (error) {
719 fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
720 goto fail_per_node;
721 }
722
723 error = init_threads(sdp, DO);
724 if (error)
725 goto fail_per_node;
726
727 if (!(sb->s_flags & MS_RDONLY)) {
728 error = gfs2_make_fs_rw(sdp);
729 if (error) {
730 fs_err(sdp, "can't make FS RW: %d\n", error);
731 goto fail_threads;
732 }
733 }
734
735 gfs2_glock_dq_uninit(&mount_gh);
736
737 return 0;
738
739fail_threads:
740 init_threads(sdp, UNDO);
741fail_per_node:
742 init_per_node(sdp, UNDO);
743fail_inodes:
744 init_inodes(sdp, UNDO);
745fail_sb:
746 init_sb(sdp, 0, UNDO);
747fail_locking:
748 init_locking(sdp, &mount_gh, UNDO);
749fail_lm:
750 gfs2_gl_hash_clear(sdp, WAIT);
751 gfs2_lm_unmount(sdp);
752 while (invalidate_inodes(sb))
753 yield();
754fail_sys:
755 gfs2_sys_fs_del(sdp);
756fail:
757 kfree(sdp);
758 sb->s_fs_info = NULL;
759 return error;
760}
761
762static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
763 const char *dev_name, void *data, struct vfsmount *mnt)
764{
765 struct super_block *sb;
766 struct gfs2_sbd *sdp;
767 int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
768 if (error)
769 goto out;
770 sb = mnt->mnt_sb;
771 sdp = sb->s_fs_info;
772 sdp->sd_gfs2mnt = mnt;
773out:
774 return error;
775}
776
777static int fill_super_meta(struct super_block *sb, struct super_block *new,
778 void *data, int silent)
779{
780 struct gfs2_sbd *sdp = sb->s_fs_info;
781 struct inode *inode;
782 int error = 0;
783
784 new->s_fs_info = sdp;
785 sdp->sd_vfs_meta = sb;
786
787 init_vfs(new, SDF_NOATIME);
788
789 /* Get the master inode */
790 inode = igrab(sdp->sd_master_dir);
791
792 new->s_root = d_alloc_root(inode);
793 if (!new->s_root) {
794 fs_err(sdp, "can't get root dentry\n");
795 error = -ENOMEM;
796 iput(inode);
797 }
798 new->s_root->d_op = &gfs2_dops;
799
800 return error;
801}
802
803static int set_bdev_super(struct super_block *s, void *data)
804{
805 s->s_bdev = data;
806 s->s_dev = s->s_bdev->bd_dev;
807 return 0;
808}
809
810static int test_bdev_super(struct super_block *s, void *data)
811{
812 return s->s_bdev == data;
813}
814
815static struct super_block* get_gfs2_sb(const char *dev_name)
816{
817 struct kstat stat;
818 struct nameidata nd;
819 struct file_system_type *fstype;
820 struct super_block *sb = NULL, *s;
821 struct list_head *l;
822 int error;
823
824 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
825 if (error) {
826 printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n",
827 dev_name);
828 goto out;
829 }
830 error = vfs_getattr(nd.mnt, nd.dentry, &stat);
831
832 fstype = get_fs_type("gfs2");
833 list_for_each(l, &fstype->fs_supers) {
834 s = list_entry(l, struct super_block, s_instances);
835 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
836 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
837 sb = s;
838 goto free_nd;
839 }
840 }
841
842 printk(KERN_WARNING "GFS2: Unrecognized block device or "
843 "mount point %s", dev_name);
844
845free_nd:
846 path_release(&nd);
847out:
848 return sb;
849}
850
851static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
852 const char *dev_name, void *data, struct vfsmount *mnt)
853{
854 int error = 0;
855 struct super_block *sb = NULL, *new;
856 struct gfs2_sbd *sdp;
857 char *gfs2mnt = NULL;
858
859 sb = get_gfs2_sb(dev_name);
860 if (!sb) {
861 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
862 error = -ENOENT;
863 goto error;
864 }
865 sdp = (struct gfs2_sbd*) sb->s_fs_info;
866 if (sdp->sd_vfs_meta) {
867 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
868 error = -EBUSY;
869 goto error;
870 }
871 mutex_lock(&sb->s_bdev->bd_mount_mutex);
872 new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
873 mutex_unlock(&sb->s_bdev->bd_mount_mutex);
874 if (IS_ERR(new)) {
875 error = PTR_ERR(new);
876 goto error;
877 }
878 module_put(fs_type->owner);
879 new->s_flags = flags;
880 strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
881 sb_set_blocksize(new, sb->s_blocksize);
882 error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
883 if (error) {
884 up_write(&new->s_umount);
885 deactivate_super(new);
886 goto error;
887 }
888
889 new->s_flags |= MS_ACTIVE;
890
891 /* Grab a reference to the gfs2 mount point */
892 atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
893 return simple_set_mnt(mnt, new);
894error:
895 if (gfs2mnt)
896 kfree(gfs2mnt);
897 return error;
898}
899
/*
 * gfs2_kill_sb - kill_sb method of the "gfs2" filesystem type; simply
 * hands the superblock to kill_block_super().
 */
static void gfs2_kill_sb(struct super_block *sb)
{
	kill_block_super(sb);
}
904
905static void gfs2_kill_sb_meta(struct super_block *sb)
906{
907 struct gfs2_sbd *sdp = sb->s_fs_info;
908 generic_shutdown_super(sb);
909 sdp->sd_vfs_meta = NULL;
910 atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
911}
912
913struct file_system_type gfs2_fs_type = {
914 .name = "gfs2",
915 .fs_flags = FS_REQUIRES_DEV,
916 .get_sb = gfs2_get_sb,
917 .kill_sb = gfs2_kill_sb,
918 .owner = THIS_MODULE,
919};
920
921struct file_system_type gfs2meta_fs_type = {
922 .name = "gfs2meta",
923 .fs_flags = FS_REQUIRES_DEV,
924 .get_sb = gfs2_get_sb_meta,
925 .kill_sb = gfs2_kill_sb_meta,
926 .owner = THIS_MODULE,
927};
928
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
new file mode 100644
index 000000000000..7cc2c296271b
--- /dev/null
+++ b/fs/gfs2/ops_fstype.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_FSTYPE_DOT_H__
11#define __OPS_FSTYPE_DOT_H__
12
13#include <linux/fs.h>
14
15extern struct file_system_type gfs2_fs_type;
16extern struct file_system_type gfs2meta_fs_type;
17
18#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
new file mode 100644
index 000000000000..ef6e5ed70e94
--- /dev/null
+++ b/fs/gfs2/ops_inode.c
@@ -0,0 +1,1151 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/namei.h>
16#include <linux/utsname.h>
17#include <linux/mm.h>
18#include <linux/xattr.h>
19#include <linux/posix_acl.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <linux/lm_interface.h>
23#include <asm/uaccess.h>
24
25#include "gfs2.h"
26#include "incore.h"
27#include "acl.h"
28#include "bmap.h"
29#include "dir.h"
30#include "eaops.h"
31#include "eattr.h"
32#include "glock.h"
33#include "inode.h"
34#include "meta_io.h"
35#include "ops_dentry.h"
36#include "ops_inode.h"
37#include "quota.h"
38#include "rgrp.h"
39#include "trans.h"
40#include "util.h"
41
42/**
43 * gfs2_create - Create a file
44 * @dir: The directory in which to create the file
45 * @dentry: The dentry of the new file
46 * @mode: The mode of the new file
47 *
48 * Returns: errno
49 */
50
51static int gfs2_create(struct inode *dir, struct dentry *dentry,
52 int mode, struct nameidata *nd)
53{
54 struct gfs2_inode *dip = GFS2_I(dir);
55 struct gfs2_sbd *sdp = GFS2_SB(dir);
56 struct gfs2_holder ghs[2];
57 struct inode *inode;
58
59 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
60
61 for (;;) {
62 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
63 if (!IS_ERR(inode)) {
64 gfs2_trans_end(sdp);
65 if (dip->i_alloc.al_rgd)
66 gfs2_inplace_release(dip);
67 gfs2_quota_unlock(dip);
68 gfs2_alloc_put(dip);
69 gfs2_glock_dq_uninit_m(2, ghs);
70 mark_inode_dirty(inode);
71 break;
72 } else if (PTR_ERR(inode) != -EEXIST ||
73 (nd->intent.open.flags & O_EXCL)) {
74 gfs2_holder_uninit(ghs);
75 return PTR_ERR(inode);
76 }
77
78 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
79 if (inode) {
80 if (!IS_ERR(inode)) {
81 gfs2_holder_uninit(ghs);
82 break;
83 } else {
84 gfs2_holder_uninit(ghs);
85 return PTR_ERR(inode);
86 }
87 }
88 }
89
90 d_instantiate(dentry, inode);
91
92 return 0;
93}
94
95/**
96 * gfs2_lookup - Look up a filename in a directory and return its inode
97 * @dir: The directory inode
98 * @dentry: The dentry of the new inode
99 * @nd: passed from Linux VFS, ignored by us
100 *
101 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
102 *
103 * Returns: errno
104 */
105
106static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
107 struct nameidata *nd)
108{
109 struct inode *inode = NULL;
110
111 dentry->d_op = &gfs2_dops;
112
113 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
114 if (inode && IS_ERR(inode))
115 return ERR_PTR(PTR_ERR(inode));
116
117 if (inode)
118 return d_splice_alias(inode, dentry);
119 d_add(dentry, inode);
120
121 return NULL;
122}
123
124/**
125 * gfs2_link - Link to a file
126 * @old_dentry: The inode to link
127 * @dir: Add link to this directory
128 * @dentry: The name of the link
129 *
130 * Link the inode in "old_dentry" into the directory "dir" with the
131 * name in "dentry".
132 *
133 * Returns: errno
134 */
135
136static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
137 struct dentry *dentry)
138{
139 struct gfs2_inode *dip = GFS2_I(dir);
140 struct gfs2_sbd *sdp = GFS2_SB(dir);
141 struct inode *inode = old_dentry->d_inode;
142 struct gfs2_inode *ip = GFS2_I(inode);
143 struct gfs2_holder ghs[2];
144 int alloc_required;
145 int error;
146
147 if (S_ISDIR(ip->i_di.di_mode))
148 return -EPERM;
149
150 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
151 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
152
153 error = gfs2_glock_nq_m(2, ghs);
154 if (error)
155 goto out;
156
157 error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
158 if (error)
159 goto out_gunlock;
160
161 error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
162 switch (error) {
163 case -ENOENT:
164 break;
165 case 0:
166 error = -EEXIST;
167 default:
168 goto out_gunlock;
169 }
170
171 error = -EINVAL;
172 if (!dip->i_di.di_nlink)
173 goto out_gunlock;
174 error = -EFBIG;
175 if (dip->i_di.di_entries == (u32)-1)
176 goto out_gunlock;
177 error = -EPERM;
178 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
179 goto out_gunlock;
180 error = -EINVAL;
181 if (!ip->i_di.di_nlink)
182 goto out_gunlock;
183 error = -EMLINK;
184 if (ip->i_di.di_nlink == (u32)-1)
185 goto out_gunlock;
186
187 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
188 if (error < 0)
189 goto out_gunlock;
190 error = 0;
191
192 if (alloc_required) {
193 struct gfs2_alloc *al = gfs2_alloc_get(dip);
194
195 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
196 if (error)
197 goto out_alloc;
198
199 error = gfs2_quota_check(dip, dip->i_di.di_uid,
200 dip->i_di.di_gid);
201 if (error)
202 goto out_gunlock_q;
203
204 al->al_requested = sdp->sd_max_dirres;
205
206 error = gfs2_inplace_reserve(dip);
207 if (error)
208 goto out_gunlock_q;
209
210 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
211 al->al_rgd->rd_ri.ri_length +
212 2 * RES_DINODE + RES_STATFS +
213 RES_QUOTA, 0);
214 if (error)
215 goto out_ipres;
216 } else {
217 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
218 if (error)
219 goto out_ipres;
220 }
221
222 error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
223 IF2DT(ip->i_di.di_mode));
224 if (error)
225 goto out_end_trans;
226
227 error = gfs2_change_nlink(ip, +1);
228
229out_end_trans:
230 gfs2_trans_end(sdp);
231out_ipres:
232 if (alloc_required)
233 gfs2_inplace_release(dip);
234out_gunlock_q:
235 if (alloc_required)
236 gfs2_quota_unlock(dip);
237out_alloc:
238 if (alloc_required)
239 gfs2_alloc_put(dip);
240out_gunlock:
241 gfs2_glock_dq_m(2, ghs);
242out:
243 gfs2_holder_uninit(ghs);
244 gfs2_holder_uninit(ghs + 1);
245 if (!error) {
246 atomic_inc(&inode->i_count);
247 d_instantiate(dentry, inode);
248 mark_inode_dirty(inode);
249 }
250 return error;
251}
252
253/**
254 * gfs2_unlink - Unlink a file
255 * @dir: The inode of the directory containing the file to unlink
256 * @dentry: The file itself
257 *
258 * Unlink a file. Call gfs2_unlinki()
259 *
260 * Returns: errno
261 */
262
263static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
264{
265 struct gfs2_inode *dip = GFS2_I(dir);
266 struct gfs2_sbd *sdp = GFS2_SB(dir);
267 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
268 struct gfs2_holder ghs[2];
269 int error;
270
271 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
272 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
273
274 error = gfs2_glock_nq_m(2, ghs);
275 if (error)
276 goto out;
277
278 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
279 if (error)
280 goto out_gunlock;
281
282 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
283 if (error)
284 goto out_gunlock;
285
286 error = gfs2_dir_del(dip, &dentry->d_name);
287 if (error)
288 goto out_end_trans;
289
290 error = gfs2_change_nlink(ip, -1);
291
292out_end_trans:
293 gfs2_trans_end(sdp);
294out_gunlock:
295 gfs2_glock_dq_m(2, ghs);
296out:
297 gfs2_holder_uninit(ghs);
298 gfs2_holder_uninit(ghs + 1);
299 return error;
300}
301
302/**
303 * gfs2_symlink - Create a symlink
304 * @dir: The directory to create the symlink in
305 * @dentry: The dentry to put the symlink in
306 * @symname: The thing which the link points to
307 *
308 * Returns: errno
309 */
310
311static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
312 const char *symname)
313{
314 struct gfs2_inode *dip = GFS2_I(dir), *ip;
315 struct gfs2_sbd *sdp = GFS2_SB(dir);
316 struct gfs2_holder ghs[2];
317 struct inode *inode;
318 struct buffer_head *dibh;
319 int size;
320 int error;
321
322 /* Must be stuffed with a null terminator for gfs2_follow_link() */
323 size = strlen(symname);
324 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
325 return -ENAMETOOLONG;
326
327 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
328
329 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
330 if (IS_ERR(inode)) {
331 gfs2_holder_uninit(ghs);
332 return PTR_ERR(inode);
333 }
334
335 ip = ghs[1].gh_gl->gl_object;
336
337 ip->i_di.di_size = size;
338
339 error = gfs2_meta_inode_buffer(ip, &dibh);
340
341 if (!gfs2_assert_withdraw(sdp, !error)) {
342 gfs2_dinode_out(&ip->i_di, dibh->b_data);
343 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
344 size);
345 brelse(dibh);
346 }
347
348 gfs2_trans_end(sdp);
349 if (dip->i_alloc.al_rgd)
350 gfs2_inplace_release(dip);
351 gfs2_quota_unlock(dip);
352 gfs2_alloc_put(dip);
353
354 gfs2_glock_dq_uninit_m(2, ghs);
355
356 d_instantiate(dentry, inode);
357 mark_inode_dirty(inode);
358
359 return 0;
360}
361
362/**
363 * gfs2_mkdir - Make a directory
364 * @dir: The parent directory of the new one
365 * @dentry: The dentry of the new directory
366 * @mode: The mode of the new directory
367 *
368 * Returns: errno
369 */
370
371static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
372{
373 struct gfs2_inode *dip = GFS2_I(dir), *ip;
374 struct gfs2_sbd *sdp = GFS2_SB(dir);
375 struct gfs2_holder ghs[2];
376 struct inode *inode;
377 struct buffer_head *dibh;
378 int error;
379
380 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
381
382 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
383 if (IS_ERR(inode)) {
384 gfs2_holder_uninit(ghs);
385 return PTR_ERR(inode);
386 }
387
388 ip = ghs[1].gh_gl->gl_object;
389
390 ip->i_di.di_nlink = 2;
391 ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
392 ip->i_di.di_flags |= GFS2_DIF_JDATA;
393 ip->i_di.di_payload_format = GFS2_FORMAT_DE;
394 ip->i_di.di_entries = 2;
395
396 error = gfs2_meta_inode_buffer(ip, &dibh);
397
398 if (!gfs2_assert_withdraw(sdp, !error)) {
399 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
400 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
401 struct qstr str;
402
403 gfs2_str2qstr(&str, ".");
404 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
405 gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent);
406 dent->de_inum = di->di_num; /* already GFS2 endian */
407 dent->de_type = cpu_to_be16(DT_DIR);
408 di->di_entries = cpu_to_be32(1);
409
410 gfs2_str2qstr(&str, "..");
411 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
412 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
413
414 gfs2_inum_out(&dip->i_num, &dent->de_inum);
415 dent->de_type = cpu_to_be16(DT_DIR);
416
417 gfs2_dinode_out(&ip->i_di, di);
418
419 brelse(dibh);
420 }
421
422 error = gfs2_change_nlink(dip, +1);
423 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
424
425 gfs2_trans_end(sdp);
426 if (dip->i_alloc.al_rgd)
427 gfs2_inplace_release(dip);
428 gfs2_quota_unlock(dip);
429 gfs2_alloc_put(dip);
430
431 gfs2_glock_dq_uninit_m(2, ghs);
432
433 d_instantiate(dentry, inode);
434 mark_inode_dirty(inode);
435
436 return 0;
437}
438
439/**
440 * gfs2_rmdir - Remove a directory
441 * @dir: The parent directory of the directory to be removed
442 * @dentry: The dentry of the directory to remove
443 *
444 * Remove a directory. Call gfs2_rmdiri()
445 *
446 * Returns: errno
447 */
448
449static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
450{
451 struct gfs2_inode *dip = GFS2_I(dir);
452 struct gfs2_sbd *sdp = GFS2_SB(dir);
453 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
454 struct gfs2_holder ghs[2];
455 int error;
456
457 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
458 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
459
460 error = gfs2_glock_nq_m(2, ghs);
461 if (error)
462 goto out;
463
464 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
465 if (error)
466 goto out_gunlock;
467
468 if (ip->i_di.di_entries < 2) {
469 if (gfs2_consist_inode(ip))
470 gfs2_dinode_print(&ip->i_di);
471 error = -EIO;
472 goto out_gunlock;
473 }
474 if (ip->i_di.di_entries > 2) {
475 error = -ENOTEMPTY;
476 goto out_gunlock;
477 }
478
479 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
480 if (error)
481 goto out_gunlock;
482
483 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
484
485 gfs2_trans_end(sdp);
486
487out_gunlock:
488 gfs2_glock_dq_m(2, ghs);
489out:
490 gfs2_holder_uninit(ghs);
491 gfs2_holder_uninit(ghs + 1);
492 return error;
493}
494
495/**
496 * gfs2_mknod - Make a special file
497 * @dir: The directory in which the special file will reside
498 * @dentry: The dentry of the special file
499 * @mode: The mode of the special file
500 * @rdev: The device specification of the special file
501 *
502 */
503
504static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
505 dev_t dev)
506{
507 struct gfs2_inode *dip = GFS2_I(dir), *ip;
508 struct gfs2_sbd *sdp = GFS2_SB(dir);
509 struct gfs2_holder ghs[2];
510 struct inode *inode;
511 struct buffer_head *dibh;
512 u32 major = 0, minor = 0;
513 int error;
514
515 switch (mode & S_IFMT) {
516 case S_IFBLK:
517 case S_IFCHR:
518 major = MAJOR(dev);
519 minor = MINOR(dev);
520 break;
521 case S_IFIFO:
522 case S_IFSOCK:
523 break;
524 default:
525 return -EOPNOTSUPP;
526 };
527
528 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
529
530 inode = gfs2_createi(ghs, &dentry->d_name, mode);
531 if (IS_ERR(inode)) {
532 gfs2_holder_uninit(ghs);
533 return PTR_ERR(inode);
534 }
535
536 ip = ghs[1].gh_gl->gl_object;
537
538 ip->i_di.di_major = major;
539 ip->i_di.di_minor = minor;
540
541 error = gfs2_meta_inode_buffer(ip, &dibh);
542
543 if (!gfs2_assert_withdraw(sdp, !error)) {
544 gfs2_dinode_out(&ip->i_di, dibh->b_data);
545 brelse(dibh);
546 }
547
548 gfs2_trans_end(sdp);
549 if (dip->i_alloc.al_rgd)
550 gfs2_inplace_release(dip);
551 gfs2_quota_unlock(dip);
552 gfs2_alloc_put(dip);
553
554 gfs2_glock_dq_uninit_m(2, ghs);
555
556 d_instantiate(dentry, inode);
557 mark_inode_dirty(inode);
558
559 return 0;
560}
561
562/**
563 * gfs2_rename - Rename a file
564 * @odir: Parent directory of old file name
565 * @odentry: The old dentry of the file
566 * @ndir: Parent directory of new file name
567 * @ndentry: The new dentry of the file
568 *
569 * Returns: errno
570 */
571
572static int gfs2_rename(struct inode *odir, struct dentry *odentry,
573 struct inode *ndir, struct dentry *ndentry)
574{
575 struct gfs2_inode *odip = GFS2_I(odir);
576 struct gfs2_inode *ndip = GFS2_I(ndir);
577 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
578 struct gfs2_inode *nip = NULL;
579 struct gfs2_sbd *sdp = GFS2_SB(odir);
580 struct gfs2_holder ghs[4], r_gh;
581 unsigned int num_gh;
582 int dir_rename = 0;
583 int alloc_required;
584 unsigned int x;
585 int error;
586
587 if (ndentry->d_inode) {
588 nip = GFS2_I(ndentry->d_inode);
589 if (ip == nip)
590 return 0;
591 }
592
593 /* Make sure we aren't trying to move a dirctory into it's subdir */
594
595 if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
596 dir_rename = 1;
597
598 error = gfs2_glock_nq_init(sdp->sd_rename_gl,
599 LM_ST_EXCLUSIVE, 0,
600 &r_gh);
601 if (error)
602 goto out;
603
604 error = gfs2_ok_to_move(ip, ndip);
605 if (error)
606 goto out_gunlock_r;
607 }
608
609 num_gh = 1;
610 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
611 if (odip != ndip) {
612 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
613 num_gh++;
614 }
615 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
616 num_gh++;
617
618 if (nip) {
619 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
620 num_gh++;
621 }
622
623 error = gfs2_glock_nq_m(num_gh, ghs);
624 if (error)
625 goto out_uninit;
626
627 /* Check out the old directory */
628
629 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
630 if (error)
631 goto out_gunlock;
632
633 /* Check out the new directory */
634
635 if (nip) {
636 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
637 if (error)
638 goto out_gunlock;
639
640 if (S_ISDIR(nip->i_di.di_mode)) {
641 if (nip->i_di.di_entries < 2) {
642 if (gfs2_consist_inode(nip))
643 gfs2_dinode_print(&nip->i_di);
644 error = -EIO;
645 goto out_gunlock;
646 }
647 if (nip->i_di.di_entries > 2) {
648 error = -ENOTEMPTY;
649 goto out_gunlock;
650 }
651 }
652 } else {
653 error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
654 if (error)
655 goto out_gunlock;
656
657 error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
658 switch (error) {
659 case -ENOENT:
660 error = 0;
661 break;
662 case 0:
663 error = -EEXIST;
664 default:
665 goto out_gunlock;
666 };
667
668 if (odip != ndip) {
669 if (!ndip->i_di.di_nlink) {
670 error = -EINVAL;
671 goto out_gunlock;
672 }
673 if (ndip->i_di.di_entries == (u32)-1) {
674 error = -EFBIG;
675 goto out_gunlock;
676 }
677 if (S_ISDIR(ip->i_di.di_mode) &&
678 ndip->i_di.di_nlink == (u32)-1) {
679 error = -EMLINK;
680 goto out_gunlock;
681 }
682 }
683 }
684
685 /* Check out the dir to be renamed */
686
687 if (dir_rename) {
688 error = permission(odentry->d_inode, MAY_WRITE, NULL);
689 if (error)
690 goto out_gunlock;
691 }
692
693 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
694 if (error < 0)
695 goto out_gunlock;
696 error = 0;
697
698 if (alloc_required) {
699 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
700
701 error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
702 if (error)
703 goto out_alloc;
704
705 error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
706 ndip->i_di.di_gid);
707 if (error)
708 goto out_gunlock_q;
709
710 al->al_requested = sdp->sd_max_dirres;
711
712 error = gfs2_inplace_reserve(ndip);
713 if (error)
714 goto out_gunlock_q;
715
716 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
717 al->al_rgd->rd_ri.ri_length +
718 4 * RES_DINODE + 4 * RES_LEAF +
719 RES_STATFS + RES_QUOTA, 0);
720 if (error)
721 goto out_ipreserv;
722 } else {
723 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
724 5 * RES_LEAF, 0);
725 if (error)
726 goto out_gunlock;
727 }
728
729 /* Remove the target file, if it exists */
730
731 if (nip) {
732 if (S_ISDIR(nip->i_di.di_mode))
733 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
734 else {
735 error = gfs2_dir_del(ndip, &ndentry->d_name);
736 if (error)
737 goto out_end_trans;
738 error = gfs2_change_nlink(nip, -1);
739 }
740 if (error)
741 goto out_end_trans;
742 }
743
744 if (dir_rename) {
745 struct qstr name;
746 gfs2_str2qstr(&name, "..");
747
748 error = gfs2_change_nlink(ndip, +1);
749 if (error)
750 goto out_end_trans;
751 error = gfs2_change_nlink(odip, -1);
752 if (error)
753 goto out_end_trans;
754
755 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
756 if (error)
757 goto out_end_trans;
758 } else {
759 struct buffer_head *dibh;
760 error = gfs2_meta_inode_buffer(ip, &dibh);
761 if (error)
762 goto out_end_trans;
763 ip->i_di.di_ctime = get_seconds();
764 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
765 gfs2_dinode_out(&ip->i_di, dibh->b_data);
766 brelse(dibh);
767 }
768
769 error = gfs2_dir_del(odip, &odentry->d_name);
770 if (error)
771 goto out_end_trans;
772
773 error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
774 IF2DT(ip->i_di.di_mode));
775 if (error)
776 goto out_end_trans;
777
778out_end_trans:
779 gfs2_trans_end(sdp);
780out_ipreserv:
781 if (alloc_required)
782 gfs2_inplace_release(ndip);
783out_gunlock_q:
784 if (alloc_required)
785 gfs2_quota_unlock(ndip);
786out_alloc:
787 if (alloc_required)
788 gfs2_alloc_put(ndip);
789out_gunlock:
790 gfs2_glock_dq_m(num_gh, ghs);
791out_uninit:
792 for (x = 0; x < num_gh; x++)
793 gfs2_holder_uninit(ghs + x);
794out_gunlock_r:
795 if (dir_rename)
796 gfs2_glock_dq_uninit(&r_gh);
797out:
798 return error;
799}
800
801/**
802 * gfs2_readlink - Read the value of a symlink
803 * @dentry: the symlink
804 * @buf: the buffer to read the symlink data into
805 * @size: the size of the buffer
806 *
807 * Returns: errno
808 */
809
810static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
811 int user_size)
812{
813 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
814 char array[GFS2_FAST_NAME_SIZE], *buf = array;
815 unsigned int len = GFS2_FAST_NAME_SIZE;
816 int error;
817
818 error = gfs2_readlinki(ip, &buf, &len);
819 if (error)
820 return error;
821
822 if (user_size > len - 1)
823 user_size = len - 1;
824
825 if (copy_to_user(user_buf, buf, user_size))
826 error = -EFAULT;
827 else
828 error = user_size;
829
830 if (buf != array)
831 kfree(buf);
832
833 return error;
834}
835
836/**
837 * gfs2_follow_link - Follow a symbolic link
838 * @dentry: The dentry of the link
839 * @nd: Data that we pass to vfs_follow_link()
840 *
841 * This can handle symlinks of any size. It is optimised for symlinks
842 * under GFS2_FAST_NAME_SIZE.
843 *
844 * Returns: 0 on success or error code
845 */
846
847static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
848{
849 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
850 char array[GFS2_FAST_NAME_SIZE], *buf = array;
851 unsigned int len = GFS2_FAST_NAME_SIZE;
852 int error;
853
854 error = gfs2_readlinki(ip, &buf, &len);
855 if (!error) {
856 error = vfs_follow_link(nd, buf);
857 if (buf != array)
858 kfree(buf);
859 }
860
861 return ERR_PTR(error);
862}
863
864/**
865 * gfs2_permission -
866 * @inode:
867 * @mask:
868 * @nd: passed from Linux VFS, ignored by us
869 *
870 * Returns: errno
871 */
872
873static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
874{
875 struct gfs2_inode *ip = GFS2_I(inode);
876 struct gfs2_holder i_gh;
877 int error;
878
879 if (ip->i_vn == ip->i_gl->gl_vn)
880 return generic_permission(inode, mask, gfs2_check_acl);
881
882 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
883 if (!error) {
884 error = generic_permission(inode, mask, gfs2_check_acl_locked);
885 gfs2_glock_dq_uninit(&i_gh);
886 }
887
888 return error;
889}
890
891static int setattr_size(struct inode *inode, struct iattr *attr)
892{
893 struct gfs2_inode *ip = GFS2_I(inode);
894 int error;
895
896 if (attr->ia_size != ip->i_di.di_size) {
897 error = vmtruncate(inode, attr->ia_size);
898 if (error)
899 return error;
900 }
901
902 error = gfs2_truncatei(ip, attr->ia_size);
903 if (error)
904 return error;
905
906 return error;
907}
908
909static int setattr_chown(struct inode *inode, struct iattr *attr)
910{
911 struct gfs2_inode *ip = GFS2_I(inode);
912 struct gfs2_sbd *sdp = GFS2_SB(inode);
913 struct buffer_head *dibh;
914 u32 ouid, ogid, nuid, ngid;
915 int error;
916
917 ouid = ip->i_di.di_uid;
918 ogid = ip->i_di.di_gid;
919 nuid = attr->ia_uid;
920 ngid = attr->ia_gid;
921
922 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
923 ouid = nuid = NO_QUOTA_CHANGE;
924 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
925 ogid = ngid = NO_QUOTA_CHANGE;
926
927 gfs2_alloc_get(ip);
928
929 error = gfs2_quota_lock(ip, nuid, ngid);
930 if (error)
931 goto out_alloc;
932
933 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
934 error = gfs2_quota_check(ip, nuid, ngid);
935 if (error)
936 goto out_gunlock_q;
937 }
938
939 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
940 if (error)
941 goto out_gunlock_q;
942
943 error = gfs2_meta_inode_buffer(ip, &dibh);
944 if (error)
945 goto out_end_trans;
946
947 error = inode_setattr(inode, attr);
948 gfs2_assert_warn(sdp, !error);
949 gfs2_inode_attr_out(ip);
950
951 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
952 gfs2_dinode_out(&ip->i_di, dibh->b_data);
953 brelse(dibh);
954
955 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
956 gfs2_quota_change(ip, -ip->i_di.di_blocks, ouid, ogid);
957 gfs2_quota_change(ip, ip->i_di.di_blocks, nuid, ngid);
958 }
959
960out_end_trans:
961 gfs2_trans_end(sdp);
962out_gunlock_q:
963 gfs2_quota_unlock(ip);
964out_alloc:
965 gfs2_alloc_put(ip);
966 return error;
967}
968
969/**
970 * gfs2_setattr - Change attributes on an inode
971 * @dentry: The dentry which is changing
972 * @attr: The structure describing the change
973 *
974 * The VFS layer wants to change one or more of an inodes attributes. Write
975 * that change out to disk.
976 *
977 * Returns: errno
978 */
979
980static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
981{
982 struct inode *inode = dentry->d_inode;
983 struct gfs2_inode *ip = GFS2_I(inode);
984 struct gfs2_holder i_gh;
985 int error;
986
987 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
988 if (error)
989 return error;
990
991 error = -EPERM;
992 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
993 goto out;
994
995 error = inode_change_ok(inode, attr);
996 if (error)
997 goto out;
998
999 if (attr->ia_valid & ATTR_SIZE)
1000 error = setattr_size(inode, attr);
1001 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1002 error = setattr_chown(inode, attr);
1003 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1004 error = gfs2_acl_chmod(ip, attr);
1005 else
1006 error = gfs2_setattr_simple(ip, attr);
1007
1008out:
1009 gfs2_glock_dq_uninit(&i_gh);
1010 if (!error)
1011 mark_inode_dirty(inode);
1012 return error;
1013}
1014
1015/**
1016 * gfs2_getattr - Read out an inode's attributes
1017 * @mnt: The vfsmount the inode is being accessed from
1018 * @dentry: The dentry to stat
1019 * @stat: The inode's stats
1020 *
1021 * Returns: errno
1022 */
1023
1024static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1025 struct kstat *stat)
1026{
1027 struct inode *inode = dentry->d_inode;
1028 struct gfs2_inode *ip = GFS2_I(inode);
1029 struct gfs2_holder gh;
1030 int error;
1031
1032 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1033 if (!error) {
1034 generic_fillattr(inode, stat);
1035 gfs2_glock_dq_uninit(&gh);
1036 }
1037
1038 return error;
1039}
1040
1041static int gfs2_setxattr(struct dentry *dentry, const char *name,
1042 const void *data, size_t size, int flags)
1043{
1044 struct inode *inode = dentry->d_inode;
1045 struct gfs2_ea_request er;
1046
1047 memset(&er, 0, sizeof(struct gfs2_ea_request));
1048 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1049 if (er.er_type == GFS2_EATYPE_UNUSED)
1050 return -EOPNOTSUPP;
1051 er.er_data = (char *)data;
1052 er.er_name_len = strlen(er.er_name);
1053 er.er_data_len = size;
1054 er.er_flags = flags;
1055
1056 gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE));
1057
1058 return gfs2_ea_set(GFS2_I(inode), &er);
1059}
1060
1061static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1062 void *data, size_t size)
1063{
1064 struct gfs2_ea_request er;
1065
1066 memset(&er, 0, sizeof(struct gfs2_ea_request));
1067 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1068 if (er.er_type == GFS2_EATYPE_UNUSED)
1069 return -EOPNOTSUPP;
1070 er.er_data = data;
1071 er.er_name_len = strlen(er.er_name);
1072 er.er_data_len = size;
1073
1074 return gfs2_ea_get(GFS2_I(dentry->d_inode), &er);
1075}
1076
1077static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1078{
1079 struct gfs2_ea_request er;
1080
1081 memset(&er, 0, sizeof(struct gfs2_ea_request));
1082 er.er_data = (size) ? buffer : NULL;
1083 er.er_data_len = size;
1084
1085 return gfs2_ea_list(GFS2_I(dentry->d_inode), &er);
1086}
1087
1088static int gfs2_removexattr(struct dentry *dentry, const char *name)
1089{
1090 struct gfs2_ea_request er;
1091
1092 memset(&er, 0, sizeof(struct gfs2_ea_request));
1093 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1094 if (er.er_type == GFS2_EATYPE_UNUSED)
1095 return -EOPNOTSUPP;
1096 er.er_name_len = strlen(er.er_name);
1097
1098 return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
1099}
1100
1101struct inode_operations gfs2_file_iops = {
1102 .permission = gfs2_permission,
1103 .setattr = gfs2_setattr,
1104 .getattr = gfs2_getattr,
1105 .setxattr = gfs2_setxattr,
1106 .getxattr = gfs2_getxattr,
1107 .listxattr = gfs2_listxattr,
1108 .removexattr = gfs2_removexattr,
1109};
1110
1111struct inode_operations gfs2_dev_iops = {
1112 .permission = gfs2_permission,
1113 .setattr = gfs2_setattr,
1114 .getattr = gfs2_getattr,
1115 .setxattr = gfs2_setxattr,
1116 .getxattr = gfs2_getxattr,
1117 .listxattr = gfs2_listxattr,
1118 .removexattr = gfs2_removexattr,
1119};
1120
1121struct inode_operations gfs2_dir_iops = {
1122 .create = gfs2_create,
1123 .lookup = gfs2_lookup,
1124 .link = gfs2_link,
1125 .unlink = gfs2_unlink,
1126 .symlink = gfs2_symlink,
1127 .mkdir = gfs2_mkdir,
1128 .rmdir = gfs2_rmdir,
1129 .mknod = gfs2_mknod,
1130 .rename = gfs2_rename,
1131 .permission = gfs2_permission,
1132 .setattr = gfs2_setattr,
1133 .getattr = gfs2_getattr,
1134 .setxattr = gfs2_setxattr,
1135 .getxattr = gfs2_getxattr,
1136 .listxattr = gfs2_listxattr,
1137 .removexattr = gfs2_removexattr,
1138};
1139
1140struct inode_operations gfs2_symlink_iops = {
1141 .readlink = gfs2_readlink,
1142 .follow_link = gfs2_follow_link,
1143 .permission = gfs2_permission,
1144 .setattr = gfs2_setattr,
1145 .getattr = gfs2_getattr,
1146 .setxattr = gfs2_setxattr,
1147 .getxattr = gfs2_getxattr,
1148 .listxattr = gfs2_listxattr,
1149 .removexattr = gfs2_removexattr,
1150};
1151
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
new file mode 100644
index 000000000000..b15acb4fd34c
--- /dev/null
+++ b/fs/gfs2/ops_inode.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_INODE_DOT_H__
11#define __OPS_INODE_DOT_H__
12
13#include <linux/fs.h>
14
15extern struct inode_operations gfs2_file_iops;
16extern struct inode_operations gfs2_dir_iops;
17extern struct inode_operations gfs2_symlink_iops;
18extern struct inode_operations gfs2_dev_iops;
19
20#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
new file mode 100644
index 000000000000..06f06f7773d0
--- /dev/null
+++ b/fs/gfs2/ops_super.c
@@ -0,0 +1,468 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/statfs.h>
16#include <linux/seq_file.h>
17#include <linux/mount.h>
18#include <linux/kthread.h>
19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <linux/lm_interface.h>
23
24#include "gfs2.h"
25#include "incore.h"
26#include "glock.h"
27#include "inode.h"
28#include "lm.h"
29#include "log.h"
30#include "mount.h"
31#include "ops_super.h"
32#include "quota.h"
33#include "recovery.h"
34#include "rgrp.h"
35#include "super.h"
36#include "sys.h"
37#include "util.h"
38#include "trans.h"
39#include "dir.h"
40#include "eattr.h"
41#include "bmap.h"
42
43/**
44 * gfs2_write_inode - Make sure the inode is stable on the disk
45 * @inode: The inode
46 * @sync: synchronous write flag
47 *
48 * Returns: errno
49 */
50
51static int gfs2_write_inode(struct inode *inode, int sync)
52{
53 struct gfs2_inode *ip = GFS2_I(inode);
54
55 /* Check this is a "normal" inode */
56 if (inode->i_private) {
57 if (current->flags & PF_MEMALLOC)
58 return 0;
59 if (sync)
60 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
61 }
62
63 return 0;
64}
65
66/**
67 * gfs2_put_super - Unmount the filesystem
68 * @sb: The VFS superblock
69 *
70 */
71
72static void gfs2_put_super(struct super_block *sb)
73{
74 struct gfs2_sbd *sdp = sb->s_fs_info;
75 int error;
76
77 if (!sdp)
78 return;
79
80 if (!strncmp(sb->s_type->name, "gfs2meta", 8))
81 return; /* Nothing to do */
82
83 /* Unfreeze the filesystem, if we need to */
84
85 mutex_lock(&sdp->sd_freeze_lock);
86 if (sdp->sd_freeze_count)
87 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
88 mutex_unlock(&sdp->sd_freeze_lock);
89
90 kthread_stop(sdp->sd_quotad_process);
91 kthread_stop(sdp->sd_logd_process);
92 kthread_stop(sdp->sd_recoverd_process);
93 while (sdp->sd_glockd_num--)
94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
95 kthread_stop(sdp->sd_scand_process);
96
97 if (!(sb->s_flags & MS_RDONLY)) {
98 error = gfs2_make_fs_ro(sdp);
99 if (error)
100 gfs2_io_error(sdp);
101 }
102 /* At this point, we're through modifying the disk */
103
104 /* Release stuff */
105
106 iput(sdp->sd_master_dir);
107 iput(sdp->sd_jindex);
108 iput(sdp->sd_inum_inode);
109 iput(sdp->sd_statfs_inode);
110 iput(sdp->sd_rindex);
111 iput(sdp->sd_quota_inode);
112
113 gfs2_glock_put(sdp->sd_rename_gl);
114 gfs2_glock_put(sdp->sd_trans_gl);
115
116 if (!sdp->sd_args.ar_spectator) {
117 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
118 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
119 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
120 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
121 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
122 iput(sdp->sd_ir_inode);
123 iput(sdp->sd_sc_inode);
124 iput(sdp->sd_qc_inode);
125 }
126
127 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
128 gfs2_clear_rgrpd(sdp);
129 gfs2_jindex_free(sdp);
130 /* Take apart glock structures and buffer lists */
131 gfs2_gl_hash_clear(sdp, WAIT);
132 /* Unmount the locking protocol */
133 gfs2_lm_unmount(sdp);
134
135 /* At this point, we're through participating in the lockspace */
136 gfs2_sys_fs_del(sdp);
137 kfree(sdp);
138}
139
140/**
141 * gfs2_write_super - disk commit all incore transactions
142 * @sb: the filesystem
143 *
144 * This function is called every time sync(2) is called.
145 * After this exits, all dirty buffers are synced.
146 */
147
148static void gfs2_write_super(struct super_block *sb)
149{
150 gfs2_log_flush(sb->s_fs_info, NULL);
151}
152
153/**
154 * gfs2_write_super_lockfs - prevent further writes to the filesystem
155 * @sb: the VFS structure for the filesystem
156 *
157 */
158
159static void gfs2_write_super_lockfs(struct super_block *sb)
160{
161 struct gfs2_sbd *sdp = sb->s_fs_info;
162 int error;
163
164 for (;;) {
165 error = gfs2_freeze_fs(sdp);
166 if (!error)
167 break;
168
169 switch (error) {
170 case -EBUSY:
171 fs_err(sdp, "waiting for recovery before freeze\n");
172 break;
173
174 default:
175 fs_err(sdp, "error freezing FS: %d\n", error);
176 break;
177 }
178
179 fs_err(sdp, "retrying...\n");
180 msleep(1000);
181 }
182}
183
184/**
185 * gfs2_unlockfs - reallow writes to the filesystem
186 * @sb: the VFS structure for the filesystem
187 *
188 */
189
190static void gfs2_unlockfs(struct super_block *sb)
191{
192 gfs2_unfreeze_fs(sb->s_fs_info);
193}
194
195/**
196 * gfs2_statfs - Gather and return stats about the filesystem
197 * @sb: The superblock
198 * @statfsbuf: The buffer
199 *
200 * Returns: 0 on success or error code
201 */
202
203static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
204{
205 struct super_block *sb = dentry->d_inode->i_sb;
206 struct gfs2_sbd *sdp = sb->s_fs_info;
207 struct gfs2_statfs_change sc;
208 int error;
209
210 if (gfs2_tune_get(sdp, gt_statfs_slow))
211 error = gfs2_statfs_slow(sdp, &sc);
212 else
213 error = gfs2_statfs_i(sdp, &sc);
214
215 if (error)
216 return error;
217
218 buf->f_type = GFS2_MAGIC;
219 buf->f_bsize = sdp->sd_sb.sb_bsize;
220 buf->f_blocks = sc.sc_total;
221 buf->f_bfree = sc.sc_free;
222 buf->f_bavail = sc.sc_free;
223 buf->f_files = sc.sc_dinodes + sc.sc_free;
224 buf->f_ffree = sc.sc_free;
225 buf->f_namelen = GFS2_FNAMESIZE;
226
227 return 0;
228}
229
230/**
231 * gfs2_remount_fs - called when the FS is remounted
232 * @sb: the filesystem
233 * @flags: the remount flags
234 * @data: extra data passed in (not used right now)
235 *
236 * Returns: errno
237 */
238
239static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
240{
241 struct gfs2_sbd *sdp = sb->s_fs_info;
242 int error;
243
244 error = gfs2_mount_args(sdp, data, 1);
245 if (error)
246 return error;
247
248 if (sdp->sd_args.ar_spectator)
249 *flags |= MS_RDONLY;
250 else {
251 if (*flags & MS_RDONLY) {
252 if (!(sb->s_flags & MS_RDONLY))
253 error = gfs2_make_fs_ro(sdp);
254 } else if (!(*flags & MS_RDONLY) &&
255 (sb->s_flags & MS_RDONLY)) {
256 error = gfs2_make_fs_rw(sdp);
257 }
258 }
259
260 if (*flags & (MS_NOATIME | MS_NODIRATIME))
261 set_bit(SDF_NOATIME, &sdp->sd_flags);
262 else
263 clear_bit(SDF_NOATIME, &sdp->sd_flags);
264
265 /* Don't let the VFS update atimes. GFS2 handles this itself. */
266 *flags |= MS_NOATIME | MS_NODIRATIME;
267
268 return error;
269}
270
271/**
272 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
273 * @inode: The VFS inode
274 *
275 */
276
277static void gfs2_clear_inode(struct inode *inode)
278{
279 /* This tells us its a "real" inode and not one which only
280 * serves to contain an address space (see rgrp.c, meta_io.c)
281 * which therefore doesn't have its own glocks.
282 */
283 if (inode->i_private) {
284 struct gfs2_inode *ip = GFS2_I(inode);
285 gfs2_glock_inode_squish(inode);
286 gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
287 ip->i_gl->gl_object = NULL;
288 gfs2_glock_schedule_for_reclaim(ip->i_gl);
289 gfs2_glock_put(ip->i_gl);
290 ip->i_gl = NULL;
291 if (ip->i_iopen_gh.gh_gl)
292 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
293 }
294}
295
296/**
297 * gfs2_show_options - Show mount options for /proc/mounts
298 * @s: seq_file structure
299 * @mnt: vfsmount
300 *
301 * Returns: 0 on success or error code
302 */
303
304static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
305{
306 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
307 struct gfs2_args *args = &sdp->sd_args;
308
309 if (args->ar_lockproto[0])
310 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
311 if (args->ar_locktable[0])
312 seq_printf(s, ",locktable=%s", args->ar_locktable);
313 if (args->ar_hostdata[0])
314 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
315 if (args->ar_spectator)
316 seq_printf(s, ",spectator");
317 if (args->ar_ignore_local_fs)
318 seq_printf(s, ",ignore_local_fs");
319 if (args->ar_localflocks)
320 seq_printf(s, ",localflocks");
321 if (args->ar_localcaching)
322 seq_printf(s, ",localcaching");
323 if (args->ar_debug)
324 seq_printf(s, ",debug");
325 if (args->ar_upgrade)
326 seq_printf(s, ",upgrade");
327 if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
328 seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
329 if (args->ar_posix_acl)
330 seq_printf(s, ",acl");
331 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
332 char *state;
333 switch (args->ar_quota) {
334 case GFS2_QUOTA_OFF:
335 state = "off";
336 break;
337 case GFS2_QUOTA_ACCOUNT:
338 state = "account";
339 break;
340 case GFS2_QUOTA_ON:
341 state = "on";
342 break;
343 default:
344 state = "unknown";
345 break;
346 }
347 seq_printf(s, ",quota=%s", state);
348 }
349 if (args->ar_suiddir)
350 seq_printf(s, ",suiddir");
351 if (args->ar_data != GFS2_DATA_DEFAULT) {
352 char *state;
353 switch (args->ar_data) {
354 case GFS2_DATA_WRITEBACK:
355 state = "writeback";
356 break;
357 case GFS2_DATA_ORDERED:
358 state = "ordered";
359 break;
360 default:
361 state = "unknown";
362 break;
363 }
364 seq_printf(s, ",data=%s", state);
365 }
366
367 return 0;
368}
369
370/*
371 * We have to (at the moment) hold the inodes main lock to cover
372 * the gap between unlocking the shared lock on the iopen lock and
373 * taking the exclusive lock. I'd rather do a shared -> exclusive
374 * conversion on the iopen lock, but we can change that later. This
375 * is safe, just less efficient.
376 */
377static void gfs2_delete_inode(struct inode *inode)
378{
379 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
380 struct gfs2_inode *ip = GFS2_I(inode);
381 struct gfs2_holder gh;
382 int error;
383
384 if (!inode->i_private)
385 goto out;
386
387 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
388 if (unlikely(error)) {
389 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
390 goto out;
391 }
392
393 gfs2_glock_dq(&ip->i_iopen_gh);
394 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
395 error = gfs2_glock_nq(&ip->i_iopen_gh);
396 if (error)
397 goto out_uninit;
398
399 if (S_ISDIR(ip->i_di.di_mode) &&
400 (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
401 error = gfs2_dir_exhash_dealloc(ip);
402 if (error)
403 goto out_unlock;
404 }
405
406 if (ip->i_di.di_eattr) {
407 error = gfs2_ea_dealloc(ip);
408 if (error)
409 goto out_unlock;
410 }
411
412 if (!gfs2_is_stuffed(ip)) {
413 error = gfs2_file_dealloc(ip);
414 if (error)
415 goto out_unlock;
416 }
417
418 error = gfs2_dinode_dealloc(ip);
419
420out_unlock:
421 gfs2_glock_dq(&ip->i_iopen_gh);
422out_uninit:
423 gfs2_holder_uninit(&ip->i_iopen_gh);
424 gfs2_glock_dq_uninit(&gh);
425 if (error)
426 fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
427out:
428 truncate_inode_pages(&inode->i_data, 0);
429 clear_inode(inode);
430}
431
432
433
434static struct inode *gfs2_alloc_inode(struct super_block *sb)
435{
436 struct gfs2_sbd *sdp = sb->s_fs_info;
437 struct gfs2_inode *ip;
438
439 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
440 if (ip) {
441 ip->i_flags = 0;
442 ip->i_gl = NULL;
443 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
444 ip->i_last_pfault = jiffies;
445 }
446 return &ip->i_inode;
447}
448
449static void gfs2_destroy_inode(struct inode *inode)
450{
451 kmem_cache_free(gfs2_inode_cachep, inode);
452}
453
454struct super_operations gfs2_super_ops = {
455 .alloc_inode = gfs2_alloc_inode,
456 .destroy_inode = gfs2_destroy_inode,
457 .write_inode = gfs2_write_inode,
458 .delete_inode = gfs2_delete_inode,
459 .put_super = gfs2_put_super,
460 .write_super = gfs2_write_super,
461 .write_super_lockfs = gfs2_write_super_lockfs,
462 .unlockfs = gfs2_unlockfs,
463 .statfs = gfs2_statfs,
464 .remount_fs = gfs2_remount_fs,
465 .clear_inode = gfs2_clear_inode,
466 .show_options = gfs2_show_options,
467};
468
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
new file mode 100644
index 000000000000..9de73f042f78
--- /dev/null
+++ b/fs/gfs2/ops_super.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_SUPER_DOT_H__
11#define __OPS_SUPER_DOT_H__
12
13#include <linux/fs.h>
14
15extern struct super_operations gfs2_super_ops;
16
17#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
new file mode 100644
index 000000000000..5453d2947ab3
--- /dev/null
+++ b/fs/gfs2/ops_vm.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "bmap.h"
23#include "glock.h"
24#include "inode.h"
25#include "ops_vm.h"
26#include "quota.h"
27#include "rgrp.h"
28#include "trans.h"
29#include "util.h"
30
31static void pfault_be_greedy(struct gfs2_inode *ip)
32{
33 unsigned int time;
34
35 spin_lock(&ip->i_spin);
36 time = ip->i_greedy;
37 ip->i_last_pfault = jiffies;
38 spin_unlock(&ip->i_spin);
39
40 igrab(&ip->i_inode);
41 if (gfs2_glock_be_greedy(ip->i_gl, time))
42 iput(&ip->i_inode);
43}
44
45static struct page *gfs2_private_nopage(struct vm_area_struct *area,
46 unsigned long address, int *type)
47{
48 struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
49 struct page *result;
50
51 set_bit(GIF_PAGED, &ip->i_flags);
52
53 result = filemap_nopage(area, address, type);
54
55 if (result && result != NOPAGE_OOM)
56 pfault_be_greedy(ip);
57
58 return result;
59}
60
61static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
62{
63 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
64 unsigned long index = page->index;
65 u64 lblock = index << (PAGE_CACHE_SHIFT -
66 sdp->sd_sb.sb_bsize_shift);
67 unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
68 struct gfs2_alloc *al;
69 unsigned int data_blocks, ind_blocks;
70 unsigned int x;
71 int error;
72
73 al = gfs2_alloc_get(ip);
74
75 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
76 if (error)
77 goto out;
78
79 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
80 if (error)
81 goto out_gunlock_q;
82
83 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
84
85 al->al_requested = data_blocks + ind_blocks;
86
87 error = gfs2_inplace_reserve(ip);
88 if (error)
89 goto out_gunlock_q;
90
91 error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
92 ind_blocks + RES_DINODE +
93 RES_STATFS + RES_QUOTA, 0);
94 if (error)
95 goto out_ipres;
96
97 if (gfs2_is_stuffed(ip)) {
98 error = gfs2_unstuff_dinode(ip, NULL);
99 if (error)
100 goto out_trans;
101 }
102
103 for (x = 0; x < blocks; ) {
104 u64 dblock;
105 unsigned int extlen;
106 int new = 1;
107
108 error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
109 if (error)
110 goto out_trans;
111
112 lblock += extlen;
113 x += extlen;
114 }
115
116 gfs2_assert_warn(sdp, al->al_alloced);
117
118out_trans:
119 gfs2_trans_end(sdp);
120out_ipres:
121 gfs2_inplace_release(ip);
122out_gunlock_q:
123 gfs2_quota_unlock(ip);
124out:
125 gfs2_alloc_put(ip);
126 return error;
127}
128
129static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
130 unsigned long address, int *type)
131{
132 struct file *file = area->vm_file;
133 struct gfs2_file *gf = file->private_data;
134 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
135 struct gfs2_holder i_gh;
136 struct page *result = NULL;
137 unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
138 area->vm_pgoff;
139 int alloc_required;
140 int error;
141
142 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
143 if (error)
144 return NULL;
145
146 set_bit(GIF_PAGED, &ip->i_flags);
147 set_bit(GIF_SW_PAGED, &ip->i_flags);
148
149 error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
150 PAGE_CACHE_SIZE, &alloc_required);
151 if (error)
152 goto out;
153
154 set_bit(GFF_EXLOCK, &gf->f_flags);
155 result = filemap_nopage(area, address, type);
156 clear_bit(GFF_EXLOCK, &gf->f_flags);
157 if (!result || result == NOPAGE_OOM)
158 goto out;
159
160 if (alloc_required) {
161 error = alloc_page_backing(ip, result);
162 if (error) {
163 page_cache_release(result);
164 result = NULL;
165 goto out;
166 }
167 set_page_dirty(result);
168 }
169
170 pfault_be_greedy(ip);
171out:
172 gfs2_glock_dq_uninit(&i_gh);
173
174 return result;
175}
176
/* VM operations whose nopage handler is gfs2_private_nopage (presumably for private mappings - confirm at the mmap call site) */
177struct vm_operations_struct gfs2_vm_ops_private = {
178 .nopage = gfs2_private_nopage,
179};
180
/* VM operations whose nopage handler is gfs2_sharewrite_nopage (presumably for shared writable mappings - confirm at the mmap call site) */
181struct vm_operations_struct gfs2_vm_ops_sharewrite = {
182 .nopage = gfs2_sharewrite_nopage,
183};
184
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
new file mode 100644
index 000000000000..4ae8f43ed5e3
--- /dev/null
+++ b/fs/gfs2/ops_vm.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_VM_DOT_H__
11#define __OPS_VM_DOT_H__
12
13#include <linux/mm.h>
14
15extern struct vm_operations_struct gfs2_vm_ops_private;
16extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
17
18#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
new file mode 100644
index 000000000000..c69b94a55588
--- /dev/null
+++ b/fs/gfs2/quota.c
@@ -0,0 +1,1227 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10/*
11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controllable.
18 * Since quota tags are part of transactions, there is no need for a quota
19 * check program to be run on node crashes or anything like that.
20 *
21 * There are a couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite nodes with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent.
34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read.
37 */
38
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/spinlock.h>
42#include <linux/completion.h>
43#include <linux/buffer_head.h>
44#include <linux/sort.h>
45#include <linux/fs.h>
46#include <linux/bio.h>
47#include <linux/gfs2_ondisk.h>
48#include <linux/lm_interface.h>
49
50#include "gfs2.h"
51#include "incore.h"
52#include "bmap.h"
53#include "glock.h"
54#include "glops.h"
55#include "log.h"
56#include "meta_io.h"
57#include "quota.h"
58#include "rgrp.h"
59#include "super.h"
60#include "trans.h"
61#include "inode.h"
62#include "ops_file.h"
63#include "ops_address.h"
64#include "util.h"
65
66#define QUOTA_USER 1
67#define QUOTA_GROUP 0
68
69static u64 qd2offset(struct gfs2_quota_data *qd)
70{
71 u64 offset;
72
73 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
74 offset *= sizeof(struct gfs2_quota);
75
76 return offset;
77}
78
79static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
80 struct gfs2_quota_data **qdp)
81{
82 struct gfs2_quota_data *qd;
83 int error;
84
85 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
86 if (!qd)
87 return -ENOMEM;
88
89 qd->qd_count = 1;
90 qd->qd_id = id;
91 if (user)
92 set_bit(QDF_USER, &qd->qd_flags);
93 qd->qd_slot = -1;
94
95 error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
96 &gfs2_quota_glops, CREATE, &qd->qd_gl);
97 if (error)
98 goto fail;
99
100 error = gfs2_lvb_hold(qd->qd_gl);
101 gfs2_glock_put(qd->qd_gl);
102 if (error)
103 goto fail;
104
105 *qdp = qd;
106
107 return 0;
108
109fail:
110 kfree(qd);
111 return error;
112}
113
114static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
115 struct gfs2_quota_data **qdp)
116{
117 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
118 int error, found;
119
120 *qdp = NULL;
121
122 for (;;) {
123 found = 0;
124 spin_lock(&sdp->sd_quota_spin);
125 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
126 if (qd->qd_id == id &&
127 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
128 qd->qd_count++;
129 found = 1;
130 break;
131 }
132 }
133
134 if (!found)
135 qd = NULL;
136
137 if (!qd && new_qd) {
138 qd = new_qd;
139 list_add(&qd->qd_list, &sdp->sd_quota_list);
140 atomic_inc(&sdp->sd_quota_count);
141 new_qd = NULL;
142 }
143
144 spin_unlock(&sdp->sd_quota_spin);
145
146 if (qd || !create) {
147 if (new_qd) {
148 gfs2_lvb_unhold(new_qd->qd_gl);
149 kfree(new_qd);
150 }
151 *qdp = qd;
152 return 0;
153 }
154
155 error = qd_alloc(sdp, user, id, &new_qd);
156 if (error)
157 return error;
158 }
159}
160
161static void qd_hold(struct gfs2_quota_data *qd)
162{
163 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
164
165 spin_lock(&sdp->sd_quota_spin);
166 gfs2_assert(sdp, qd->qd_count);
167 qd->qd_count++;
168 spin_unlock(&sdp->sd_quota_spin);
169}
170
171static void qd_put(struct gfs2_quota_data *qd)
172{
173 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
174 spin_lock(&sdp->sd_quota_spin);
175 gfs2_assert(sdp, qd->qd_count);
176 if (!--qd->qd_count)
177 qd->qd_last_touched = jiffies;
178 spin_unlock(&sdp->sd_quota_spin);
179}
180
181static int slot_get(struct gfs2_quota_data *qd)
182{
183 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
184 unsigned int c, o = 0, b;
185 unsigned char byte = 0;
186
187 spin_lock(&sdp->sd_quota_spin);
188
189 if (qd->qd_slot_count++) {
190 spin_unlock(&sdp->sd_quota_spin);
191 return 0;
192 }
193
194 for (c = 0; c < sdp->sd_quota_chunks; c++)
195 for (o = 0; o < PAGE_SIZE; o++) {
196 byte = sdp->sd_quota_bitmap[c][o];
197 if (byte != 0xFF)
198 goto found;
199 }
200
201 goto fail;
202
203found:
204 for (b = 0; b < 8; b++)
205 if (!(byte & (1 << b)))
206 break;
207 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
208
209 if (qd->qd_slot >= sdp->sd_quota_slots)
210 goto fail;
211
212 sdp->sd_quota_bitmap[c][o] |= 1 << b;
213
214 spin_unlock(&sdp->sd_quota_spin);
215
216 return 0;
217
218fail:
219 qd->qd_slot_count--;
220 spin_unlock(&sdp->sd_quota_spin);
221 return -ENOSPC;
222}
223
224static void slot_hold(struct gfs2_quota_data *qd)
225{
226 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
227
228 spin_lock(&sdp->sd_quota_spin);
229 gfs2_assert(sdp, qd->qd_slot_count);
230 qd->qd_slot_count++;
231 spin_unlock(&sdp->sd_quota_spin);
232}
233
234static void slot_put(struct gfs2_quota_data *qd)
235{
236 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
237
238 spin_lock(&sdp->sd_quota_spin);
239 gfs2_assert(sdp, qd->qd_slot_count);
240 if (!--qd->qd_slot_count) {
241 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
242 qd->qd_slot = -1;
243 }
244 spin_unlock(&sdp->sd_quota_spin);
245}
246
247static int bh_get(struct gfs2_quota_data *qd)
248{
249 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
250 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
251 unsigned int block, offset;
252 struct buffer_head *bh;
253 int error;
254 struct buffer_head bh_map;
255
256 mutex_lock(&sdp->sd_quota_mutex);
257
258 if (qd->qd_bh_count++) {
259 mutex_unlock(&sdp->sd_quota_mutex);
260 return 0;
261 }
262
263 block = qd->qd_slot / sdp->sd_qc_per_block;
264 offset = qd->qd_slot % sdp->sd_qc_per_block;;
265
266 error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map, 1);
267 if (error)
268 goto fail;
269 error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
270 if (error)
271 goto fail;
272 error = -EIO;
273 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
274 goto fail_brelse;
275
276 qd->qd_bh = bh;
277 qd->qd_bh_qc = (struct gfs2_quota_change *)
278 (bh->b_data + sizeof(struct gfs2_meta_header) +
279 offset * sizeof(struct gfs2_quota_change));
280
281 mutex_lock(&sdp->sd_quota_mutex);
282
283 return 0;
284
285fail_brelse:
286 brelse(bh);
287fail:
288 qd->qd_bh_count--;
289 mutex_unlock(&sdp->sd_quota_mutex);
290 return error;
291}
292
293static void bh_put(struct gfs2_quota_data *qd)
294{
295 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
296
297 mutex_lock(&sdp->sd_quota_mutex);
298 gfs2_assert(sdp, qd->qd_bh_count);
299 if (!--qd->qd_bh_count) {
300 brelse(qd->qd_bh);
301 qd->qd_bh = NULL;
302 qd->qd_bh_qc = NULL;
303 }
304 mutex_unlock(&sdp->sd_quota_mutex);
305}
306
307static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
308{
309 struct gfs2_quota_data *qd = NULL;
310 int error;
311 int found = 0;
312
313 *qdp = NULL;
314
315 if (sdp->sd_vfs->s_flags & MS_RDONLY)
316 return 0;
317
318 spin_lock(&sdp->sd_quota_spin);
319
320 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
321 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
322 !test_bit(QDF_CHANGE, &qd->qd_flags) ||
323 qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
324 continue;
325
326 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
327
328 set_bit(QDF_LOCKED, &qd->qd_flags);
329 gfs2_assert_warn(sdp, qd->qd_count);
330 qd->qd_count++;
331 qd->qd_change_sync = qd->qd_change;
332 gfs2_assert_warn(sdp, qd->qd_slot_count);
333 qd->qd_slot_count++;
334 found = 1;
335
336 break;
337 }
338
339 if (!found)
340 qd = NULL;
341
342 spin_unlock(&sdp->sd_quota_spin);
343
344 if (qd) {
345 gfs2_assert_warn(sdp, qd->qd_change_sync);
346 error = bh_get(qd);
347 if (error) {
348 clear_bit(QDF_LOCKED, &qd->qd_flags);
349 slot_put(qd);
350 qd_put(qd);
351 return error;
352 }
353 }
354
355 *qdp = qd;
356
357 return 0;
358}
359
360static int qd_trylock(struct gfs2_quota_data *qd)
361{
362 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
363
364 if (sdp->sd_vfs->s_flags & MS_RDONLY)
365 return 0;
366
367 spin_lock(&sdp->sd_quota_spin);
368
369 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
370 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
371 spin_unlock(&sdp->sd_quota_spin);
372 return 0;
373 }
374
375 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
376
377 set_bit(QDF_LOCKED, &qd->qd_flags);
378 gfs2_assert_warn(sdp, qd->qd_count);
379 qd->qd_count++;
380 qd->qd_change_sync = qd->qd_change;
381 gfs2_assert_warn(sdp, qd->qd_slot_count);
382 qd->qd_slot_count++;
383
384 spin_unlock(&sdp->sd_quota_spin);
385
386 gfs2_assert_warn(sdp, qd->qd_change_sync);
387 if (bh_get(qd)) {
388 clear_bit(QDF_LOCKED, &qd->qd_flags);
389 slot_put(qd);
390 qd_put(qd);
391 return 0;
392 }
393
394 return 1;
395}
396
397static void qd_unlock(struct gfs2_quota_data *qd)
398{
399 gfs2_assert_warn(qd->qd_gl->gl_sbd,
400 test_bit(QDF_LOCKED, &qd->qd_flags));
401 clear_bit(QDF_LOCKED, &qd->qd_flags);
402 bh_put(qd);
403 slot_put(qd);
404 qd_put(qd);
405}
406
407static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
408 struct gfs2_quota_data **qdp)
409{
410 int error;
411
412 error = qd_get(sdp, user, id, create, qdp);
413 if (error)
414 return error;
415
416 error = slot_get(*qdp);
417 if (error)
418 goto fail;
419
420 error = bh_get(*qdp);
421 if (error)
422 goto fail_slot;
423
424 return 0;
425
426fail_slot:
427 slot_put(*qdp);
428fail:
429 qd_put(*qdp);
430 return error;
431}
432
/**
 * qdsb_put - release what qdsb_get() acquired
 * @qd: the quota data object
 *
 * Drops the buffer, slot and qd references, in that order.
 */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);

	slot_put(qd);

	qd_put(qd);
}
439
440int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
441{
442 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
443 struct gfs2_alloc *al = &ip->i_alloc;
444 struct gfs2_quota_data **qd = al->al_qd;
445 int error;
446
447 if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
448 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
449 return -EIO;
450
451 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
452 return 0;
453
454 error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
455 if (error)
456 goto out;
457 al->al_qd_num++;
458 qd++;
459
460 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
461 if (error)
462 goto out;
463 al->al_qd_num++;
464 qd++;
465
466 if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
467 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
468 if (error)
469 goto out;
470 al->al_qd_num++;
471 qd++;
472 }
473
474 if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
475 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
476 if (error)
477 goto out;
478 al->al_qd_num++;
479 qd++;
480 }
481
482out:
483 if (error)
484 gfs2_quota_unhold(ip);
485 return error;
486}
487
488void gfs2_quota_unhold(struct gfs2_inode *ip)
489{
490 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
491 struct gfs2_alloc *al = &ip->i_alloc;
492 unsigned int x;
493
494 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
495
496 for (x = 0; x < al->al_qd_num; x++) {
497 qdsb_put(al->al_qd[x]);
498 al->al_qd[x] = NULL;
499 }
500 al->al_qd_num = 0;
501}
502
503static int sort_qd(const void *a, const void *b)
504{
505 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
506 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;
507
508 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
509 !test_bit(QDF_USER, &qd_b->qd_flags)) {
510 if (test_bit(QDF_USER, &qd_a->qd_flags))
511 return -1;
512 else
513 return 1;
514 }
515 if (qd_a->qd_id < qd_b->qd_id)
516 return -1;
517 if (qd_a->qd_id > qd_b->qd_id)
518 return 1;
519
520 return 0;
521}
522
523static void do_qc(struct gfs2_quota_data *qd, s64 change)
524{
525 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
526 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
527 struct gfs2_quota_change *qc = qd->qd_bh_qc;
528 s64 x;
529
530 mutex_lock(&sdp->sd_quota_mutex);
531 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);
532
533 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
534 qc->qc_change = 0;
535 qc->qc_flags = 0;
536 if (test_bit(QDF_USER, &qd->qd_flags))
537 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
538 qc->qc_id = cpu_to_be32(qd->qd_id);
539 }
540
541 x = qc->qc_change;
542 x = be64_to_cpu(x) + change;
543 qc->qc_change = cpu_to_be64(x);
544
545 spin_lock(&sdp->sd_quota_spin);
546 qd->qd_change = x;
547 spin_unlock(&sdp->sd_quota_spin);
548
549 if (!x) {
550 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
551 clear_bit(QDF_CHANGE, &qd->qd_flags);
552 qc->qc_flags = 0;
553 qc->qc_id = 0;
554 slot_put(qd);
555 qd_put(qd);
556 } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
557 qd_hold(qd);
558 slot_hold(qd);
559 }
560
561 mutex_unlock(&sdp->sd_quota_mutex);
562}
563
564/**
565 * gfs2_adjust_quota
566 *
567 * This function was mostly borrowed from gfs2_block_truncate_page which was
568 * in turn mostly borrowed from ext3
569 */
570static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
571 s64 change, struct gfs2_quota_data *qd)
572{
573 struct inode *inode = &ip->i_inode;
574 struct address_space *mapping = inode->i_mapping;
575 unsigned long index = loc >> PAGE_CACHE_SHIFT;
576 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
577 unsigned blocksize, iblock, pos;
578 struct buffer_head *bh;
579 struct page *page;
580 void *kaddr;
581 __be64 *ptr;
582 s64 value;
583 int err = -EIO;
584
585 page = grab_cache_page(mapping, index);
586 if (!page)
587 return -ENOMEM;
588
589 blocksize = inode->i_sb->s_blocksize;
590 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
591
592 if (!page_has_buffers(page))
593 create_empty_buffers(page, blocksize, 0);
594
595 bh = page_buffers(page);
596 pos = blocksize;
597 while (offset >= pos) {
598 bh = bh->b_this_page;
599 iblock++;
600 pos += blocksize;
601 }
602
603 if (!buffer_mapped(bh)) {
604 gfs2_get_block(inode, iblock, bh, 1);
605 if (!buffer_mapped(bh))
606 goto unlock;
607 }
608
609 if (PageUptodate(page))
610 set_buffer_uptodate(bh);
611
612 if (!buffer_uptodate(bh)) {
613 ll_rw_block(READ_META, 1, &bh);
614 wait_on_buffer(bh);
615 if (!buffer_uptodate(bh))
616 goto unlock;
617 }
618
619 gfs2_trans_add_bh(ip->i_gl, bh, 0);
620
621 kaddr = kmap_atomic(page, KM_USER0);
622 ptr = kaddr + offset;
623 value = (s64)be64_to_cpu(*ptr) + change;
624 *ptr = cpu_to_be64(value);
625 flush_dcache_page(page);
626 kunmap_atomic(kaddr, KM_USER0);
627 err = 0;
628 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
629 qd->qd_qb.qb_value = cpu_to_be64(value);
630unlock:
631 unlock_page(page);
632 page_cache_release(page);
633 return err;
634}
635
636static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
637{
638 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
639 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
640 unsigned int data_blocks, ind_blocks;
641 struct gfs2_holder *ghs, i_gh;
642 unsigned int qx, x;
643 struct gfs2_quota_data *qd;
644 loff_t offset;
645 unsigned int nalloc = 0;
646 struct gfs2_alloc *al = NULL;
647 int error;
648
649 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
650 &data_blocks, &ind_blocks);
651
652 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
653 if (!ghs)
654 return -ENOMEM;
655
656 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
657 for (qx = 0; qx < num_qd; qx++) {
658 error = gfs2_glock_nq_init(qda[qx]->qd_gl,
659 LM_ST_EXCLUSIVE,
660 GL_NOCACHE, &ghs[qx]);
661 if (error)
662 goto out;
663 }
664
665 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
666 if (error)
667 goto out;
668
669 for (x = 0; x < num_qd; x++) {
670 int alloc_required;
671
672 offset = qd2offset(qda[x]);
673 error = gfs2_write_alloc_required(ip, offset,
674 sizeof(struct gfs2_quota),
675 &alloc_required);
676 if (error)
677 goto out_gunlock;
678 if (alloc_required)
679 nalloc++;
680 }
681
682 if (nalloc) {
683 al = gfs2_alloc_get(ip);
684
685 al->al_requested = nalloc * (data_blocks + ind_blocks);
686
687 error = gfs2_inplace_reserve(ip);
688 if (error)
689 goto out_alloc;
690
691 error = gfs2_trans_begin(sdp,
692 al->al_rgd->rd_ri.ri_length +
693 num_qd * data_blocks +
694 nalloc * ind_blocks +
695 RES_DINODE + num_qd +
696 RES_STATFS, 0);
697 if (error)
698 goto out_ipres;
699 } else {
700 error = gfs2_trans_begin(sdp,
701 num_qd * data_blocks +
702 RES_DINODE + num_qd, 0);
703 if (error)
704 goto out_gunlock;
705 }
706
707 for (x = 0; x < num_qd; x++) {
708 qd = qda[x];
709 offset = qd2offset(qd);
710 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
711 (struct gfs2_quota_data *)
712 qd->qd_gl->gl_lvb);
713 if (error)
714 goto out_end_trans;
715
716 do_qc(qd, -qd->qd_change_sync);
717 }
718
719 error = 0;
720
721out_end_trans:
722 gfs2_trans_end(sdp);
723out_ipres:
724 if (nalloc)
725 gfs2_inplace_release(ip);
726out_alloc:
727 if (nalloc)
728 gfs2_alloc_put(ip);
729out_gunlock:
730 gfs2_glock_dq_uninit(&i_gh);
731out:
732 while (qx--)
733 gfs2_glock_dq_uninit(&ghs[qx]);
734 kfree(ghs);
735 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
736 return error;
737}
738
739static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
740 struct gfs2_holder *q_gh)
741{
742 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
743 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
744 struct gfs2_holder i_gh;
745 struct gfs2_quota q;
746 char buf[sizeof(struct gfs2_quota)];
747 struct file_ra_state ra_state;
748 int error;
749 struct gfs2_quota_lvb *qlvb;
750
751 file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
752restart:
753 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
754 if (error)
755 return error;
756
757 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
758
759 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
760 loff_t pos;
761 gfs2_glock_dq_uninit(q_gh);
762 error = gfs2_glock_nq_init(qd->qd_gl,
763 LM_ST_EXCLUSIVE, GL_NOCACHE,
764 q_gh);
765 if (error)
766 return error;
767
768 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
769 if (error)
770 goto fail;
771
772 memset(buf, 0, sizeof(struct gfs2_quota));
773 pos = qd2offset(qd);
774 error = gfs2_internal_read(ip, &ra_state, buf,
775 &pos, sizeof(struct gfs2_quota));
776 if (error < 0)
777 goto fail_gunlock;
778
779 gfs2_glock_dq_uninit(&i_gh);
780
781
782 gfs2_quota_in(&q, buf);
783 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
784 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
785 qlvb->__pad = 0;
786 qlvb->qb_limit = cpu_to_be64(q.qu_limit);
787 qlvb->qb_warn = cpu_to_be64(q.qu_warn);
788 qlvb->qb_value = cpu_to_be64(q.qu_value);
789 qd->qd_qb = *qlvb;
790
791 if (gfs2_glock_is_blocking(qd->qd_gl)) {
792 gfs2_glock_dq_uninit(q_gh);
793 force_refresh = 0;
794 goto restart;
795 }
796 }
797
798 return 0;
799
800fail_gunlock:
801 gfs2_glock_dq_uninit(&i_gh);
802fail:
803 gfs2_glock_dq_uninit(q_gh);
804 return error;
805}
806
807int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
808{
809 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
810 struct gfs2_alloc *al = &ip->i_alloc;
811 unsigned int x;
812 int error = 0;
813
814 gfs2_quota_hold(ip, uid, gid);
815
816 if (capable(CAP_SYS_RESOURCE) ||
817 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
818 return 0;
819
820 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
821 sort_qd, NULL);
822
823 for (x = 0; x < al->al_qd_num; x++) {
824 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
825 if (error)
826 break;
827 }
828
829 if (!error)
830 set_bit(GIF_QD_LOCKED, &ip->i_flags);
831 else {
832 while (x--)
833 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
834 gfs2_quota_unhold(ip);
835 }
836
837 return error;
838}
839
840static int need_sync(struct gfs2_quota_data *qd)
841{
842 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
843 struct gfs2_tune *gt = &sdp->sd_tune;
844 s64 value;
845 unsigned int num, den;
846 int do_sync = 1;
847
848 if (!qd->qd_qb.qb_limit)
849 return 0;
850
851 spin_lock(&sdp->sd_quota_spin);
852 value = qd->qd_change;
853 spin_unlock(&sdp->sd_quota_spin);
854
855 spin_lock(&gt->gt_spin);
856 num = gt->gt_quota_scale_num;
857 den = gt->gt_quota_scale_den;
858 spin_unlock(&gt->gt_spin);
859
860 if (value < 0)
861 do_sync = 0;
862 else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
863 (s64)be64_to_cpu(qd->qd_qb.qb_limit))
864 do_sync = 0;
865 else {
866 value *= gfs2_jindex_size(sdp) * num;
867 do_div(value, den);
868 value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
869 if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
870 do_sync = 0;
871 }
872
873 return do_sync;
874}
875
876void gfs2_quota_unlock(struct gfs2_inode *ip)
877{
878 struct gfs2_alloc *al = &ip->i_alloc;
879 struct gfs2_quota_data *qda[4];
880 unsigned int count = 0;
881 unsigned int x;
882
883 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
884 goto out;
885
886 for (x = 0; x < al->al_qd_num; x++) {
887 struct gfs2_quota_data *qd;
888 int sync;
889
890 qd = al->al_qd[x];
891 sync = need_sync(qd);
892
893 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
894
895 if (sync && qd_trylock(qd))
896 qda[count++] = qd;
897 }
898
899 if (count) {
900 do_sync(count, qda);
901 for (x = 0; x < count; x++)
902 qd_unlock(qda[x]);
903 }
904
905out:
906 gfs2_quota_unhold(ip);
907}
908
909#define MAX_LINE 256
910
911static int print_message(struct gfs2_quota_data *qd, char *type)
912{
913 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
914
915 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n",
916 sdp->sd_fsname, type,
917 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
918 qd->qd_id);
919
920 return 0;
921}
922
923int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
924{
925 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
926 struct gfs2_alloc *al = &ip->i_alloc;
927 struct gfs2_quota_data *qd;
928 s64 value;
929 unsigned int x;
930 int error = 0;
931
932 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
933 return 0;
934
935 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
936 return 0;
937
938 for (x = 0; x < al->al_qd_num; x++) {
939 qd = al->al_qd[x];
940
941 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
942 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
943 continue;
944
945 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
946 spin_lock(&sdp->sd_quota_spin);
947 value += qd->qd_change;
948 spin_unlock(&sdp->sd_quota_spin);
949
950 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
951 print_message(qd, "exceeded");
952 error = -EDQUOT;
953 break;
954 } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
955 (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
956 time_after_eq(jiffies, qd->qd_last_warn +
957 gfs2_tune_get(sdp,
958 gt_quota_warn_period) * HZ)) {
959 error = print_message(qd, "warning");
960 qd->qd_last_warn = jiffies;
961 }
962 }
963
964 return error;
965}
966
967void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
968 u32 uid, u32 gid)
969{
970 struct gfs2_alloc *al = &ip->i_alloc;
971 struct gfs2_quota_data *qd;
972 unsigned int x;
973 unsigned int found = 0;
974
975 if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
976 return;
977 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
978 return;
979
980 for (x = 0; x < al->al_qd_num; x++) {
981 qd = al->al_qd[x];
982
983 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
984 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
985 do_qc(qd, change);
986 found++;
987 }
988 }
989}
990
991int gfs2_quota_sync(struct gfs2_sbd *sdp)
992{
993 struct gfs2_quota_data **qda;
994 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
995 unsigned int num_qd;
996 unsigned int x;
997 int error = 0;
998
999 sdp->sd_quota_sync_gen++;
1000
1001 qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
1002 if (!qda)
1003 return -ENOMEM;
1004
1005 do {
1006 num_qd = 0;
1007
1008 for (;;) {
1009 error = qd_fish(sdp, qda + num_qd);
1010 if (error || !qda[num_qd])
1011 break;
1012 if (++num_qd == max_qd)
1013 break;
1014 }
1015
1016 if (num_qd) {
1017 if (!error)
1018 error = do_sync(num_qd, qda);
1019 if (!error)
1020 for (x = 0; x < num_qd; x++)
1021 qda[x]->qd_sync_gen =
1022 sdp->sd_quota_sync_gen;
1023
1024 for (x = 0; x < num_qd; x++)
1025 qd_unlock(qda[x]);
1026 }
1027 } while (!error && num_qd == max_qd);
1028
1029 kfree(qda);
1030
1031 return error;
1032}
1033
1034int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1035{
1036 struct gfs2_quota_data *qd;
1037 struct gfs2_holder q_gh;
1038 int error;
1039
1040 error = qd_get(sdp, user, id, CREATE, &qd);
1041 if (error)
1042 return error;
1043
1044 error = do_glock(qd, FORCE, &q_gh);
1045 if (!error)
1046 gfs2_glock_dq_uninit(&q_gh);
1047
1048 qd_put(qd);
1049
1050 return error;
1051}
1052
1053int gfs2_quota_init(struct gfs2_sbd *sdp)
1054{
1055 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
1056 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
1057 unsigned int x, slot = 0;
1058 unsigned int found = 0;
1059 u64 dblock;
1060 u32 extlen = 0;
1061 int error;
1062
1063 if (!ip->i_di.di_size || ip->i_di.di_size > (64 << 20) ||
1064 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
1065 gfs2_consist_inode(ip);
1066 return -EIO;
1067 }
1068 sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
1069 sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);
1070
1071 error = -ENOMEM;
1072
1073 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
1074 sizeof(unsigned char *), GFP_KERNEL);
1075 if (!sdp->sd_quota_bitmap)
1076 return error;
1077
1078 for (x = 0; x < sdp->sd_quota_chunks; x++) {
1079 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
1080 if (!sdp->sd_quota_bitmap[x])
1081 goto fail;
1082 }
1083
1084 for (x = 0; x < blocks; x++) {
1085 struct buffer_head *bh;
1086 unsigned int y;
1087
1088 if (!extlen) {
1089 int new = 0;
1090 error = gfs2_extent_map(&ip->i_inode, x, &new, &dblock, &extlen);
1091 if (error)
1092 goto fail;
1093 }
1094 error = -EIO;
1095 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
1096 if (!bh)
1097 goto fail;
1098 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
1099 brelse(bh);
1100 goto fail;
1101 }
1102
1103 for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
1104 y++, slot++) {
1105 struct gfs2_quota_change qc;
1106 struct gfs2_quota_data *qd;
1107
1108 gfs2_quota_change_in(&qc, bh->b_data +
1109 sizeof(struct gfs2_meta_header) +
1110 y * sizeof(struct gfs2_quota_change));
1111 if (!qc.qc_change)
1112 continue;
1113
1114 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
1115 qc.qc_id, &qd);
1116 if (error) {
1117 brelse(bh);
1118 goto fail;
1119 }
1120
1121 set_bit(QDF_CHANGE, &qd->qd_flags);
1122 qd->qd_change = qc.qc_change;
1123 qd->qd_slot = slot;
1124 qd->qd_slot_count = 1;
1125 qd->qd_last_touched = jiffies;
1126
1127 spin_lock(&sdp->sd_quota_spin);
1128 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1129 list_add(&qd->qd_list, &sdp->sd_quota_list);
1130 atomic_inc(&sdp->sd_quota_count);
1131 spin_unlock(&sdp->sd_quota_spin);
1132
1133 found++;
1134 }
1135
1136 brelse(bh);
1137 dblock++;
1138 extlen--;
1139 }
1140
1141 if (found)
1142 fs_info(sdp, "found %u quota changes\n", found);
1143
1144 return 0;
1145
1146fail:
1147 gfs2_quota_cleanup(sdp);
1148 return error;
1149}
1150
1151void gfs2_quota_scan(struct gfs2_sbd *sdp)
1152{
1153 struct gfs2_quota_data *qd, *safe;
1154 LIST_HEAD(dead);
1155
1156 spin_lock(&sdp->sd_quota_spin);
1157 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1158 if (!qd->qd_count &&
1159 time_after_eq(jiffies, qd->qd_last_touched +
1160 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1161 list_move(&qd->qd_list, &dead);
1162 gfs2_assert_warn(sdp,
1163 atomic_read(&sdp->sd_quota_count) > 0);
1164 atomic_dec(&sdp->sd_quota_count);
1165 }
1166 }
1167 spin_unlock(&sdp->sd_quota_spin);
1168
1169 while (!list_empty(&dead)) {
1170 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1171 list_del(&qd->qd_list);
1172
1173 gfs2_assert_warn(sdp, !qd->qd_change);
1174 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1175 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1176
1177 gfs2_lvb_unhold(qd->qd_gl);
1178 kfree(qd);
1179 }
1180}
1181
1182void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1183{
1184 struct list_head *head = &sdp->sd_quota_list;
1185 struct gfs2_quota_data *qd;
1186 unsigned int x;
1187
1188 spin_lock(&sdp->sd_quota_spin);
1189 while (!list_empty(head)) {
1190 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1191
1192 if (qd->qd_count > 1 ||
1193 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1194 list_move(&qd->qd_list, head);
1195 spin_unlock(&sdp->sd_quota_spin);
1196 schedule();
1197 spin_lock(&sdp->sd_quota_spin);
1198 continue;
1199 }
1200
1201 list_del(&qd->qd_list);
1202 atomic_dec(&sdp->sd_quota_count);
1203 spin_unlock(&sdp->sd_quota_spin);
1204
1205 if (!qd->qd_count) {
1206 gfs2_assert_warn(sdp, !qd->qd_change);
1207 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1208 } else
1209 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1210 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1211
1212 gfs2_lvb_unhold(qd->qd_gl);
1213 kfree(qd);
1214
1215 spin_lock(&sdp->sd_quota_spin);
1216 }
1217 spin_unlock(&sdp->sd_quota_spin);
1218
1219 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1220
1221 if (sdp->sd_quota_bitmap) {
1222 for (x = 0; x < sdp->sd_quota_chunks; x++)
1223 kfree(sdp->sd_quota_bitmap[x]);
1224 kfree(sdp->sd_quota_bitmap);
1225 }
1226}
1227
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
new file mode 100644
index 000000000000..a8be1417051f
--- /dev/null
+++ b/fs/gfs2/quota.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __QUOTA_DOT_H__
11#define __QUOTA_DOT_H__
12
13struct gfs2_inode;
14struct gfs2_sbd;
15
16#define NO_QUOTA_CHANGE ((u32)-1)
17
18int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
19void gfs2_quota_unhold(struct gfs2_inode *ip);
20
21int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
22void gfs2_quota_unlock(struct gfs2_inode *ip);
23
24int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
25void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
26 u32 uid, u32 gid);
27
28int gfs2_quota_sync(struct gfs2_sbd *sdp);
29int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
30
31int gfs2_quota_init(struct gfs2_sbd *sdp);
32void gfs2_quota_scan(struct gfs2_sbd *sdp);
33void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
34
35#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
new file mode 100644
index 000000000000..0a8a4b87dcc6
--- /dev/null
+++ b/fs/gfs2/recovery.c
@@ -0,0 +1,570 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "glops.h"
24#include "lm.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "recovery.h"
28#include "super.h"
29#include "util.h"
30#include "dir.h"
31
32int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
33 struct buffer_head **bh)
34{
35 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
36 struct gfs2_glock *gl = ip->i_gl;
37 int new = 0;
38 u64 dblock;
39 u32 extlen;
40 int error;
41
42 error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
43 if (error)
44 return error;
45 if (!dblock) {
46 gfs2_consist_inode(ip);
47 return -EIO;
48 }
49
50 *bh = gfs2_meta_ra(gl, dblock, extlen);
51
52 return error;
53}
54
55int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
56{
57 struct list_head *head = &sdp->sd_revoke_list;
58 struct gfs2_revoke_replay *rr;
59 int found = 0;
60
61 list_for_each_entry(rr, head, rr_list) {
62 if (rr->rr_blkno == blkno) {
63 found = 1;
64 break;
65 }
66 }
67
68 if (found) {
69 rr->rr_where = where;
70 return 0;
71 }
72
73 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
74 if (!rr)
75 return -ENOMEM;
76
77 rr->rr_blkno = blkno;
78 rr->rr_where = where;
79 list_add(&rr->rr_list, head);
80
81 return 1;
82}
83
84int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
85{
86 struct gfs2_revoke_replay *rr;
87 int wrap, a, b, revoke;
88 int found = 0;
89
90 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
91 if (rr->rr_blkno == blkno) {
92 found = 1;
93 break;
94 }
95 }
96
97 if (!found)
98 return 0;
99
100 wrap = (rr->rr_where < sdp->sd_replay_tail);
101 a = (sdp->sd_replay_tail < where);
102 b = (where < rr->rr_where);
103 revoke = (wrap) ? (a || b) : (a && b);
104
105 return revoke;
106}
107
108void gfs2_revoke_clean(struct gfs2_sbd *sdp)
109{
110 struct list_head *head = &sdp->sd_revoke_list;
111 struct gfs2_revoke_replay *rr;
112
113 while (!list_empty(head)) {
114 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
115 list_del(&rr->rr_list);
116 kfree(rr);
117 }
118}
119
120/**
121 * get_log_header - read the log header for a given segment
122 * @jd: the journal
123 * @blk: the block to look at
124 * @lh: the log header to return
125 *
126 * Read the log header for a given segement in a given journal. Do a few
127 * sanity checks on it.
128 *
129 * Returns: 0 on success,
130 * 1 if the header was invalid or incomplete,
131 * errno on error
132 */
133
134static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
135 struct gfs2_log_header *head)
136{
137 struct buffer_head *bh;
138 struct gfs2_log_header lh;
139 u32 hash;
140 int error;
141
142 error = gfs2_replay_read_block(jd, blk, &bh);
143 if (error)
144 return error;
145
146 memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
147 lh.lh_hash = 0;
148 hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
149 gfs2_log_header_in(&lh, bh->b_data);
150
151 brelse(bh);
152
153 if (lh.lh_header.mh_magic != GFS2_MAGIC ||
154 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
155 lh.lh_blkno != blk || lh.lh_hash != hash)
156 return 1;
157
158 *head = lh;
159
160 return 0;
161}
162
163/**
164 * find_good_lh - find a good log header
165 * @jd: the journal
166 * @blk: the segment to start searching from
167 * @lh: the log header to fill in
168 * @forward: if true search forward in the log, else search backward
169 *
170 * Call get_log_header() to get a log header for a segment, but if the
171 * segment is bad, either scan forward or backward until we find a good one.
172 *
173 * Returns: errno
174 */
175
176static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
177 struct gfs2_log_header *head)
178{
179 unsigned int orig_blk = *blk;
180 int error;
181
182 for (;;) {
183 error = get_log_header(jd, *blk, head);
184 if (error <= 0)
185 return error;
186
187 if (++*blk == jd->jd_blocks)
188 *blk = 0;
189
190 if (*blk == orig_blk) {
191 gfs2_consist_inode(GFS2_I(jd->jd_inode));
192 return -EIO;
193 }
194 }
195}
196
197/**
198 * jhead_scan - make sure we've found the head of the log
199 * @jd: the journal
200 * @head: this is filled in with the log descriptor of the head
201 *
202 * At this point, seg and lh should be either the head of the log or just
203 * before. Scan forward until we find the head.
204 *
205 * Returns: errno
206 */
207
208static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
209{
210 unsigned int blk = head->lh_blkno;
211 struct gfs2_log_header lh;
212 int error;
213
214 for (;;) {
215 if (++blk == jd->jd_blocks)
216 blk = 0;
217
218 error = get_log_header(jd, blk, &lh);
219 if (error < 0)
220 return error;
221 if (error == 1)
222 continue;
223
224 if (lh.lh_sequence == head->lh_sequence) {
225 gfs2_consist_inode(GFS2_I(jd->jd_inode));
226 return -EIO;
227 }
228 if (lh.lh_sequence < head->lh_sequence)
229 break;
230
231 *head = lh;
232 }
233
234 return 0;
235}
236
237/**
238 * gfs2_find_jhead - find the head of a log
239 * @jd: the journal
240 * @head: the log descriptor for the head of the log is returned here
241 *
242 * Do a binary search of a journal and find the valid log entry with the
243 * highest sequence number. (i.e. the log head)
244 *
245 * Returns: errno
246 */
247
248int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
249{
250 struct gfs2_log_header lh_1, lh_m;
251 u32 blk_1, blk_2, blk_m;
252 int error;
253
254 blk_1 = 0;
255 blk_2 = jd->jd_blocks - 1;
256
257 for (;;) {
258 blk_m = (blk_1 + blk_2) / 2;
259
260 error = find_good_lh(jd, &blk_1, &lh_1);
261 if (error)
262 return error;
263
264 error = find_good_lh(jd, &blk_m, &lh_m);
265 if (error)
266 return error;
267
268 if (blk_1 == blk_m || blk_m == blk_2)
269 break;
270
271 if (lh_1.lh_sequence <= lh_m.lh_sequence)
272 blk_1 = blk_m;
273 else
274 blk_2 = blk_m;
275 }
276
277 error = jhead_scan(jd, &lh_1);
278 if (error)
279 return error;
280
281 *head = lh_1;
282
283 return error;
284}
285
286/**
287 * foreach_descriptor - go through the active part of the log
288 * @jd: the journal
289 * @start: the first log header in the active region
290 * @end: the last log header (don't process the contents of this entry))
291 *
292 * Call a given function once for every log descriptor in the active
293 * portion of the log.
294 *
295 * Returns: errno
296 */
297
298static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
299 unsigned int end, int pass)
300{
301 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
302 struct buffer_head *bh;
303 struct gfs2_log_descriptor *ld;
304 int error = 0;
305 u32 length;
306 __be64 *ptr;
307 unsigned int offset = sizeof(struct gfs2_log_descriptor);
308 offset += sizeof(__be64) - 1;
309 offset &= ~(sizeof(__be64) - 1);
310
311 while (start != end) {
312 error = gfs2_replay_read_block(jd, start, &bh);
313 if (error)
314 return error;
315 if (gfs2_meta_check(sdp, bh)) {
316 brelse(bh);
317 return -EIO;
318 }
319 ld = (struct gfs2_log_descriptor *)bh->b_data;
320 length = be32_to_cpu(ld->ld_length);
321
322 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
323 struct gfs2_log_header lh;
324 error = get_log_header(jd, start, &lh);
325 if (!error) {
326 gfs2_replay_incr_blk(sdp, &start);
327 brelse(bh);
328 continue;
329 }
330 if (error == 1) {
331 gfs2_consist_inode(GFS2_I(jd->jd_inode));
332 error = -EIO;
333 }
334 brelse(bh);
335 return error;
336 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
337 brelse(bh);
338 return -EIO;
339 }
340 ptr = (__be64 *)(bh->b_data + offset);
341 error = lops_scan_elements(jd, start, ld, ptr, pass);
342 if (error) {
343 brelse(bh);
344 return error;
345 }
346
347 while (length--)
348 gfs2_replay_incr_blk(sdp, &start);
349
350 brelse(bh);
351 }
352
353 return 0;
354}
355
356/**
357 * clean_journal - mark a dirty journal as being clean
358 * @sdp: the filesystem
359 * @jd: the journal
360 * @gl: the journal's glock
361 * @head: the head journal to start from
362 *
363 * Returns: errno
364 */
365
366static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
367{
368 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
369 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
370 unsigned int lblock;
371 struct gfs2_log_header *lh;
372 u32 hash;
373 struct buffer_head *bh;
374 int error;
375 struct buffer_head bh_map;
376
377 lblock = head->lh_blkno;
378 gfs2_replay_incr_blk(sdp, &lblock);
379 error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map, 1);
380 if (error)
381 return error;
382 if (!bh_map.b_blocknr) {
383 gfs2_consist_inode(ip);
384 return -EIO;
385 }
386
387 bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
388 lock_buffer(bh);
389 memset(bh->b_data, 0, bh->b_size);
390 set_buffer_uptodate(bh);
391 clear_buffer_dirty(bh);
392 unlock_buffer(bh);
393
394 lh = (struct gfs2_log_header *)bh->b_data;
395 memset(lh, 0, sizeof(struct gfs2_log_header));
396 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
397 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
398 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
399 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
400 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
401 lh->lh_blkno = cpu_to_be32(lblock);
402 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
403 lh->lh_hash = cpu_to_be32(hash);
404
405 set_buffer_dirty(bh);
406 if (sync_dirty_buffer(bh))
407 gfs2_io_error_bh(sdp, bh);
408 brelse(bh);
409
410 return error;
411}
412
413/**
414 * gfs2_recover_journal - recovery a given journal
415 * @jd: the struct gfs2_jdesc describing the journal
416 *
417 * Acquire the journal's lock, check to see if the journal is clean, and
418 * do recovery if necessary.
419 *
420 * Returns: errno
421 */
422
423int gfs2_recover_journal(struct gfs2_jdesc *jd)
424{
425 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
426 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
427 struct gfs2_log_header head;
428 struct gfs2_holder j_gh, ji_gh, t_gh;
429 unsigned long t;
430 int ro = 0;
431 unsigned int pass;
432 int error;
433
434 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
435 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
436 jd->jd_jid);
437
438 /* Aquire the journal lock so we can do recovery */
439
440 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
441 LM_ST_EXCLUSIVE,
442 LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
443 &j_gh);
444 switch (error) {
445 case 0:
446 break;
447
448 case GLR_TRYFAILED:
449 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
450 error = 0;
451
452 default:
453 goto fail;
454 };
455
456 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
457 LM_FLAG_NOEXP, &ji_gh);
458 if (error)
459 goto fail_gunlock_j;
460 } else {
461 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
462 }
463
464 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
465
466 error = gfs2_jdesc_check(jd);
467 if (error)
468 goto fail_gunlock_ji;
469
470 error = gfs2_find_jhead(jd, &head);
471 if (error)
472 goto fail_gunlock_ji;
473
474 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
475 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
476 jd->jd_jid);
477
478 t = jiffies;
479
480 /* Acquire a shared hold on the transaction lock */
481
482 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
483 LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
484 GL_NOCANCEL | GL_NOCACHE, &t_gh);
485 if (error)
486 goto fail_gunlock_ji;
487
488 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
489 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
490 ro = 1;
491 } else {
492 if (sdp->sd_vfs->s_flags & MS_RDONLY)
493 ro = 1;
494 }
495
496 if (ro) {
497 fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
498 jd->jd_jid);
499 error = -EROFS;
500 goto fail_gunlock_tr;
501 }
502
503 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
504
505 for (pass = 0; pass < 2; pass++) {
506 lops_before_scan(jd, &head, pass);
507 error = foreach_descriptor(jd, head.lh_tail,
508 head.lh_blkno, pass);
509 lops_after_scan(jd, error, pass);
510 if (error)
511 goto fail_gunlock_tr;
512 }
513
514 error = clean_journal(jd, &head);
515 if (error)
516 goto fail_gunlock_tr;
517
518 gfs2_glock_dq_uninit(&t_gh);
519 t = DIV_ROUND_UP(jiffies - t, HZ);
520 fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
521 jd->jd_jid, t);
522 }
523
524 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
525 gfs2_glock_dq_uninit(&ji_gh);
526
527 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
528
529 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
530 gfs2_glock_dq_uninit(&j_gh);
531
532 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
533 return 0;
534
535fail_gunlock_tr:
536 gfs2_glock_dq_uninit(&t_gh);
537fail_gunlock_ji:
538 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
539 gfs2_glock_dq_uninit(&ji_gh);
540fail_gunlock_j:
541 gfs2_glock_dq_uninit(&j_gh);
542 }
543
544 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
545
546fail:
547 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
548 return error;
549}
550
551/**
552 * gfs2_check_journals - Recover any dirty journals
553 * @sdp: the filesystem
554 *
555 */
556
557void gfs2_check_journals(struct gfs2_sbd *sdp)
558{
559 struct gfs2_jdesc *jd;
560
561 for (;;) {
562 jd = gfs2_jdesc_find_dirty(sdp);
563 if (!jd)
564 break;
565
566 if (jd != sdp->sd_jdesc)
567 gfs2_recover_journal(jd);
568 }
569}
570
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
new file mode 100644
index 000000000000..961feedf4d8b
--- /dev/null
+++ b/fs/gfs2/recovery.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __RECOVERY_DOT_H__
11#define __RECOVERY_DOT_H__
12
13#include "incore.h"
14
15static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
16{
17 if (++*blk == sdp->sd_jdesc->jd_blocks)
18 *blk = 0;
19}
20
21int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
22 struct buffer_head **bh);
23
24int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
25int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
26void gfs2_revoke_clean(struct gfs2_sbd *sdp);
27
28int gfs2_find_jhead(struct gfs2_jdesc *jd,
29 struct gfs2_log_header *head);
30int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
31void gfs2_check_journals(struct gfs2_sbd *sdp);
32
33#endif /* __RECOVERY_DOT_H__ */
34
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
new file mode 100644
index 000000000000..b261385c0065
--- /dev/null
+++ b/fs/gfs2/rgrp.c
@@ -0,0 +1,1513 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/fs.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "glock.h"
22#include "glops.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "super.h"
28#include "trans.h"
29#include "ops_file.h"
30#include "util.h"
31
32#define BFITNOENT ((u32)~0)
33
34/*
35 * These routines are used by the resource group routines (rgrp.c)
36 * to keep track of block allocation. Each block is represented by two
37 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks.
38 *
39 * 0 = Free
40 * 1 = Used (not metadata)
41 * 2 = Unlinked (still in use) inode
42 * 3 = Used (metadata)
43 */
44
/*
 * Allowed block state transitions, indexed as
 * valid_change[new_state * 4 + cur_state]; non-zero means the change
 * is permitted.  Rows are the new state, columns the current state.
 */
static const char valid_change[16] = {
	/* current:  0  1  2  3 */
	/* new 0 */  0, 1, 1, 1,
	/* new 1 */  1, 0, 0, 0,
	/* new 2 */  0, 0, 0, 1,
	/* new 3 */  1, 0, 0, 0
};
52
53/**
54 * gfs2_setbit - Set a bit in the bitmaps
55 * @buffer: the buffer that holds the bitmaps
56 * @buflen: the length (in bytes) of the buffer
57 * @block: the block to set
58 * @new_state: the new state of the block
59 *
60 */
61
62static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
63 unsigned int buflen, u32 block,
64 unsigned char new_state)
65{
66 unsigned char *byte, *end, cur_state;
67 unsigned int bit;
68
69 byte = buffer + (block / GFS2_NBBY);
70 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
71 end = buffer + buflen;
72
73 gfs2_assert(rgd->rd_sbd, byte < end);
74
75 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
76
77 if (valid_change[new_state * 4 + cur_state]) {
78 *byte ^= cur_state << bit;
79 *byte |= new_state << bit;
80 } else
81 gfs2_consist_rgrpd(rgd);
82}
83
84/**
85 * gfs2_testbit - test a bit in the bitmaps
86 * @buffer: the buffer that holds the bitmaps
87 * @buflen: the length (in bytes) of the buffer
88 * @block: the block to read
89 *
90 */
91
92static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
93 unsigned int buflen, u32 block)
94{
95 unsigned char *byte, *end, cur_state;
96 unsigned int bit;
97
98 byte = buffer + (block / GFS2_NBBY);
99 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
100 end = buffer + buflen;
101
102 gfs2_assert(rgd->rd_sbd, byte < end);
103
104 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
105
106 return cur_state;
107}
108
109/**
110 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
111 * a block in a given allocation state.
112 * @buffer: the buffer that holds the bitmaps
113 * @buflen: the length (in bytes) of the buffer
114 * @goal: start search at this block's bit-pair (within @buffer)
115 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
116 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
117 *
118 * Scope of @goal and returned block number is only within this bitmap buffer,
119 * not entire rgrp or filesystem. @buffer will be offset from the actual
120 * beginning of a bitmap block buffer, skipping any header structures.
121 *
122 * Return: the block number (bitmap buffer scope) that was found
123 */
124
125static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
126 unsigned int buflen, u32 goal,
127 unsigned char old_state)
128{
129 unsigned char *byte, *end, alloc;
130 u32 blk = goal;
131 unsigned int bit;
132
133 byte = buffer + (goal / GFS2_NBBY);
134 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
135 end = buffer + buflen;
136 alloc = (old_state & 1) ? 0 : 0x55;
137
138 while (byte < end) {
139 if ((*byte & 0x55) == alloc) {
140 blk += (8 - bit) >> 1;
141
142 bit = 0;
143 byte++;
144
145 continue;
146 }
147
148 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
149 return blk;
150
151 bit += GFS2_BIT_SIZE;
152 if (bit >= 8) {
153 bit = 0;
154 byte++;
155 }
156
157 blk++;
158 }
159
160 return BFITNOENT;
161}
162
163/**
164 * gfs2_bitcount - count the number of bits in a certain state
165 * @buffer: the buffer that holds the bitmaps
166 * @buflen: the length (in bytes) of the buffer
167 * @state: the state of the block we're looking for
168 *
169 * Returns: The number of bits
170 */
171
172static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
173 unsigned int buflen, unsigned char state)
174{
175 unsigned char *byte = buffer;
176 unsigned char *end = buffer + buflen;
177 unsigned char state1 = state << 2;
178 unsigned char state2 = state << 4;
179 unsigned char state3 = state << 6;
180 u32 count = 0;
181
182 for (; byte < end; byte++) {
183 if (((*byte) & 0x03) == state)
184 count++;
185 if (((*byte) & 0x0C) == state1)
186 count++;
187 if (((*byte) & 0x30) == state2)
188 count++;
189 if (((*byte) & 0xC0) == state3)
190 count++;
191 }
192
193 return count;
194}
195
196/**
197 * gfs2_rgrp_verify - Verify that a resource group is consistent
198 * @sdp: the filesystem
199 * @rgd: the rgrp
200 *
201 */
202
203void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
204{
205 struct gfs2_sbd *sdp = rgd->rd_sbd;
206 struct gfs2_bitmap *bi = NULL;
207 u32 length = rgd->rd_ri.ri_length;
208 u32 count[4], tmp;
209 int buf, x;
210
211 memset(count, 0, 4 * sizeof(u32));
212
213 /* Count # blocks in each of 4 possible allocation states */
214 for (buf = 0; buf < length; buf++) {
215 bi = rgd->rd_bits + buf;
216 for (x = 0; x < 4; x++)
217 count[x] += gfs2_bitcount(rgd,
218 bi->bi_bh->b_data +
219 bi->bi_offset,
220 bi->bi_len, x);
221 }
222
223 if (count[0] != rgd->rd_rg.rg_free) {
224 if (gfs2_consist_rgrpd(rgd))
225 fs_err(sdp, "free data mismatch: %u != %u\n",
226 count[0], rgd->rd_rg.rg_free);
227 return;
228 }
229
230 tmp = rgd->rd_ri.ri_data -
231 rgd->rd_rg.rg_free -
232 rgd->rd_rg.rg_dinodes;
233 if (count[1] + count[2] != tmp) {
234 if (gfs2_consist_rgrpd(rgd))
235 fs_err(sdp, "used data mismatch: %u != %u\n",
236 count[1], tmp);
237 return;
238 }
239
240 if (count[3] != rgd->rd_rg.rg_dinodes) {
241 if (gfs2_consist_rgrpd(rgd))
242 fs_err(sdp, "used metadata mismatch: %u != %u\n",
243 count[3], rgd->rd_rg.rg_dinodes);
244 return;
245 }
246
247 if (count[2] > count[3]) {
248 if (gfs2_consist_rgrpd(rgd))
249 fs_err(sdp, "unlinked inodes > inodes: %u\n",
250 count[2]);
251 return;
252 }
253
254}
255
256static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block)
257{
258 u64 first = ri->ri_data0;
259 u64 last = first + ri->ri_data;
260 return first <= block && block < last;
261}
262
263/**
264 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
265 * @sdp: The GFS2 superblock
266 * @n: The data block number
267 *
268 * Returns: The resource group, or NULL if not found
269 */
270
271struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
272{
273 struct gfs2_rgrpd *rgd;
274
275 spin_lock(&sdp->sd_rindex_spin);
276
277 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
278 if (rgrp_contains_block(&rgd->rd_ri, blk)) {
279 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
280 spin_unlock(&sdp->sd_rindex_spin);
281 return rgd;
282 }
283 }
284
285 spin_unlock(&sdp->sd_rindex_spin);
286
287 return NULL;
288}
289
290/**
291 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
292 * @sdp: The GFS2 superblock
293 *
294 * Returns: The first rgrp in the filesystem
295 */
296
297struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
298{
299 gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
300 return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
301}
302
303/**
304 * gfs2_rgrpd_get_next - get the next RG
305 * @rgd: A RG
306 *
307 * Returns: The next rgrp
308 */
309
310struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
311{
312 if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
313 return NULL;
314 return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
315}
316
317static void clear_rgrpdi(struct gfs2_sbd *sdp)
318{
319 struct list_head *head;
320 struct gfs2_rgrpd *rgd;
321 struct gfs2_glock *gl;
322
323 spin_lock(&sdp->sd_rindex_spin);
324 sdp->sd_rindex_forward = NULL;
325 head = &sdp->sd_rindex_recent_list;
326 while (!list_empty(head)) {
327 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
328 list_del(&rgd->rd_recent);
329 }
330 spin_unlock(&sdp->sd_rindex_spin);
331
332 head = &sdp->sd_rindex_list;
333 while (!list_empty(head)) {
334 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
335 gl = rgd->rd_gl;
336
337 list_del(&rgd->rd_list);
338 list_del(&rgd->rd_list_mru);
339
340 if (gl) {
341 gl->gl_object = NULL;
342 gfs2_glock_put(gl);
343 }
344
345 kfree(rgd->rd_bits);
346 kfree(rgd);
347 }
348}
349
350void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
351{
352 mutex_lock(&sdp->sd_rindex_mutex);
353 clear_rgrpdi(sdp);
354 mutex_unlock(&sdp->sd_rindex_mutex);
355}
356
357/**
358 * gfs2_compute_bitstructs - Compute the bitmap sizes
359 * @rgd: The resource group descriptor
360 *
361 * Calculates bitmap descriptors, one for each block that contains bitmap data
362 *
363 * Returns: errno
364 */
365
366static int compute_bitstructs(struct gfs2_rgrpd *rgd)
367{
368 struct gfs2_sbd *sdp = rgd->rd_sbd;
369 struct gfs2_bitmap *bi;
370 u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
371 u32 bytes_left, bytes;
372 int x;
373
374 if (!length)
375 return -EINVAL;
376
377 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
378 if (!rgd->rd_bits)
379 return -ENOMEM;
380
381 bytes_left = rgd->rd_ri.ri_bitbytes;
382
383 for (x = 0; x < length; x++) {
384 bi = rgd->rd_bits + x;
385
386 /* small rgrp; bitmap stored completely in header block */
387 if (length == 1) {
388 bytes = bytes_left;
389 bi->bi_offset = sizeof(struct gfs2_rgrp);
390 bi->bi_start = 0;
391 bi->bi_len = bytes;
392 /* header block */
393 } else if (x == 0) {
394 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
395 bi->bi_offset = sizeof(struct gfs2_rgrp);
396 bi->bi_start = 0;
397 bi->bi_len = bytes;
398 /* last block */
399 } else if (x + 1 == length) {
400 bytes = bytes_left;
401 bi->bi_offset = sizeof(struct gfs2_meta_header);
402 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
403 bi->bi_len = bytes;
404 /* other blocks */
405 } else {
406 bytes = sdp->sd_sb.sb_bsize -
407 sizeof(struct gfs2_meta_header);
408 bi->bi_offset = sizeof(struct gfs2_meta_header);
409 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
410 bi->bi_len = bytes;
411 }
412
413 bytes_left -= bytes;
414 }
415
416 if (bytes_left) {
417 gfs2_consist_rgrpd(rgd);
418 return -EIO;
419 }
420 bi = rgd->rd_bits + (length - 1);
421 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
422 if (gfs2_consist_rgrpd(rgd)) {
423 gfs2_rindex_print(&rgd->rd_ri);
424 fs_err(sdp, "start=%u len=%u offset=%u\n",
425 bi->bi_start, bi->bi_len, bi->bi_offset);
426 }
427 return -EIO;
428 }
429
430 return 0;
431}
432
433/**
434 * gfs2_ri_update - Pull in a new resource index from the disk
435 * @gl: The glock covering the rindex inode
436 *
437 * Returns: 0 on successful update, error code otherwise
438 */
439
440static int gfs2_ri_update(struct gfs2_inode *ip)
441{
442 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
443 struct inode *inode = &ip->i_inode;
444 struct gfs2_rgrpd *rgd;
445 char buf[sizeof(struct gfs2_rindex)];
446 struct file_ra_state ra_state;
447 u64 junk = ip->i_di.di_size;
448 int error;
449
450 if (do_div(junk, sizeof(struct gfs2_rindex))) {
451 gfs2_consist_inode(ip);
452 return -EIO;
453 }
454
455 clear_rgrpdi(sdp);
456
457 file_ra_state_init(&ra_state, inode->i_mapping);
458 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
459 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
460 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
461 sizeof(struct gfs2_rindex));
462 if (!error)
463 break;
464 if (error != sizeof(struct gfs2_rindex)) {
465 if (error > 0)
466 error = -EIO;
467 goto fail;
468 }
469
470 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
471 error = -ENOMEM;
472 if (!rgd)
473 goto fail;
474
475 mutex_init(&rgd->rd_mutex);
476 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
477 rgd->rd_sbd = sdp;
478
479 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
480 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
481
482 gfs2_rindex_in(&rgd->rd_ri, buf);
483 error = compute_bitstructs(rgd);
484 if (error)
485 goto fail;
486
487 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
488 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
489 if (error)
490 goto fail;
491
492 rgd->rd_gl->gl_object = rgd;
493 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
494 }
495
496 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
497 return 0;
498
499fail:
500 clear_rgrpdi(sdp);
501 return error;
502}
503
504/**
505 * gfs2_rindex_hold - Grab a lock on the rindex
506 * @sdp: The GFS2 superblock
507 * @ri_gh: the glock holder
508 *
509 * We grab a lock on the rindex inode to make sure that it doesn't
510 * change whilst we are performing an operation. We keep this lock
511 * for quite long periods of time compared to other locks. This
512 * doesn't matter, since it is shared and it is very, very rarely
513 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
514 *
515 * This makes sure that we're using the latest copy of the resource index
516 * special file, which might have been updated if someone expanded the
517 * filesystem (via gfs2_grow utility), which adds new resource groups.
518 *
519 * Returns: 0 on success, error code otherwise
520 */
521
522int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
523{
524 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
525 struct gfs2_glock *gl = ip->i_gl;
526 int error;
527
528 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
529 if (error)
530 return error;
531
532 /* Read new copy from disk if we don't have the latest */
533 if (sdp->sd_rindex_vn != gl->gl_vn) {
534 mutex_lock(&sdp->sd_rindex_mutex);
535 if (sdp->sd_rindex_vn != gl->gl_vn) {
536 error = gfs2_ri_update(ip);
537 if (error)
538 gfs2_glock_dq_uninit(ri_gh);
539 }
540 mutex_unlock(&sdp->sd_rindex_mutex);
541 }
542
543 return error;
544}
545
546/**
547 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
548 * @rgd: the struct gfs2_rgrpd describing the RG to read in
549 *
550 * Read in all of a Resource Group's header and bitmap blocks.
551 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
552 *
553 * Returns: errno
554 */
555
556int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
557{
558 struct gfs2_sbd *sdp = rgd->rd_sbd;
559 struct gfs2_glock *gl = rgd->rd_gl;
560 unsigned int length = rgd->rd_ri.ri_length;
561 struct gfs2_bitmap *bi;
562 unsigned int x, y;
563 int error;
564
565 mutex_lock(&rgd->rd_mutex);
566
567 spin_lock(&sdp->sd_rindex_spin);
568 if (rgd->rd_bh_count) {
569 rgd->rd_bh_count++;
570 spin_unlock(&sdp->sd_rindex_spin);
571 mutex_unlock(&rgd->rd_mutex);
572 return 0;
573 }
574 spin_unlock(&sdp->sd_rindex_spin);
575
576 for (x = 0; x < length; x++) {
577 bi = rgd->rd_bits + x;
578 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
579 if (error)
580 goto fail;
581 }
582
583 for (y = length; y--;) {
584 bi = rgd->rd_bits + y;
585 error = gfs2_meta_wait(sdp, bi->bi_bh);
586 if (error)
587 goto fail;
588 if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
589 GFS2_METATYPE_RG)) {
590 error = -EIO;
591 goto fail;
592 }
593 }
594
595 if (rgd->rd_rg_vn != gl->gl_vn) {
596 gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
597 rgd->rd_rg_vn = gl->gl_vn;
598 }
599
600 spin_lock(&sdp->sd_rindex_spin);
601 rgd->rd_free_clone = rgd->rd_rg.rg_free;
602 rgd->rd_bh_count++;
603 spin_unlock(&sdp->sd_rindex_spin);
604
605 mutex_unlock(&rgd->rd_mutex);
606
607 return 0;
608
609fail:
610 while (x--) {
611 bi = rgd->rd_bits + x;
612 brelse(bi->bi_bh);
613 bi->bi_bh = NULL;
614 gfs2_assert_warn(sdp, !bi->bi_clone);
615 }
616 mutex_unlock(&rgd->rd_mutex);
617
618 return error;
619}
620
621void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
622{
623 struct gfs2_sbd *sdp = rgd->rd_sbd;
624
625 spin_lock(&sdp->sd_rindex_spin);
626 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
627 rgd->rd_bh_count++;
628 spin_unlock(&sdp->sd_rindex_spin);
629}
630
631/**
632 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
633 * @rgd: the struct gfs2_rgrpd describing the RG to read in
634 *
635 */
636
637void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
638{
639 struct gfs2_sbd *sdp = rgd->rd_sbd;
640 int x, length = rgd->rd_ri.ri_length;
641
642 spin_lock(&sdp->sd_rindex_spin);
643 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
644 if (--rgd->rd_bh_count) {
645 spin_unlock(&sdp->sd_rindex_spin);
646 return;
647 }
648
649 for (x = 0; x < length; x++) {
650 struct gfs2_bitmap *bi = rgd->rd_bits + x;
651 kfree(bi->bi_clone);
652 bi->bi_clone = NULL;
653 brelse(bi->bi_bh);
654 bi->bi_bh = NULL;
655 }
656
657 spin_unlock(&sdp->sd_rindex_spin);
658}
659
660void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
661{
662 struct gfs2_sbd *sdp = rgd->rd_sbd;
663 unsigned int length = rgd->rd_ri.ri_length;
664 unsigned int x;
665
666 for (x = 0; x < length; x++) {
667 struct gfs2_bitmap *bi = rgd->rd_bits + x;
668 if (!bi->bi_clone)
669 continue;
670 memcpy(bi->bi_clone + bi->bi_offset,
671 bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
672 }
673
674 spin_lock(&sdp->sd_rindex_spin);
675 rgd->rd_free_clone = rgd->rd_rg.rg_free;
676 spin_unlock(&sdp->sd_rindex_spin);
677}
678
679/**
680 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
681 * @ip: the incore GFS2 inode structure
682 *
683 * Returns: the struct gfs2_alloc
684 */
685
686struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
687{
688 struct gfs2_alloc *al = &ip->i_alloc;
689
690 /* FIXME: Should assert that the correct locks are held here... */
691 memset(al, 0, sizeof(*al));
692 return al;
693}
694
695/**
696 * try_rgrp_fit - See if a given reservation will fit in a given RG
697 * @rgd: the RG data
698 * @al: the struct gfs2_alloc structure describing the reservation
699 *
700 * If there's room for the requested blocks to be allocated from the RG:
701 * Sets the $al_reserved_data field in @al.
702 * Sets the $al_reserved_meta field in @al.
703 * Sets the $al_rgd field in @al.
704 *
705 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
706 */
707
708static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
709{
710 struct gfs2_sbd *sdp = rgd->rd_sbd;
711 int ret = 0;
712
713 spin_lock(&sdp->sd_rindex_spin);
714 if (rgd->rd_free_clone >= al->al_requested) {
715 al->al_rgd = rgd;
716 ret = 1;
717 }
718 spin_unlock(&sdp->sd_rindex_spin);
719
720 return ret;
721}
722
723/**
724 * recent_rgrp_first - get first RG from "recent" list
725 * @sdp: The GFS2 superblock
726 * @rglast: address of the rgrp used last
727 *
728 * Returns: The first rgrp in the recent list
729 */
730
731static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
732 u64 rglast)
733{
734 struct gfs2_rgrpd *rgd = NULL;
735
736 spin_lock(&sdp->sd_rindex_spin);
737
738 if (list_empty(&sdp->sd_rindex_recent_list))
739 goto out;
740
741 if (!rglast)
742 goto first;
743
744 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
745 if (rgd->rd_ri.ri_addr == rglast)
746 goto out;
747 }
748
749first:
750 rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
751 rd_recent);
752out:
753 spin_unlock(&sdp->sd_rindex_spin);
754 return rgd;
755}
756
757/**
758 * recent_rgrp_next - get next RG from "recent" list
759 * @cur_rgd: current rgrp
760 * @remove:
761 *
762 * Returns: The next rgrp in the recent list
763 */
764
765static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
766 int remove)
767{
768 struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
769 struct list_head *head;
770 struct gfs2_rgrpd *rgd;
771
772 spin_lock(&sdp->sd_rindex_spin);
773
774 head = &sdp->sd_rindex_recent_list;
775
776 list_for_each_entry(rgd, head, rd_recent) {
777 if (rgd == cur_rgd) {
778 if (cur_rgd->rd_recent.next != head)
779 rgd = list_entry(cur_rgd->rd_recent.next,
780 struct gfs2_rgrpd, rd_recent);
781 else
782 rgd = NULL;
783
784 if (remove)
785 list_del(&cur_rgd->rd_recent);
786
787 goto out;
788 }
789 }
790
791 rgd = NULL;
792 if (!list_empty(head))
793 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
794
795out:
796 spin_unlock(&sdp->sd_rindex_spin);
797 return rgd;
798}
799
800/**
801 * recent_rgrp_add - add an RG to tail of "recent" list
802 * @new_rgd: The rgrp to add
803 *
804 */
805
806static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
807{
808 struct gfs2_sbd *sdp = new_rgd->rd_sbd;
809 struct gfs2_rgrpd *rgd;
810 unsigned int count = 0;
811 unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
812
813 spin_lock(&sdp->sd_rindex_spin);
814
815 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
816 if (rgd == new_rgd)
817 goto out;
818
819 if (++count >= max)
820 goto out;
821 }
822 list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
823
824out:
825 spin_unlock(&sdp->sd_rindex_spin);
826}
827
828/**
829 * forward_rgrp_get - get an rgrp to try next from full list
830 * @sdp: The GFS2 superblock
831 *
832 * Returns: The rgrp to try next
833 */
834
835static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
836{
837 struct gfs2_rgrpd *rgd;
838 unsigned int journals = gfs2_jindex_size(sdp);
839 unsigned int rg = 0, x;
840
841 spin_lock(&sdp->sd_rindex_spin);
842
843 rgd = sdp->sd_rindex_forward;
844 if (!rgd) {
845 if (sdp->sd_rgrps >= journals)
846 rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
847
848 for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
849 x++, rgd = gfs2_rgrpd_get_next(rgd))
850 /* Do Nothing */;
851
852 sdp->sd_rindex_forward = rgd;
853 }
854
855 spin_unlock(&sdp->sd_rindex_spin);
856
857 return rgd;
858}
859
860/**
861 * forward_rgrp_set - set the forward rgrp pointer
862 * @sdp: the filesystem
863 * @rgd: The new forward rgrp
864 *
865 */
866
867static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
868{
869 spin_lock(&sdp->sd_rindex_spin);
870 sdp->sd_rindex_forward = rgd;
871 spin_unlock(&sdp->sd_rindex_spin);
872}
873
874/**
875 * get_local_rgrp - Choose and lock a rgrp for allocation
876 * @ip: the inode to reserve space for
877 * @rgp: the chosen and locked rgrp
878 *
879 * Try to acquire rgrp in way which avoids contending with others.
880 *
881 * Returns: errno
882 */
883
884static int get_local_rgrp(struct gfs2_inode *ip)
885{
886 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
887 struct gfs2_rgrpd *rgd, *begin = NULL;
888 struct gfs2_alloc *al = &ip->i_alloc;
889 int flags = LM_FLAG_TRY;
890 int skipped = 0;
891 int loops = 0;
892 int error;
893
894 /* Try recently successful rgrps */
895
896 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
897
898 while (rgd) {
899 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
900 LM_FLAG_TRY, &al->al_rgd_gh);
901 switch (error) {
902 case 0:
903 if (try_rgrp_fit(rgd, al))
904 goto out;
905 gfs2_glock_dq_uninit(&al->al_rgd_gh);
906 rgd = recent_rgrp_next(rgd, 1);
907 break;
908
909 case GLR_TRYFAILED:
910 rgd = recent_rgrp_next(rgd, 0);
911 break;
912
913 default:
914 return error;
915 }
916 }
917
918 /* Go through full list of rgrps */
919
920 begin = rgd = forward_rgrp_get(sdp);
921
922 for (;;) {
923 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
924 &al->al_rgd_gh);
925 switch (error) {
926 case 0:
927 if (try_rgrp_fit(rgd, al))
928 goto out;
929 gfs2_glock_dq_uninit(&al->al_rgd_gh);
930 break;
931
932 case GLR_TRYFAILED:
933 skipped++;
934 break;
935
936 default:
937 return error;
938 }
939
940 rgd = gfs2_rgrpd_get_next(rgd);
941 if (!rgd)
942 rgd = gfs2_rgrpd_get_first(sdp);
943
944 if (rgd == begin) {
945 if (++loops >= 2 || !skipped)
946 return -ENOSPC;
947 flags = 0;
948 }
949 }
950
951out:
952 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
953
954 if (begin) {
955 recent_rgrp_add(rgd);
956 rgd = gfs2_rgrpd_get_next(rgd);
957 if (!rgd)
958 rgd = gfs2_rgrpd_get_first(sdp);
959 forward_rgrp_set(sdp, rgd);
960 }
961
962 return 0;
963}
964
965/**
966 * gfs2_inplace_reserve_i - Reserve space in the filesystem
967 * @ip: the inode to reserve space for
968 *
969 * Returns: errno
970 */
971
972int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
973{
974 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
975 struct gfs2_alloc *al = &ip->i_alloc;
976 int error;
977
978 if (gfs2_assert_warn(sdp, al->al_requested))
979 return -EINVAL;
980
981 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
982 if (error)
983 return error;
984
985 error = get_local_rgrp(ip);
986 if (error) {
987 gfs2_glock_dq_uninit(&al->al_ri_gh);
988 return error;
989 }
990
991 al->al_file = file;
992 al->al_line = line;
993
994 return 0;
995}
996
997/**
998 * gfs2_inplace_release - release an inplace reservation
999 * @ip: the inode the reservation was taken out on
1000 *
1001 * Release a reservation made by gfs2_inplace_reserve().
1002 */
1003
1004void gfs2_inplace_release(struct gfs2_inode *ip)
1005{
1006 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1007 struct gfs2_alloc *al = &ip->i_alloc;
1008
1009 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
1010 fs_warn(sdp, "al_alloced = %u, al_requested = %u "
1011 "al_file = %s, al_line = %u\n",
1012 al->al_alloced, al->al_requested, al->al_file,
1013 al->al_line);
1014
1015 al->al_rgd = NULL;
1016 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1017 gfs2_glock_dq_uninit(&al->al_ri_gh);
1018}
1019
1020/**
1021 * gfs2_get_block_type - Check a block in a RG is of given type
1022 * @rgd: the resource group holding the block
1023 * @block: the block number
1024 *
1025 * Returns: The block type (GFS2_BLKST_*)
1026 */
1027
1028unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1029{
1030 struct gfs2_bitmap *bi = NULL;
1031 u32 length, rgrp_block, buf_block;
1032 unsigned int buf;
1033 unsigned char type;
1034
1035 length = rgd->rd_ri.ri_length;
1036 rgrp_block = block - rgd->rd_ri.ri_data0;
1037
1038 for (buf = 0; buf < length; buf++) {
1039 bi = rgd->rd_bits + buf;
1040 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1041 break;
1042 }
1043
1044 gfs2_assert(rgd->rd_sbd, buf < length);
1045 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1046
1047 type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1048 bi->bi_len, buf_block);
1049
1050 return type;
1051}
1052
1053/**
1054 * rgblk_search - find a block in @old_state, change allocation
1055 * state to @new_state
1056 * @rgd: the resource group descriptor
1057 * @goal: the goal block within the RG (start here to search for avail block)
1058 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
1059 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1060 *
1061 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
1062 * Add the found bitmap buffer to the transaction.
1063 * Set the found bits to @new_state to change block's allocation state.
1064 *
1065 * This function never fails, because we wouldn't call it unless we
1066 * know (from reservation results, etc.) that a block is available.
1067 *
1068 * Scope of @goal and returned block is just within rgrp, not the whole
1069 * filesystem.
1070 *
1071 * Returns: the block number allocated
1072 */
1073
1074static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1075 unsigned char old_state, unsigned char new_state)
1076{
1077 struct gfs2_bitmap *bi = NULL;
1078 u32 length = rgd->rd_ri.ri_length;
1079 u32 blk = 0;
1080 unsigned int buf, x;
1081
1082 /* Find bitmap block that contains bits for goal block */
1083 for (buf = 0; buf < length; buf++) {
1084 bi = rgd->rd_bits + buf;
1085 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1086 break;
1087 }
1088
1089 gfs2_assert(rgd->rd_sbd, buf < length);
1090
1091 /* Convert scope of "goal" from rgrp-wide to within found bit block */
1092 goal -= bi->bi_start * GFS2_NBBY;
1093
1094 /* Search (up to entire) bitmap in this rgrp for allocatable block.
1095 "x <= length", instead of "x < length", because we typically start
1096 the search in the middle of a bit block, but if we can't find an
1097 allocatable block anywhere else, we want to be able wrap around and
1098 search in the first part of our first-searched bit block. */
1099 for (x = 0; x <= length; x++) {
1100 if (bi->bi_clone)
1101 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
1102 bi->bi_len, goal, old_state);
1103 else
1104 blk = gfs2_bitfit(rgd,
1105 bi->bi_bh->b_data + bi->bi_offset,
1106 bi->bi_len, goal, old_state);
1107 if (blk != BFITNOENT)
1108 break;
1109
1110 /* Try next bitmap block (wrap back to rgrp header if at end) */
1111 buf = (buf + 1) % length;
1112 bi = rgd->rd_bits + buf;
1113 goal = 0;
1114 }
1115
1116 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
1117 blk = 0;
1118
1119 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1120 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1121 bi->bi_len, blk, new_state);
1122 if (bi->bi_clone)
1123 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1124 bi->bi_len, blk, new_state);
1125
1126 return bi->bi_start * GFS2_NBBY + blk;
1127}
1128
1129/**
1130 * rgblk_free - Change alloc state of given block(s)
1131 * @sdp: the filesystem
1132 * @bstart: the start of a run of blocks to free
1133 * @blen: the length of the block run (all must lie within ONE RG!)
1134 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1135 *
1136 * Returns: Resource group containing the block(s)
1137 */
1138
1139static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1140 u32 blen, unsigned char new_state)
1141{
1142 struct gfs2_rgrpd *rgd;
1143 struct gfs2_bitmap *bi = NULL;
1144 u32 length, rgrp_blk, buf_blk;
1145 unsigned int buf;
1146
1147 rgd = gfs2_blk2rgrpd(sdp, bstart);
1148 if (!rgd) {
1149 if (gfs2_consist(sdp))
1150 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1151 return NULL;
1152 }
1153
1154 length = rgd->rd_ri.ri_length;
1155
1156 rgrp_blk = bstart - rgd->rd_ri.ri_data0;
1157
1158 while (blen--) {
1159 for (buf = 0; buf < length; buf++) {
1160 bi = rgd->rd_bits + buf;
1161 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1162 break;
1163 }
1164
1165 gfs2_assert(rgd->rd_sbd, buf < length);
1166
1167 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1168 rgrp_blk++;
1169
1170 if (!bi->bi_clone) {
1171 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1172 GFP_NOFS | __GFP_NOFAIL);
1173 memcpy(bi->bi_clone + bi->bi_offset,
1174 bi->bi_bh->b_data + bi->bi_offset,
1175 bi->bi_len);
1176 }
1177 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1178 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1179 bi->bi_len, buf_blk, new_state);
1180 }
1181
1182 return rgd;
1183}
1184
1185/**
1186 * gfs2_alloc_data - Allocate a data block
1187 * @ip: the inode to allocate the data block for
1188 *
1189 * Returns: the allocated block
1190 */
1191
1192u64 gfs2_alloc_data(struct gfs2_inode *ip)
1193{
1194 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1195 struct gfs2_alloc *al = &ip->i_alloc;
1196 struct gfs2_rgrpd *rgd = al->al_rgd;
1197 u32 goal, blk;
1198 u64 block;
1199
1200 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
1201 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
1202 else
1203 goal = rgd->rd_last_alloc_data;
1204
1205 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1206 rgd->rd_last_alloc_data = blk;
1207
1208 block = rgd->rd_ri.ri_data0 + blk;
1209 ip->i_di.di_goal_data = block;
1210
1211 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1212 rgd->rd_rg.rg_free--;
1213
1214 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1215 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1216
1217 al->al_alloced++;
1218
1219 gfs2_statfs_change(sdp, 0, -1, 0);
1220 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1221
1222 spin_lock(&sdp->sd_rindex_spin);
1223 rgd->rd_free_clone--;
1224 spin_unlock(&sdp->sd_rindex_spin);
1225
1226 return block;
1227}
1228
1229/**
1230 * gfs2_alloc_meta - Allocate a metadata block
1231 * @ip: the inode to allocate the metadata block for
1232 *
1233 * Returns: the allocated block
1234 */
1235
1236u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1237{
1238 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1239 struct gfs2_alloc *al = &ip->i_alloc;
1240 struct gfs2_rgrpd *rgd = al->al_rgd;
1241 u32 goal, blk;
1242 u64 block;
1243
1244 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
1245 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
1246 else
1247 goal = rgd->rd_last_alloc_meta;
1248
1249 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1250 rgd->rd_last_alloc_meta = blk;
1251
1252 block = rgd->rd_ri.ri_data0 + blk;
1253 ip->i_di.di_goal_meta = block;
1254
1255 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1256 rgd->rd_rg.rg_free--;
1257
1258 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1259 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1260
1261 al->al_alloced++;
1262
1263 gfs2_statfs_change(sdp, 0, -1, 0);
1264 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1265 gfs2_trans_add_unrevoke(sdp, block);
1266
1267 spin_lock(&sdp->sd_rindex_spin);
1268 rgd->rd_free_clone--;
1269 spin_unlock(&sdp->sd_rindex_spin);
1270
1271 return block;
1272}
1273
1274/**
1275 * gfs2_alloc_di - Allocate a dinode
1276 * @dip: the directory that the inode is going in
1277 *
1278 * Returns: the block allocated
1279 */
1280
1281u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1282{
1283 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1284 struct gfs2_alloc *al = &dip->i_alloc;
1285 struct gfs2_rgrpd *rgd = al->al_rgd;
1286 u32 blk;
1287 u64 block;
1288
1289 blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
1290 GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
1291
1292 rgd->rd_last_alloc_meta = blk;
1293
1294 block = rgd->rd_ri.ri_data0 + blk;
1295
1296 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1297 rgd->rd_rg.rg_free--;
1298 rgd->rd_rg.rg_dinodes++;
1299 *generation = rgd->rd_rg.rg_igeneration++;
1300 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1301 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1302
1303 al->al_alloced++;
1304
1305 gfs2_statfs_change(sdp, 0, -1, +1);
1306 gfs2_trans_add_unrevoke(sdp, block);
1307
1308 spin_lock(&sdp->sd_rindex_spin);
1309 rgd->rd_free_clone--;
1310 spin_unlock(&sdp->sd_rindex_spin);
1311
1312 return block;
1313}
1314
1315/**
1316 * gfs2_free_data - free a contiguous run of data block(s)
1317 * @ip: the inode these blocks are being freed from
1318 * @bstart: first block of a run of contiguous blocks
1319 * @blen: the length of the block run
1320 *
1321 */
1322
1323void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1324{
1325 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1326 struct gfs2_rgrpd *rgd;
1327
1328 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1329 if (!rgd)
1330 return;
1331
1332 rgd->rd_rg.rg_free += blen;
1333
1334 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1335 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1336
1337 gfs2_trans_add_rg(rgd);
1338
1339 gfs2_statfs_change(sdp, 0, +blen, 0);
1340 gfs2_quota_change(ip, -(s64)blen,
1341 ip->i_di.di_uid, ip->i_di.di_gid);
1342}
1343
1344/**
1345 * gfs2_free_meta - free a contiguous run of data block(s)
1346 * @ip: the inode these blocks are being freed from
1347 * @bstart: first block of a run of contiguous blocks
1348 * @blen: the length of the block run
1349 *
1350 */
1351
1352void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1353{
1354 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1355 struct gfs2_rgrpd *rgd;
1356
1357 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1358 if (!rgd)
1359 return;
1360
1361 rgd->rd_rg.rg_free += blen;
1362
1363 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1364 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1365
1366 gfs2_trans_add_rg(rgd);
1367
1368 gfs2_statfs_change(sdp, 0, +blen, 0);
1369 gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid);
1370 gfs2_meta_wipe(ip, bstart, blen);
1371}
1372
1373void gfs2_unlink_di(struct inode *inode)
1374{
1375 struct gfs2_inode *ip = GFS2_I(inode);
1376 struct gfs2_sbd *sdp = GFS2_SB(inode);
1377 struct gfs2_rgrpd *rgd;
1378 u64 blkno = ip->i_num.no_addr;
1379
1380 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1381 if (!rgd)
1382 return;
1383 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1384 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1385 gfs2_trans_add_rg(rgd);
1386}
1387
1388static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1389{
1390 struct gfs2_sbd *sdp = rgd->rd_sbd;
1391 struct gfs2_rgrpd *tmp_rgd;
1392
1393 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1394 if (!tmp_rgd)
1395 return;
1396 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1397
1398 if (!rgd->rd_rg.rg_dinodes)
1399 gfs2_consist_rgrpd(rgd);
1400 rgd->rd_rg.rg_dinodes--;
1401 rgd->rd_rg.rg_free++;
1402
1403 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1404 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1405
1406 gfs2_statfs_change(sdp, 0, +1, -1);
1407 gfs2_trans_add_rg(rgd);
1408}
1409
1410
1411void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1412{
1413 gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
1414 gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
1415 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
1416}
1417
1418/**
1419 * gfs2_rlist_add - add a RG to a list of RGs
1420 * @sdp: the filesystem
1421 * @rlist: the list of resource groups
1422 * @block: the block
1423 *
1424 * Figure out what RG a block belongs to and add that RG to the list
1425 *
1426 * FIXME: Don't use NOFAIL
1427 *
1428 */
1429
1430void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1431 u64 block)
1432{
1433 struct gfs2_rgrpd *rgd;
1434 struct gfs2_rgrpd **tmp;
1435 unsigned int new_space;
1436 unsigned int x;
1437
1438 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1439 return;
1440
1441 rgd = gfs2_blk2rgrpd(sdp, block);
1442 if (!rgd) {
1443 if (gfs2_consist(sdp))
1444 fs_err(sdp, "block = %llu\n", (unsigned long long)block);
1445 return;
1446 }
1447
1448 for (x = 0; x < rlist->rl_rgrps; x++)
1449 if (rlist->rl_rgd[x] == rgd)
1450 return;
1451
1452 if (rlist->rl_rgrps == rlist->rl_space) {
1453 new_space = rlist->rl_space + 10;
1454
1455 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1456 GFP_NOFS | __GFP_NOFAIL);
1457
1458 if (rlist->rl_rgd) {
1459 memcpy(tmp, rlist->rl_rgd,
1460 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1461 kfree(rlist->rl_rgd);
1462 }
1463
1464 rlist->rl_space = new_space;
1465 rlist->rl_rgd = tmp;
1466 }
1467
1468 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1469}
1470
1471/**
1472 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1473 * and initialize an array of glock holders for them
1474 * @rlist: the list of resource groups
1475 * @state: the lock state to acquire the RG lock in
1476 * @flags: the modifier flags for the holder structures
1477 *
1478 * FIXME: Don't use NOFAIL
1479 *
1480 */
1481
1482void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
1483 int flags)
1484{
1485 unsigned int x;
1486
1487 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1488 GFP_NOFS | __GFP_NOFAIL);
1489 for (x = 0; x < rlist->rl_rgrps; x++)
1490 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1491 state, flags,
1492 &rlist->rl_ghs[x]);
1493}
1494
1495/**
1496 * gfs2_rlist_free - free a resource group list
1497 * @list: the list of resource groups
1498 *
1499 */
1500
1501void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1502{
1503 unsigned int x;
1504
1505 kfree(rlist->rl_rgd);
1506
1507 if (rlist->rl_ghs) {
1508 for (x = 0; x < rlist->rl_rgrps; x++)
1509 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1510 kfree(rlist->rl_ghs);
1511 }
1512}
1513
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
new file mode 100644
index 000000000000..9eedfd12bfff
--- /dev/null
+++ b/fs/gfs2/rgrp.h
@@ -0,0 +1,69 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __RGRP_DOT_H__
11#define __RGRP_DOT_H__
12
13struct gfs2_rgrpd;
14struct gfs2_sbd;
15struct gfs2_holder;
16
17void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
18
19struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
20struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
21struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
22
23void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
24int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
25
26int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
27void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
28void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
29
30void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
31
32struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
/* Deliberate no-op, kept so we can see where ip->i_alloc is used. */
static inline void gfs2_alloc_put(struct gfs2_inode *ip)
{
}
37
38int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
39 char *file, unsigned int line);
40#define gfs2_inplace_reserve(ip) \
41gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
42
43void gfs2_inplace_release(struct gfs2_inode *ip);
44
45unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
46
47u64 gfs2_alloc_data(struct gfs2_inode *ip);
48u64 gfs2_alloc_meta(struct gfs2_inode *ip);
49u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
50
51void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
52void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
53void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
54void gfs2_unlink_di(struct inode *inode);
55
/*
 * A list of resource groups, filled by gfs2_rlist_add(), given holders by
 * gfs2_rlist_alloc() and released by gfs2_rlist_free().
 */
56struct gfs2_rgrp_list {
57 unsigned int rl_rgrps; /* number of entries in use in rl_rgd */
58 unsigned int rl_space; /* number of slots allocated in rl_rgd */
59 struct gfs2_rgrpd **rl_rgd; /* the listed rgrps; array grown by gfs2_rlist_add() */
60 struct gfs2_holder *rl_ghs; /* one holder per listed rgrp; NULL until gfs2_rlist_alloc() */
61};
62
63void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
64 u64 block);
65void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
66 int flags);
67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
68
69#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
new file mode 100644
index 000000000000..6a78b1b32e25
--- /dev/null
+++ b/fs/gfs2/super.c
@@ -0,0 +1,976 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/bio.h>
18#include <linux/lm_interface.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "bmap.h"
23#include "dir.h"
24#include "glock.h"
25#include "glops.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "recovery.h"
31#include "rgrp.h"
32#include "super.h"
33#include "trans.h"
34#include "util.h"
35
36static const u32 gfs2_old_fs_formats[] = {
37 0
38};
39
40static const u32 gfs2_old_multihost_formats[] = {
41 0
42};
43
44/**
45 * gfs2_tune_init - Fill a gfs2_tune structure with default values
46 * @gt: tune
47 *
48 */
49
50void gfs2_tune_init(struct gfs2_tune *gt)
51{
52 spin_lock_init(&gt->gt_spin);
53
54 gt->gt_ilimit = 100;
55 gt->gt_ilimit_tries = 3;
56 gt->gt_ilimit_min = 1;
57 gt->gt_demote_secs = 300;
58 gt->gt_incore_log_blocks = 1024;
59 gt->gt_log_flush_secs = 60;
60 gt->gt_jindex_refresh_secs = 60;
61 gt->gt_scand_secs = 15;
62 gt->gt_recoverd_secs = 60;
63 gt->gt_logd_secs = 1;
64 gt->gt_quotad_secs = 5;
65 gt->gt_quota_simul_sync = 64;
66 gt->gt_quota_warn_period = 10;
67 gt->gt_quota_scale_num = 1;
68 gt->gt_quota_scale_den = 1;
69 gt->gt_quota_cache_secs = 300;
70 gt->gt_quota_quantum = 60;
71 gt->gt_atime_quantum = 3600;
72 gt->gt_new_files_jdata = 0;
73 gt->gt_new_files_directio = 0;
74 gt->gt_max_atomic_write = 4 << 20;
75 gt->gt_max_readahead = 1 << 18;
76 gt->gt_lockdump_size = 131072;
77 gt->gt_stall_secs = 600;
78 gt->gt_complain_secs = 10;
79 gt->gt_reclaim_limit = 5000;
80 gt->gt_entries_per_readdir = 32;
81 gt->gt_prefetch_secs = 10;
82 gt->gt_greedy_default = HZ / 10;
83 gt->gt_greedy_quantum = HZ / 40;
84 gt->gt_greedy_max = HZ / 4;
85 gt->gt_statfs_quantum = 30;
86 gt->gt_statfs_slow = 0;
87}
88
89/**
90 * gfs2_check_sb - Check superblock
91 * @sdp: the filesystem
92 * @sb: The superblock
93 * @silent: Don't print a message if the check fails
94 *
95 * Checks the version code of the FS is one that we understand how to
96 * read and that the sizes of the various on-disk structures have not
97 * changed.
98 */
99
100int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
101{
102 unsigned int x;
103
104 if (sb->sb_header.mh_magic != GFS2_MAGIC ||
105 sb->sb_header.mh_type != GFS2_METATYPE_SB) {
106 if (!silent)
107 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
108 return -EINVAL;
109 }
110
111 /* If format numbers match exactly, we're done. */
112
113 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
114 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
115 return 0;
116
117 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
118 for (x = 0; gfs2_old_fs_formats[x]; x++)
119 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
120 break;
121
122 if (!gfs2_old_fs_formats[x]) {
123 printk(KERN_WARNING
124 "GFS2: code version (%u, %u) is incompatible "
125 "with ondisk format (%u, %u)\n",
126 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
127 sb->sb_fs_format, sb->sb_multihost_format);
128 printk(KERN_WARNING
129 "GFS2: I don't know how to upgrade this FS\n");
130 return -EINVAL;
131 }
132 }
133
134 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
135 for (x = 0; gfs2_old_multihost_formats[x]; x++)
136 if (gfs2_old_multihost_formats[x] ==
137 sb->sb_multihost_format)
138 break;
139
140 if (!gfs2_old_multihost_formats[x]) {
141 printk(KERN_WARNING
142 "GFS2: code version (%u, %u) is incompatible "
143 "with ondisk format (%u, %u)\n",
144 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
145 sb->sb_fs_format, sb->sb_multihost_format);
146 printk(KERN_WARNING
147 "GFS2: I don't know how to upgrade this FS\n");
148 return -EINVAL;
149 }
150 }
151
152 if (!sdp->sd_args.ar_upgrade) {
153 printk(KERN_WARNING
154 "GFS2: code version (%u, %u) is incompatible "
155 "with ondisk format (%u, %u)\n",
156 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
157 sb->sb_fs_format, sb->sb_multihost_format);
158 printk(KERN_INFO
159 "GFS2: Use the \"upgrade\" mount option to upgrade "
160 "the FS\n");
161 printk(KERN_INFO "GFS2: See the manual for more details\n");
162 return -EINVAL;
163 }
164
165 return 0;
166}
167
168
169static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
170{
171 struct page *page = bio->bi_private;
172 if (bio->bi_size)
173 return 1;
174
175 if (!error)
176 SetPageUptodate(page);
177 else
178 printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
179 unlock_page(page);
180 return 0;
181}
182
183struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
184{
185 struct page *page;
186 struct bio *bio;
187
188 page = alloc_page(GFP_KERNEL);
189 if (unlikely(!page))
190 return NULL;
191
192 ClearPageUptodate(page);
193 ClearPageDirty(page);
194 lock_page(page);
195
196 bio = bio_alloc(GFP_KERNEL, 1);
197 if (unlikely(!bio)) {
198 __free_page(page);
199 return NULL;
200 }
201
202 bio->bi_sector = sector;
203 bio->bi_bdev = sb->s_bdev;
204 bio_add_page(bio, page, PAGE_SIZE, 0);
205
206 bio->bi_end_io = end_bio_io_page;
207 bio->bi_private = page;
208 submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
209 wait_on_page_locked(page);
210 bio_put(bio);
211 if (!PageUptodate(page)) {
212 __free_page(page);
213 return NULL;
214 }
215 return page;
216}
217
218/**
219 * gfs2_read_sb - Read super block
220 * @sdp: The GFS2 superblock
221 * @gl: the glock for the superblock (assumed to be held)
222 * @silent: Don't print message if mount fails
223 *
224 */
225
226int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
227{
228 u32 hash_blocks, ind_blocks, leaf_blocks;
229 u32 tmp_blocks;
230 unsigned int x;
231 int error;
232 struct page *page;
233 char *sb;
234
235 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
236 if (!page) {
237 if (!silent)
238 fs_err(sdp, "can't read superblock\n");
239 return -EIO;
240 }
241 sb = kmap(page);
242 gfs2_sb_in(&sdp->sd_sb, sb);
243 kunmap(page);
244 __free_page(page);
245
246 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
247 if (error)
248 return error;
249
250 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
251 GFS2_BASIC_BLOCK_SHIFT;
252 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
253 sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
254 sizeof(struct gfs2_dinode)) / sizeof(u64);
255 sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
256 sizeof(struct gfs2_meta_header)) / sizeof(u64);
257 sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
258 sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
259 sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
260 sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
261 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
262 sizeof(struct gfs2_meta_header)) /
263 sizeof(struct gfs2_quota_change);
264
265 /* Compute maximum reservation required to add a entry to a directory */
266
267 hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
268 sdp->sd_jbsize);
269
270 ind_blocks = 0;
271 for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
272 tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
273 ind_blocks += tmp_blocks;
274 }
275
276 leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
277
278 sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
279
280 sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
281 sizeof(struct gfs2_dinode);
282 sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
283 for (x = 2;; x++) {
284 u64 space, d;
285 u32 m;
286
287 space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
288 d = space;
289 m = do_div(d, sdp->sd_inptrs);
290
291 if (d != sdp->sd_heightsize[x - 1] || m)
292 break;
293 sdp->sd_heightsize[x] = space;
294 }
295 sdp->sd_max_height = x;
296 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
297
298 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
299 sizeof(struct gfs2_dinode);
300 sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
301 for (x = 2;; x++) {
302 u64 space, d;
303 u32 m;
304
305 space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
306 d = space;
307 m = do_div(d, sdp->sd_inptrs);
308
309 if (d != sdp->sd_jheightsize[x - 1] || m)
310 break;
311 sdp->sd_jheightsize[x] = space;
312 }
313 sdp->sd_max_jheight = x;
314 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
315
316 return 0;
317}
318
319/**
320 * gfs2_jindex_hold - Grab a lock on the jindex
321 * @sdp: The GFS2 superblock
322 * @ji_gh: the holder for the jindex glock
323 *
324 * This is very similar to the gfs2_rindex_hold() function, except that
325 * in general we hold the jindex lock for longer periods of time and
326 * we grab it far less frequently (in general) than the rgrp lock.
327 *
328 * Returns: errno
329 */
330
331int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
332{
333 struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
334 struct qstr name;
335 char buf[20];
336 struct gfs2_jdesc *jd;
337 int error;
338
339 name.name = buf;
340
341 mutex_lock(&sdp->sd_jindex_mutex);
342
343 for (;;) {
344 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
345 GL_LOCAL_EXCL, ji_gh);
346 if (error)
347 break;
348
349 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
350 name.hash = gfs2_disk_hash(name.name, name.len);
351
352 error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
353 if (error == -ENOENT) {
354 error = 0;
355 break;
356 }
357
358 gfs2_glock_dq_uninit(ji_gh);
359
360 if (error)
361 break;
362
363 error = -ENOMEM;
364 jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
365 if (!jd)
366 break;
367
368 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
369 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
370 if (!jd->jd_inode)
371 error = -ENOENT;
372 else
373 error = PTR_ERR(jd->jd_inode);
374 kfree(jd);
375 break;
376 }
377
378 spin_lock(&sdp->sd_jindex_spin);
379 jd->jd_jid = sdp->sd_journals++;
380 list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
381 spin_unlock(&sdp->sd_jindex_spin);
382 }
383
384 mutex_unlock(&sdp->sd_jindex_mutex);
385
386 return error;
387}
388
389/**
390 * gfs2_jindex_free - Clear all the journal index information
391 * @sdp: The GFS2 superblock
392 *
393 */
394
395void gfs2_jindex_free(struct gfs2_sbd *sdp)
396{
397 struct list_head list;
398 struct gfs2_jdesc *jd;
399
400 spin_lock(&sdp->sd_jindex_spin);
401 list_add(&list, &sdp->sd_jindex_list);
402 list_del_init(&sdp->sd_jindex_list);
403 sdp->sd_journals = 0;
404 spin_unlock(&sdp->sd_jindex_spin);
405
406 while (!list_empty(&list)) {
407 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
408 list_del(&jd->jd_list);
409 iput(jd->jd_inode);
410 kfree(jd);
411 }
412}
413
414static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
415{
416 struct gfs2_jdesc *jd;
417 int found = 0;
418
419 list_for_each_entry(jd, head, jd_list) {
420 if (jd->jd_jid == jid) {
421 found = 1;
422 break;
423 }
424 }
425
426 if (!found)
427 jd = NULL;
428
429 return jd;
430}
431
432struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
433{
434 struct gfs2_jdesc *jd;
435
436 spin_lock(&sdp->sd_jindex_spin);
437 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
438 spin_unlock(&sdp->sd_jindex_spin);
439
440 return jd;
441}
442
443void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
444{
445 struct gfs2_jdesc *jd;
446
447 spin_lock(&sdp->sd_jindex_spin);
448 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
449 if (jd)
450 jd->jd_dirty = 1;
451 spin_unlock(&sdp->sd_jindex_spin);
452}
453
454struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
455{
456 struct gfs2_jdesc *jd;
457 int found = 0;
458
459 spin_lock(&sdp->sd_jindex_spin);
460
461 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
462 if (jd->jd_dirty) {
463 jd->jd_dirty = 0;
464 found = 1;
465 break;
466 }
467 }
468 spin_unlock(&sdp->sd_jindex_spin);
469
470 if (!found)
471 jd = NULL;
472
473 return jd;
474}
475
476int gfs2_jdesc_check(struct gfs2_jdesc *jd)
477{
478 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
479 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
480 int ar;
481 int error;
482
483 if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
484 (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
485 gfs2_consist_inode(ip);
486 return -EIO;
487 }
488 jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
489
490 error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
491 if (!error && ar) {
492 gfs2_consist_inode(ip);
493 error = -EIO;
494 }
495
496 return error;
497}
498
499/**
500 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
501 * @sdp: the filesystem
502 *
503 * Returns: errno
504 */
505
506int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
507{
508 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
509 struct gfs2_glock *j_gl = ip->i_gl;
510 struct gfs2_holder t_gh;
511 struct gfs2_log_header head;
512 int error;
513
514 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
515 GL_LOCAL_EXCL, &t_gh);
516 if (error)
517 return error;
518
519 gfs2_meta_cache_flush(ip);
520 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
521
522 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
523 if (error)
524 goto fail;
525
526 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
527 gfs2_consist(sdp);
528 error = -EIO;
529 goto fail;
530 }
531
532 /* Initialize some head of the log stuff */
533 sdp->sd_log_sequence = head.lh_sequence + 1;
534 gfs2_log_pointers_init(sdp, head.lh_blkno);
535
536 error = gfs2_quota_init(sdp);
537 if (error)
538 goto fail;
539
540 set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
541
542 gfs2_glock_dq_uninit(&t_gh);
543
544 return 0;
545
546fail:
547 t_gh.gh_flags |= GL_NOCACHE;
548 gfs2_glock_dq_uninit(&t_gh);
549
550 return error;
551}
552
553/**
554 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
555 * @sdp: the filesystem
556 *
557 * Returns: errno
558 */
559
560int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
561{
562 struct gfs2_holder t_gh;
563 int error;
564
565 gfs2_quota_sync(sdp);
566 gfs2_statfs_sync(sdp);
567
568 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
569 GL_LOCAL_EXCL | GL_NOCACHE,
570 &t_gh);
571 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
572 return error;
573
574 gfs2_meta_syncfs(sdp);
575 gfs2_log_shutdown(sdp);
576
577 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
578
579 if (t_gh.gh_gl)
580 gfs2_glock_dq_uninit(&t_gh);
581
582 gfs2_quota_cleanup(sdp);
583
584 return error;
585}
586
587int gfs2_statfs_init(struct gfs2_sbd *sdp)
588{
589 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
590 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
591 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
592 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
593 struct buffer_head *m_bh, *l_bh;
594 struct gfs2_holder gh;
595 int error;
596
597 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
598 &gh);
599 if (error)
600 return error;
601
602 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
603 if (error)
604 goto out;
605
606 if (sdp->sd_args.ar_spectator) {
607 spin_lock(&sdp->sd_statfs_spin);
608 gfs2_statfs_change_in(m_sc, m_bh->b_data +
609 sizeof(struct gfs2_dinode));
610 spin_unlock(&sdp->sd_statfs_spin);
611 } else {
612 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
613 if (error)
614 goto out_m_bh;
615
616 spin_lock(&sdp->sd_statfs_spin);
617 gfs2_statfs_change_in(m_sc, m_bh->b_data +
618 sizeof(struct gfs2_dinode));
619 gfs2_statfs_change_in(l_sc, l_bh->b_data +
620 sizeof(struct gfs2_dinode));
621 spin_unlock(&sdp->sd_statfs_spin);
622
623 brelse(l_bh);
624 }
625
626out_m_bh:
627 brelse(m_bh);
628out:
629 gfs2_glock_dq_uninit(&gh);
630 return 0;
631}
632
633void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
634 s64 dinodes)
635{
636 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
637 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
638 struct buffer_head *l_bh;
639 int error;
640
641 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
642 if (error)
643 return;
644
645 mutex_lock(&sdp->sd_statfs_mutex);
646 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
647 mutex_unlock(&sdp->sd_statfs_mutex);
648
649 spin_lock(&sdp->sd_statfs_spin);
650 l_sc->sc_total += total;
651 l_sc->sc_free += free;
652 l_sc->sc_dinodes += dinodes;
653 gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
654 spin_unlock(&sdp->sd_statfs_spin);
655
656 brelse(l_bh);
657}
658
659int gfs2_statfs_sync(struct gfs2_sbd *sdp)
660{
661 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
662 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
663 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
664 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
665 struct gfs2_holder gh;
666 struct buffer_head *m_bh, *l_bh;
667 int error;
668
669 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
670 &gh);
671 if (error)
672 return error;
673
674 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
675 if (error)
676 goto out;
677
678 spin_lock(&sdp->sd_statfs_spin);
679 gfs2_statfs_change_in(m_sc, m_bh->b_data +
680 sizeof(struct gfs2_dinode));
681 if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
682 spin_unlock(&sdp->sd_statfs_spin);
683 goto out_bh;
684 }
685 spin_unlock(&sdp->sd_statfs_spin);
686
687 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
688 if (error)
689 goto out_bh;
690
691 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
692 if (error)
693 goto out_bh2;
694
695 mutex_lock(&sdp->sd_statfs_mutex);
696 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
697 mutex_unlock(&sdp->sd_statfs_mutex);
698
699 spin_lock(&sdp->sd_statfs_spin);
700 m_sc->sc_total += l_sc->sc_total;
701 m_sc->sc_free += l_sc->sc_free;
702 m_sc->sc_dinodes += l_sc->sc_dinodes;
703 memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
704 memset(l_bh->b_data + sizeof(struct gfs2_dinode),
705 0, sizeof(struct gfs2_statfs_change));
706 spin_unlock(&sdp->sd_statfs_spin);
707
708 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
709 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
710
711 gfs2_trans_end(sdp);
712
713out_bh2:
714 brelse(l_bh);
715out_bh:
716 brelse(m_bh);
717out:
718 gfs2_glock_dq_uninit(&gh);
719 return error;
720}
721
722/**
723 * gfs2_statfs_i - Do a statfs
724 * @sdp: the filesystem
725 * @sc: the statfs_change structure to fill in
726 *
727 * Returns: errno
728 */
729
730int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
731{
732 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
733 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
734
735 spin_lock(&sdp->sd_statfs_spin);
736
737 *sc = *m_sc;
738 sc->sc_total += l_sc->sc_total;
739 sc->sc_free += l_sc->sc_free;
740 sc->sc_dinodes += l_sc->sc_dinodes;
741
742 spin_unlock(&sdp->sd_statfs_spin);
743
744 if (sc->sc_free < 0)
745 sc->sc_free = 0;
746 if (sc->sc_free > sc->sc_total)
747 sc->sc_free = sc->sc_total;
748 if (sc->sc_dinodes < 0)
749 sc->sc_dinodes = 0;
750
751 return 0;
752}
753
754/**
755 * statfs_slow_fill - add the counts of a given RG to the sc totals
756 * @rgd: the RG
757 * @sc: the sc structure
758 *
759 * Returns: 0 (always)
760 */
761
762static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
763 struct gfs2_statfs_change *sc)
764{
765 gfs2_rgrp_verify(rgd);
766 sc->sc_total += rgd->rd_ri.ri_data;
767 sc->sc_free += rgd->rd_rg.rg_free;
768 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
769 return 0;
770}
771
772/**
773 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
774 * @sdp: the filesystem
775 * @sc: the sc info that will be returned
776 *
777 * Once an error occurs (a pending signal yields -ERESTARTSYS), no further
778 * locks are requested; outstanding requests are waited for and released.
779 *
780 * FIXME: This really shouldn't busy wait like this.
781 *
782 * Returns: errno
783 */
784
785int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
786{
787 struct gfs2_holder ri_gh;
788 struct gfs2_rgrpd *rgd_next;
789 struct gfs2_holder *gha, *gh;
790 unsigned int slots = 64;
791 unsigned int x;
792 int done;
793 int error = 0, err;
794
795 memset(sc, 0, sizeof(struct gfs2_statfs_change));
796 gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
797 if (!gha)
798 return -ENOMEM;
799
800 error = gfs2_rindex_hold(sdp, &ri_gh);
801 if (error)
802 goto out;
803
804 rgd_next = gfs2_rgrpd_get_first(sdp);
805
806 for (;;) {
807 done = 1;
808
809 for (x = 0; x < slots; x++) {
810 gh = gha + x;
811
812 if (gh->gh_gl && gfs2_glock_poll(gh)) {
813 err = gfs2_glock_wait(gh);
814 if (err) {
815 gfs2_holder_uninit(gh);
816 error = err;
817 } else {
818 if (!error)
819 error = statfs_slow_fill(
820 gh->gh_gl->gl_object, sc);
821 gfs2_glock_dq_uninit(gh);
822 }
823 }
824
825 if (gh->gh_gl)
826 done = 0;
827 else if (rgd_next && !error) {
828 error = gfs2_glock_nq_init(rgd_next->rd_gl,
829 LM_ST_SHARED,
830 GL_ASYNC,
831 gh);
832 rgd_next = gfs2_rgrpd_get_next(rgd_next);
833 done = 0;
834 }
835
836 if (signal_pending(current))
837 error = -ERESTARTSYS;
838 }
839
840 if (done)
841 break;
842
843 yield();
844 }
845
846 gfs2_glock_dq_uninit(&ri_gh);
847
848out:
849 kfree(gha);
850 return error;
851}
852
853struct lfcc {
854 struct list_head list;
855 struct gfs2_holder gh;
856};
857
858/**
859 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
860 * journals are clean
861 * @sdp: the file system
862 * @state: the state to put the transaction lock into
863 * @t_gh: the hold on the transaction lock
864 *
865 * Returns: errno
866 */
867
868static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
869 struct gfs2_holder *t_gh)
870{
871 struct gfs2_inode *ip;
872 struct gfs2_holder ji_gh;
873 struct gfs2_jdesc *jd;
874 struct lfcc *lfcc;
875 LIST_HEAD(list);
876 struct gfs2_log_header lh;
877 int error;
878
879 error = gfs2_jindex_hold(sdp, &ji_gh);
880 if (error)
881 return error;
882
883 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
884 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
885 if (!lfcc) {
886 error = -ENOMEM;
887 goto out;
888 }
889 ip = GFS2_I(jd->jd_inode);
890 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
891 if (error) {
892 kfree(lfcc);
893 goto out;
894 }
895 list_add(&lfcc->list, &list);
896 }
897
898 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
899 LM_FLAG_PRIORITY | GL_NOCACHE,
900 t_gh);
901
902 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
903 error = gfs2_jdesc_check(jd);
904 if (error)
905 break;
906 error = gfs2_find_jhead(jd, &lh);
907 if (error)
908 break;
909 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
910 error = -EBUSY;
911 break;
912 }
913 }
914
915 if (error)
916 gfs2_glock_dq_uninit(t_gh);
917
918out:
919 while (!list_empty(&list)) {
920 lfcc = list_entry(list.next, struct lfcc, list);
921 list_del(&lfcc->list);
922 gfs2_glock_dq_uninit(&lfcc->gh);
923 kfree(lfcc);
924 }
925 gfs2_glock_dq_uninit(&ji_gh);
926 return error;
927}
928
929/**
930 * gfs2_freeze_fs - freezes the file system
931 * @sdp: the file system
932 *
933 * This function flushes data and meta data for all machines by
934 * acquiring the transaction log exclusively. All journals are
935 * ensured to be in a clean state as well.
936 *
937 * Returns: errno
938 */
939
940int gfs2_freeze_fs(struct gfs2_sbd *sdp)
941{
942 int error = 0;
943
944 mutex_lock(&sdp->sd_freeze_lock);
945
946 if (!sdp->sd_freeze_count++) {
947 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
948 if (error)
949 sdp->sd_freeze_count--;
950 }
951
952 mutex_unlock(&sdp->sd_freeze_lock);
953
954 return error;
955}
956
957/**
958 * gfs2_unfreeze_fs - unfreezes the file system
959 * @sdp: the file system
960 *
961 * This function allows the file system to proceed by unlocking
962 * the exclusively held transaction lock. Other GFS2 nodes are
963 * now free to acquire the lock shared and go on with their lives.
964 *
965 */
966
967void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
968{
969 mutex_lock(&sdp->sd_freeze_lock);
970
971 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
972 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
973
974 mutex_unlock(&sdp->sd_freeze_lock);
975}
976
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
new file mode 100644
index 000000000000..5bb443ae0f59
--- /dev/null
+++ b/fs/gfs2/super.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __SUPER_DOT_H__
11#define __SUPER_DOT_H__
12
13#include "incore.h"
14
15void gfs2_tune_init(struct gfs2_tune *gt);
16
17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
19struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
20
21static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
22{
23 unsigned int x;
24 spin_lock(&sdp->sd_jindex_spin);
25 x = sdp->sd_journals;
26 spin_unlock(&sdp->sd_jindex_spin);
27 return x;
28}
29
30int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
31void gfs2_jindex_free(struct gfs2_sbd *sdp);
32
33struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
34void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
35struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
36int gfs2_jdesc_check(struct gfs2_jdesc *jd);
37
38int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
39 struct gfs2_inode **ipp);
40
41int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
42int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
43
44int gfs2_statfs_init(struct gfs2_sbd *sdp);
45void gfs2_statfs_change(struct gfs2_sbd *sdp,
46 s64 total, s64 free, s64 dinodes);
47int gfs2_statfs_sync(struct gfs2_sbd *sdp);
48int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
49int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
50
51int gfs2_freeze_fs(struct gfs2_sbd *sdp);
52void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
53
54#endif /* __SUPER_DOT_H__ */
55
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
new file mode 100644
index 000000000000..0e0ec988f731
--- /dev/null
+++ b/fs/gfs2/sys.c
@@ -0,0 +1,583 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/kobject.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19#include <asm/uaccess.h>
20
21#include "gfs2.h"
22#include "incore.h"
23#include "lm.h"
24#include "sys.h"
25#include "super.h"
26#include "glock.h"
27#include "quota.h"
28#include "util.h"
29
30char *gfs2_sys_margs;
31spinlock_t gfs2_sys_margs_lock;
32
33static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
34{
35 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id);
36}
37
38static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
39{
40 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
41}
42
43static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
44{
45 unsigned int count;
46
47 mutex_lock(&sdp->sd_freeze_lock);
48 count = sdp->sd_freeze_count;
49 mutex_unlock(&sdp->sd_freeze_lock);
50
51 return snprintf(buf, PAGE_SIZE, "%u\n", count);
52}
53
54static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
55{
56 ssize_t ret = len;
57 int error = 0;
58 int n = simple_strtol(buf, NULL, 0);
59
60 if (!capable(CAP_SYS_ADMIN))
61 return -EACCES;
62
63 switch (n) {
64 case 0:
65 gfs2_unfreeze_fs(sdp);
66 break;
67 case 1:
68 error = gfs2_freeze_fs(sdp);
69 break;
70 default:
71 ret = -EINVAL;
72 }
73
74 if (error)
75 fs_warn(sdp, "freeze %d error %d", n, error);
76
77 return ret;
78}
79
80static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
81{
82 unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
83 return snprintf(buf, PAGE_SIZE, "%u\n", b);
84}
85
86static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
87{
88 if (!capable(CAP_SYS_ADMIN))
89 return -EACCES;
90
91 if (simple_strtol(buf, NULL, 0) != 1)
92 return -EINVAL;
93
94 gfs2_lm_withdraw(sdp,
95 "GFS2: fsid=%s: withdrawing from cluster at user's request\n",
96 sdp->sd_fsname);
97 return len;
98}
99
100static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
101 size_t len)
102{
103 if (!capable(CAP_SYS_ADMIN))
104 return -EACCES;
105
106 if (simple_strtol(buf, NULL, 0) != 1)
107 return -EINVAL;
108
109 gfs2_statfs_sync(sdp);
110 return len;
111}
112
113static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
114{
115 if (!capable(CAP_SYS_ADMIN))
116 return -EACCES;
117
118 if (simple_strtol(buf, NULL, 0) != 1)
119 return -EINVAL;
120
121 gfs2_gl_hash_clear(sdp, NO_WAIT);
122 return len;
123}
124
125static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
126 size_t len)
127{
128 if (!capable(CAP_SYS_ADMIN))
129 return -EACCES;
130
131 if (simple_strtol(buf, NULL, 0) != 1)
132 return -EINVAL;
133
134 gfs2_quota_sync(sdp);
135 return len;
136}
137
138static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
139 size_t len)
140{
141 u32 id;
142
143 if (!capable(CAP_SYS_ADMIN))
144 return -EACCES;
145
146 id = simple_strtoul(buf, NULL, 0);
147
148 gfs2_quota_refresh(sdp, 1, id);
149 return len;
150}
151
152static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
153 size_t len)
154{
155 u32 id;
156
157 if (!capable(CAP_SYS_ADMIN))
158 return -EACCES;
159
160 id = simple_strtoul(buf, NULL, 0);
161
162 gfs2_quota_refresh(sdp, 0, id);
163 return len;
164}
165
/*
 * Per-filesystem sysfs attribute: the generic attribute plus handlers that
 * receive the gfs2_sbd directly. Either handler may be NULL; the
 * gfs2_attr_show()/gfs2_attr_store() dispatchers check before calling.
 */
166struct gfs2_attr {
167 struct attribute attr;
168 ssize_t (*show)(struct gfs2_sbd *, char *);
169 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
170};
171
172#define GFS2_ATTR(name, mode, show, store) \
173static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
174
175GFS2_ATTR(id, 0444, id_show, NULL);
176GFS2_ATTR(fsname, 0444, fsname_show, NULL);
177GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
178GFS2_ATTR(shrink, 0200, NULL, shrink_store);
179GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
180GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
181GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
182GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
183GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
184
185static struct attribute *gfs2_attrs[] = {
186 &gfs2_attr_id.attr,
187 &gfs2_attr_fsname.attr,
188 &gfs2_attr_freeze.attr,
189 &gfs2_attr_shrink.attr,
190 &gfs2_attr_withdraw.attr,
191 &gfs2_attr_statfs_sync.attr,
192 &gfs2_attr_quota_sync.attr,
193 &gfs2_attr_quota_refresh_user.attr,
194 &gfs2_attr_quota_refresh_group.attr,
195 NULL,
196};
197
198static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
199 char *buf)
200{
201 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
202 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
203 return a->show ? a->show(sdp, buf) : 0;
204}
205
206static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
207 const char *buf, size_t len)
208{
209 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
210 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
211 return a->store ? a->store(sdp, buf, len) : len;
212}
213
214static struct sysfs_ops gfs2_attr_ops = {
215 .show = gfs2_attr_show,
216 .store = gfs2_attr_store,
217};
218
219static struct kobj_type gfs2_ktype = {
220 .default_attrs = gfs2_attrs,
221 .sysfs_ops = &gfs2_attr_ops,
222};
223
224static struct kset gfs2_kset = {
225 .subsys = &fs_subsys,
226 .kobj = {.name = "gfs2"},
227 .ktype = &gfs2_ktype,
228};
229
230/*
231 * display struct lm_lockstruct fields
232 */
233
234struct lockstruct_attr {
235 struct attribute attr;
236 ssize_t (*show)(struct gfs2_sbd *, char *);
237};
238
/*
 * LOCKSTRUCT_ATTR(name, fmt) - define one read-only lockstruct attribute.
 * Expands to a name##_show() function that prints
 * sdp->sd_lockstruct.ls_##name using @fmt (bounded to PAGE_SIZE), and to
 * the lockstruct_attr_##name object built with __ATTR_RO().
 */
239#define LOCKSTRUCT_ATTR(name, fmt) \
240static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
241{ \
242 return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
243} \
244static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
245
246LOCKSTRUCT_ATTR(jid, "%u\n");
247LOCKSTRUCT_ATTR(first, "%u\n");
248LOCKSTRUCT_ATTR(lvb_size, "%u\n");
249LOCKSTRUCT_ATTR(flags, "%d\n");
250
251static struct attribute *lockstruct_attrs[] = {
252 &lockstruct_attr_jid.attr,
253 &lockstruct_attr_first.attr,
254 &lockstruct_attr_lvb_size.attr,
255 &lockstruct_attr_flags.attr,
256 NULL,
257};
258
259/*
260 * display struct gfs2_args fields
261 */
262
263struct args_attr {
264 struct attribute attr;
265 ssize_t (*show)(struct gfs2_sbd *, char *);
266};
267
/*
 * ARGS_ATTR(name, fmt) - define one read-only mount-argument attribute.
 * Expands to a name##_show() function that prints sdp->sd_args.ar_##name
 * using @fmt (bounded to PAGE_SIZE), and to the args_attr_##name object
 * built with __ATTR_RO().
 */
268#define ARGS_ATTR(name, fmt) \
269static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
270{ \
271 return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \
272} \
273static struct args_attr args_attr_##name = __ATTR_RO(name)
274
275ARGS_ATTR(lockproto, "%s\n");
276ARGS_ATTR(locktable, "%s\n");
277ARGS_ATTR(hostdata, "%s\n");
278ARGS_ATTR(spectator, "%d\n");
279ARGS_ATTR(ignore_local_fs, "%d\n");
280ARGS_ATTR(localcaching, "%d\n");
281ARGS_ATTR(localflocks, "%d\n");
282ARGS_ATTR(debug, "%d\n");
283ARGS_ATTR(upgrade, "%d\n");
284ARGS_ATTR(num_glockd, "%u\n");
285ARGS_ATTR(posix_acl, "%d\n");
286ARGS_ATTR(quota, "%u\n");
287ARGS_ATTR(suiddir, "%d\n");
288ARGS_ATTR(data, "%d\n");
289
290/* one oddball doesn't fit the macro mold */
291static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
292{
293 return snprintf(buf, PAGE_SIZE, "%d\n",
294 !!test_bit(SDF_NOATIME, &sdp->sd_flags));
295}
296static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
297
298static struct attribute *args_attrs[] = {
299 &args_attr_lockproto.attr,
300 &args_attr_locktable.attr,
301 &args_attr_hostdata.attr,
302 &args_attr_spectator.attr,
303 &args_attr_ignore_local_fs.attr,
304 &args_attr_localcaching.attr,
305 &args_attr_localflocks.attr,
306 &args_attr_debug.attr,
307 &args_attr_upgrade.attr,
308 &args_attr_num_glockd.attr,
309 &args_attr_posix_acl.attr,
310 &args_attr_quota.attr,
311 &args_attr_suiddir.attr,
312 &args_attr_data.attr,
313 &args_attr_noatime.attr,
314 NULL,
315};
316
317/*
318 * display counters from superblock
319 */
320
321struct counters_attr {
322 struct attribute attr;
323 ssize_t (*show)(struct gfs2_sbd *, char *);
324};
325
/*
 * COUNTERS_ATTR(name, fmt) - define one read-only counter attribute.
 * Expands to a name##_show() function that reads sdp->sd_##name with
 * atomic_read(), casts it to unsigned int and prints it using @fmt, and
 * to the counters_attr_##name object built with __ATTR_RO().
 */
326#define COUNTERS_ATTR(name, fmt) \
327static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
328{ \
329 return snprintf(buf, PAGE_SIZE, fmt, \
330 (unsigned int)atomic_read(&sdp->sd_##name)); \
331} \
332static struct counters_attr counters_attr_##name = __ATTR_RO(name)
333
334COUNTERS_ATTR(glock_count, "%u\n");
335COUNTERS_ATTR(glock_held_count, "%u\n");
336COUNTERS_ATTR(inode_count, "%u\n");
337COUNTERS_ATTR(reclaimed, "%u\n");
338
339static struct attribute *counters_attrs[] = {
340 &counters_attr_glock_count.attr,
341 &counters_attr_glock_held_count.attr,
342 &counters_attr_inode_count.attr,
343 &counters_attr_reclaimed.attr,
344 NULL,
345};
346
347/*
348 * get and set struct gfs2_tune fields
349 */
350
351static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
352{
353 return snprintf(buf, PAGE_SIZE, "%u %u\n",
354 sdp->sd_tune.gt_quota_scale_num,
355 sdp->sd_tune.gt_quota_scale_den);
356}
357
358static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
359 size_t len)
360{
361 struct gfs2_tune *gt = &sdp->sd_tune;
362 unsigned int x, y;
363
364 if (!capable(CAP_SYS_ADMIN))
365 return -EACCES;
366
367 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
368 return -EINVAL;
369
370 spin_lock(&gt->gt_spin);
371 gt->gt_quota_scale_num = x;
372 gt->gt_quota_scale_den = y;
373 spin_unlock(&gt->gt_spin);
374 return len;
375}
376
377static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
378 int check_zero, const char *buf, size_t len)
379{
380 struct gfs2_tune *gt = &sdp->sd_tune;
381 unsigned int x;
382
383 if (!capable(CAP_SYS_ADMIN))
384 return -EACCES;
385
386 x = simple_strtoul(buf, NULL, 0);
387
388 if (check_zero && !x)
389 return -EINVAL;
390
391 spin_lock(&gt->gt_spin);
392 *field = x;
393 spin_unlock(&gt->gt_spin);
394 return len;
395}
396
397struct tune_attr {
398 struct attribute attr;
399 ssize_t (*show)(struct gfs2_sbd *, char *);
400 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
401};
402
/*
 * TUNE_ATTR_3(name, show, store) - define tune_attr_##name with mode 0644
 * and the given show/store handlers.
 */
403#define TUNE_ATTR_3(name, show, store) \
404static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
405
/*
 * TUNE_ATTR_2(name, store) - generate name##_show(), which prints
 * sdp->sd_tune.gt_##name as "%u\n", then define the attribute with the
 * caller-supplied store handler.
 */
406#define TUNE_ATTR_2(name, store) \
407static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
408{ \
409 return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \
410} \
411TUNE_ATTR_3(name, name##_show, store)
412
/*
 * TUNE_ATTR(name, check_zero) - generate name##_store(), which forwards to
 * tune_set() for sdp->sd_tune.gt_##name, plus the matching show handler
 * and attribute via TUNE_ATTR_2().
 */
413#define TUNE_ATTR(name, check_zero) \
414static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
415{ \
416 return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
417} \
418TUNE_ATTR_2(name, name##_store)
419
/*
 * TUNE_ATTR_DAEMON(name, process) - like TUNE_ATTR() with check_zero fixed
 * at 1, but the store handler also calls wake_up_process() on
 * sdp->sd_##process. The wake-up happens whether or not tune_set()
 * accepted the value.
 */
420#define TUNE_ATTR_DAEMON(name, process) \
421static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
422{ \
423 ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
424 wake_up_process(sdp->sd_##process); \
425 return r; \
426} \
427TUNE_ATTR_2(name, name##_store)
428
429TUNE_ATTR(ilimit, 0);
430TUNE_ATTR(ilimit_tries, 0);
431TUNE_ATTR(ilimit_min, 0);
432TUNE_ATTR(demote_secs, 0);
433TUNE_ATTR(incore_log_blocks, 0);
434TUNE_ATTR(log_flush_secs, 0);
435TUNE_ATTR(jindex_refresh_secs, 0);
436TUNE_ATTR(quota_warn_period, 0);
437TUNE_ATTR(quota_quantum, 0);
438TUNE_ATTR(atime_quantum, 0);
439TUNE_ATTR(max_readahead, 0);
440TUNE_ATTR(complain_secs, 0);
441TUNE_ATTR(reclaim_limit, 0);
442TUNE_ATTR(prefetch_secs, 0);
443TUNE_ATTR(statfs_slow, 0);
444TUNE_ATTR(new_files_jdata, 0);
445TUNE_ATTR(new_files_directio, 0);
446TUNE_ATTR(quota_simul_sync, 1);
447TUNE_ATTR(quota_cache_secs, 1);
448TUNE_ATTR(max_atomic_write, 1);
449TUNE_ATTR(stall_secs, 1);
450TUNE_ATTR(entries_per_readdir, 1);
451TUNE_ATTR(greedy_default, 1);
452TUNE_ATTR(greedy_quantum, 1);
453TUNE_ATTR(greedy_max, 1);
454TUNE_ATTR(statfs_quantum, 1);
455TUNE_ATTR_DAEMON(scand_secs, scand_process);
456TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
457TUNE_ATTR_DAEMON(logd_secs, logd_process);
458TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
459TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
460
461static struct attribute *tune_attrs[] = {
462 &tune_attr_ilimit.attr,
463 &tune_attr_ilimit_tries.attr,
464 &tune_attr_ilimit_min.attr,
465 &tune_attr_demote_secs.attr,
466 &tune_attr_incore_log_blocks.attr,
467 &tune_attr_log_flush_secs.attr,
468 &tune_attr_jindex_refresh_secs.attr,
469 &tune_attr_quota_warn_period.attr,
470 &tune_attr_quota_quantum.attr,
471 &tune_attr_atime_quantum.attr,
472 &tune_attr_max_readahead.attr,
473 &tune_attr_complain_secs.attr,
474 &tune_attr_reclaim_limit.attr,
475 &tune_attr_prefetch_secs.attr,
476 &tune_attr_statfs_slow.attr,
477 &tune_attr_quota_simul_sync.attr,
478 &tune_attr_quota_cache_secs.attr,
479 &tune_attr_max_atomic_write.attr,
480 &tune_attr_stall_secs.attr,
481 &tune_attr_entries_per_readdir.attr,
482 &tune_attr_greedy_default.attr,
483 &tune_attr_greedy_quantum.attr,
484 &tune_attr_greedy_max.attr,
485 &tune_attr_statfs_quantum.attr,
486 &tune_attr_scand_secs.attr,
487 &tune_attr_recoverd_secs.attr,
488 &tune_attr_logd_secs.attr,
489 &tune_attr_quotad_secs.attr,
490 &tune_attr_quota_scale.attr,
491 &tune_attr_new_files_jdata.attr,
492 &tune_attr_new_files_directio.attr,
493 NULL,
494};
495
496static struct attribute_group lockstruct_group = {
497 .name = "lockstruct",
498 .attrs = lockstruct_attrs,
499};
500
501static struct attribute_group counters_group = {
502 .name = "counters",
503 .attrs = counters_attrs,
504};
505
506static struct attribute_group args_group = {
507 .name = "args",
508 .attrs = args_attrs,
509};
510
511static struct attribute_group tune_group = {
512 .name = "tune",
513 .attrs = tune_attrs,
514};
515
516int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
517{
518 int error;
519
520 sdp->sd_kobj.kset = &gfs2_kset;
521 sdp->sd_kobj.ktype = &gfs2_ktype;
522
523 error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
524 if (error)
525 goto fail;
526
527 error = kobject_register(&sdp->sd_kobj);
528 if (error)
529 goto fail;
530
531 error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
532 if (error)
533 goto fail_reg;
534
535 error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
536 if (error)
537 goto fail_lockstruct;
538
539 error = sysfs_create_group(&sdp->sd_kobj, &args_group);
540 if (error)
541 goto fail_counters;
542
543 error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
544 if (error)
545 goto fail_args;
546
547 return 0;
548
549fail_args:
550 sysfs_remove_group(&sdp->sd_kobj, &args_group);
551fail_counters:
552 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
553fail_lockstruct:
554 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
555fail_reg:
556 kobject_unregister(&sdp->sd_kobj);
557fail:
558 fs_err(sdp, "error %d adding sysfs files", error);
559 return error;
560}
561
562void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
563{
564 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
565 sysfs_remove_group(&sdp->sd_kobj, &args_group);
566 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
567 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
568 kobject_unregister(&sdp->sd_kobj);
569}
570
571int gfs2_sys_init(void)
572{
573 gfs2_sys_margs = NULL;
574 spin_lock_init(&gfs2_sys_margs_lock);
575 return kset_register(&gfs2_kset);
576}
577
578void gfs2_sys_uninit(void)
579{
580 kfree(gfs2_sys_margs);
581 kset_unregister(&gfs2_kset);
582}
583
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
new file mode 100644
index 000000000000..1ca8cdac5304
--- /dev/null
+++ b/fs/gfs2/sys.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __SYS_DOT_H__
11#define __SYS_DOT_H__
12
13#include <linux/spinlock.h>
14struct gfs2_sbd;
15
16/* Allow args to be passed to GFS2 when using an initial ram disk */
17extern char *gfs2_sys_margs;
18extern spinlock_t gfs2_sys_margs_lock;
19
20int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
21void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
22
23int gfs2_sys_init(void);
24void gfs2_sys_uninit(void);
25
26#endif /* __SYS_DOT_H__ */
27
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
new file mode 100644
index 000000000000..f8dabf8446bb
--- /dev/null
+++ b/fs/gfs2/trans.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/kallsyms.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "glock.h"
22#include "log.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "trans.h"
26#include "util.h"
27
28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
29 unsigned int revokes)
30{
31 struct gfs2_trans *tr;
32 int error;
33
34 BUG_ON(current->journal_info);
35 BUG_ON(blocks == 0 && revokes == 0);
36
37 tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
38 if (!tr)
39 return -ENOMEM;
40
41 tr->tr_ip = (unsigned long)__builtin_return_address(0);
42 tr->tr_blocks = blocks;
43 tr->tr_revokes = revokes;
44 tr->tr_reserved = 1;
45 if (blocks)
46 tr->tr_reserved += 6 + blocks;
47 if (revokes)
48 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
49 sizeof(u64));
50 INIT_LIST_HEAD(&tr->tr_list_buf);
51
52 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
53
54 error = gfs2_glock_nq(&tr->tr_t_gh);
55 if (error)
56 goto fail_holder_uninit;
57
58 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
59 tr->tr_t_gh.gh_flags |= GL_NOCACHE;
60 error = -EROFS;
61 goto fail_gunlock;
62 }
63
64 error = gfs2_log_reserve(sdp, tr->tr_reserved);
65 if (error)
66 goto fail_gunlock;
67
68 current->journal_info = tr;
69
70 return 0;
71
72fail_gunlock:
73 gfs2_glock_dq(&tr->tr_t_gh);
74
75fail_holder_uninit:
76 gfs2_holder_uninit(&tr->tr_t_gh);
77 kfree(tr);
78
79 return error;
80}
81
82void gfs2_trans_end(struct gfs2_sbd *sdp)
83{
84 struct gfs2_trans *tr = current->journal_info;
85
86 BUG_ON(!tr);
87 current->journal_info = NULL;
88
89 if (!tr->tr_touched) {
90 gfs2_log_release(sdp, tr->tr_reserved);
91 gfs2_glock_dq(&tr->tr_t_gh);
92 gfs2_holder_uninit(&tr->tr_t_gh);
93 kfree(tr);
94 return;
95 }
96
97 if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) {
98 fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ",
99 tr->tr_num_buf, tr->tr_blocks);
100 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
101 }
102 if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) {
103 fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ",
104 tr->tr_num_revoke, tr->tr_revokes);
105 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
106 }
107
108 gfs2_log_commit(sdp, tr);
109 gfs2_glock_dq(&tr->tr_t_gh);
110 gfs2_holder_uninit(&tr->tr_t_gh);
111 kfree(tr);
112
113 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
114 gfs2_log_flush(sdp, NULL);
115}
116
117void gfs2_trans_add_gl(struct gfs2_glock *gl)
118{
119 lops_add(gl->gl_sbd, &gl->gl_le);
120}
121
122/**
123 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
124 * @gl: the glock the buffer belongs to
125 * @bh: The buffer to add
126 * @meta: True in the case of adding metadata
127 *
128 */
129
130void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
131{
132 struct gfs2_sbd *sdp = gl->gl_sbd;
133 struct gfs2_bufdata *bd;
134
135 bd = bh->b_private;
136 if (bd)
137 gfs2_assert(sdp, bd->bd_gl == gl);
138 else {
139 gfs2_attach_bufdata(gl, bh, meta);
140 bd = bh->b_private;
141 }
142 lops_add(sdp, &bd->bd_le);
143}
144
145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno)
146{
147 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
148 GFP_NOFS | __GFP_NOFAIL);
149 lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
150 rv->rv_blkno = blkno;
151 lops_add(sdp, &rv->rv_le);
152}
153
154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
155{
156 struct gfs2_revoke *rv;
157 int found = 0;
158
159 gfs2_log_lock(sdp);
160
161 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
162 if (rv->rv_blkno == blkno) {
163 list_del(&rv->rv_le.le_list);
164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
165 sdp->sd_log_num_revoke--;
166 found = 1;
167 break;
168 }
169 }
170
171 gfs2_log_unlock(sdp);
172
173 if (found) {
174 struct gfs2_trans *tr = current->journal_info;
175 kfree(rv);
176 tr->tr_num_revoke_rm++;
177 }
178}
179
180void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
181{
182 lops_add(rgd->rd_sbd, &rgd->rd_le);
183}
184
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
new file mode 100644
index 000000000000..23d4cbe1de5b
--- /dev/null
+++ b/fs/gfs2/trans.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __TRANS_DOT_H__
11#define __TRANS_DOT_H__
12
13#include <linux/buffer_head.h>
14struct gfs2_sbd;
15struct gfs2_rgrpd;
16struct gfs2_glock;
17
18#define RES_DINODE 1
19#define RES_INDIRECT 1
20#define RES_JDATA 1
21#define RES_DATA 1
22#define RES_LEAF 1
23#define RES_RG_BIT 2
24#define RES_EATTR 1
25#define RES_STATFS 1
26#define RES_QUOTA 2
27
28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
29 unsigned int revokes);
30
31void gfs2_trans_end(struct gfs2_sbd *sdp);
32
33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38
39#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
new file mode 100644
index 000000000000..196c604faadc
--- /dev/null
+++ b/fs/gfs2/util.c
@@ -0,0 +1,245 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "glock.h"
23#include "lm.h"
24#include "util.h"
25
26kmem_cache_t *gfs2_glock_cachep __read_mostly;
27kmem_cache_t *gfs2_inode_cachep __read_mostly;
28kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
29
30void gfs2_assert_i(struct gfs2_sbd *sdp)
31{
32 printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
33 sdp->sd_fsname);
34}
35
36/**
37 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
38 * Returns: -1 if this call withdrew the machine,
39 * -2 if it was already withdrawn
40 */
41
42int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
43 const char *function, char *file, unsigned int line)
44{
45 int me;
46 me = gfs2_lm_withdraw(sdp,
47 "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
48 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
49 sdp->sd_fsname, assertion,
50 sdp->sd_fsname, function, file, line);
51 dump_stack();
52 return (me) ? -1 : -2;
53}
54
55/**
56 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
57 * Returns: -1 if we printed something
58 * -2 if we didn't
59 */
60
61int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
62 const char *function, char *file, unsigned int line)
63{
64 if (time_before(jiffies,
65 sdp->sd_last_warning +
66 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
67 return -2;
68
69 printk(KERN_WARNING
70 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
71 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
72 sdp->sd_fsname, assertion,
73 sdp->sd_fsname, function, file, line);
74
75 if (sdp->sd_args.ar_debug)
76 BUG();
77 else
78 dump_stack();
79
80 sdp->sd_last_warning = jiffies;
81
82 return -1;
83}
84
85/**
86 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
87 * Returns: -1 if this call withdrew the machine,
88 * 0 if it was already withdrawn
89 */
90
91int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
92 char *file, unsigned int line)
93{
94 int rv;
95 rv = gfs2_lm_withdraw(sdp,
96 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
97 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
98 sdp->sd_fsname,
99 sdp->sd_fsname, function, file, line);
100 return rv;
101}
102
103/**
104 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
105 * Returns: -1 if this call withdrew the machine,
106 * 0 if it was already withdrawn
107 */
108
109int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
110 const char *function, char *file, unsigned int line)
111{
112 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
113 int rv;
114 rv = gfs2_lm_withdraw(sdp,
115 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
116 "GFS2: fsid=%s: inode = %llu %llu\n"
117 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
118 sdp->sd_fsname,
119 sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
120 (unsigned long long)ip->i_num.no_addr,
121 sdp->sd_fsname, function, file, line);
122 return rv;
123}
124
125/**
126 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
127 * Returns: -1 if this call withdrew the machine,
128 * 0 if it was already withdrawn
129 */
130
131int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
132 const char *function, char *file, unsigned int line)
133{
134 struct gfs2_sbd *sdp = rgd->rd_sbd;
135 int rv;
136 rv = gfs2_lm_withdraw(sdp,
137 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
138 "GFS2: fsid=%s: RG = %llu\n"
139 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
140 sdp->sd_fsname,
141 sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
142 sdp->sd_fsname, function, file, line);
143 return rv;
144}
145
146/**
147 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
148 * Returns: -1 if this call withdrew the machine,
149 * -2 if it was already withdrawn
150 */
151
152int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
153 const char *type, const char *function, char *file,
154 unsigned int line)
155{
156 int me;
157 me = gfs2_lm_withdraw(sdp,
158 "GFS2: fsid=%s: fatal: invalid metadata block\n"
159 "GFS2: fsid=%s: bh = %llu (%s)\n"
160 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
161 sdp->sd_fsname,
162 sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type,
163 sdp->sd_fsname, function, file, line);
164 return (me) ? -1 : -2;
165}
166
167/**
168 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
169 * Returns: -1 if this call withdrew the machine,
170 * -2 if it was already withdrawn
171 */
172
173int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
174 u16 type, u16 t, const char *function,
175 char *file, unsigned int line)
176{
177 int me;
178 me = gfs2_lm_withdraw(sdp,
179 "GFS2: fsid=%s: fatal: invalid metadata block\n"
180 "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
181 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
182 sdp->sd_fsname,
183 sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t,
184 sdp->sd_fsname, function, file, line);
185 return (me) ? -1 : -2;
186}
187
188/**
189 * gfs2_io_error_i - Flag an I/O error and withdraw
190 * Returns: -1 if this call withdrew the machine,
191 * 0 if it was already withdrawn
192 */
193
194int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
195 unsigned int line)
196{
197 int rv;
198 rv = gfs2_lm_withdraw(sdp,
199 "GFS2: fsid=%s: fatal: I/O error\n"
200 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
201 sdp->sd_fsname,
202 sdp->sd_fsname, function, file, line);
203 return rv;
204}
205
206/**
207 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
208 * Returns: -1 if this call withdrew the machine,
209 * 0 if it was already withdrawn
210 */
211
212int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
213 const char *function, char *file, unsigned int line)
214{
215 int rv;
216 rv = gfs2_lm_withdraw(sdp,
217 "GFS2: fsid=%s: fatal: I/O error\n"
218 "GFS2: fsid=%s: block = %llu\n"
219 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
220 sdp->sd_fsname,
221 sdp->sd_fsname, (unsigned long long)bh->b_blocknr,
222 sdp->sd_fsname, function, file, line);
223 return rv;
224}
225
226void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
227 unsigned int bit, int new_value)
228{
229 unsigned int c, o, b = bit;
230 int old_value;
231
232 c = b / (8 * PAGE_SIZE);
233 b %= 8 * PAGE_SIZE;
234 o = b / 8;
235 b %= 8;
236
237 old_value = (bitmap[c][o] & (1 << b));
238 gfs2_assert_withdraw(sdp, !old_value != !new_value);
239
240 if (new_value)
241 bitmap[c][o] |= 1 << b;
242 else
243 bitmap[c][o] &= ~(1 << b);
244}
245
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
new file mode 100644
index 000000000000..76a50899fe9e
--- /dev/null
+++ b/fs/gfs2/util.h
@@ -0,0 +1,170 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__
12
13#include "incore.h"
14
/*
 * fs_printk - printk() with a "GFS2: fsid=<sd_fsname>: " prefix.
 * @level is a KERN_* string literal pasted in front of the format,
 * @fs is a pointer to an object with an sd_fsname member, and @fmt plus
 * any further arguments are forwarded to printk().
 */
#define fs_printk(level, fs, fmt, args...) \
	printk(level "GFS2: fsid=%s: " fmt, (fs)->sd_fsname, ## args)

/* Per-severity shorthands for fs_printk(). */
#define fs_info(fs, fmt, args...) \
	fs_printk(KERN_INFO, fs, fmt, ## args)

#define fs_warn(fs, fmt, args...) \
	fs_printk(KERN_WARNING, fs, fmt, ## args)

#define fs_err(fs, fmt, args...) \
	fs_printk(KERN_ERR, fs, fmt, ## args)
26
27
void gfs2_assert_i(struct gfs2_sbd *sdp);

/*
 * gfs2_assert - hard assertion.
 * When @cond is false, calls gfs2_assert_i(@fs) and then BUG().
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define gfs2_assert(fs, cond)			\
do {						\
	if (unlikely(!(cond))) {		\
		gfs2_assert_i(fs);		\
		BUG();				\
	}					\
} while (0)
37
38
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
			   const char *function, char *file, unsigned int line);

/*
 * gfs2_assert_withdraw - assertion that reports through
 * gfs2_assert_withdraw_i().
 * Evaluates to 0 when @cond holds.  Otherwise evaluates to the return value
 * of gfs2_assert_withdraw_i(), which is given the stringified condition and
 * the call site (function, file, line).
 */
#define gfs2_assert_withdraw(fs, cond)					\
	((likely(cond)) ? 0 :						\
	 gfs2_assert_withdraw_i((fs), #cond,				\
				__FUNCTION__, __FILE__, __LINE__))
45
46
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
		       const char *function, char *file, unsigned int line);

/*
 * gfs2_assert_warn - assertion that reports through gfs2_assert_warn_i().
 * Evaluates to 0 when @cond holds.  Otherwise evaluates to the return value
 * of gfs2_assert_warn_i(), which is given the stringified condition and the
 * call site (function, file, line).
 */
#define gfs2_assert_warn(fs, cond)					\
	((likely(cond)) ? 0 :						\
	 gfs2_assert_warn_i((fs), #cond,				\
			    __FUNCTION__, __FILE__, __LINE__))
53
54
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
		   const char *function, char *file, unsigned int line);

/*
 * gfs2_consist - call gfs2_consist_i() for @fs with cluster_wide = 0 and
 * the current function, file and line as the call site.
 */
#define gfs2_consist(fs) \
	gfs2_consist_i((fs), 0, __FUNCTION__, __FILE__, __LINE__)
60
61
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
			 const char *function, char *file, unsigned int line);

/*
 * gfs2_consist_inode - call gfs2_consist_inode_i() for @inode with
 * cluster_wide = 0 and the current function, file and line as the call site.
 */
#define gfs2_consist_inode(inode) \
	gfs2_consist_inode_i((inode), 0, __FUNCTION__, __FILE__, __LINE__)
67
68
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
			 const char *function, char *file, unsigned int line);

/*
 * gfs2_consist_rgrpd - call gfs2_consist_rgrpd_i() for @rg with
 * cluster_wide = 0 and the current function, file and line as the call site.
 */
#define gfs2_consist_rgrpd(rg) \
	gfs2_consist_rgrpd_i((rg), 0, __FUNCTION__, __FILE__, __LINE__)
74
75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
77 const char *type, const char *function,
78 char *file, unsigned int line);
79
80static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
81 struct buffer_head *bh,
82 const char *function,
83 char *file, unsigned int line)
84{
85 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
86 u32 magic = mh->mh_magic;
87 magic = be32_to_cpu(magic);
88 if (unlikely(magic != GFS2_MAGIC))
89 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
90 file, line);
91 return 0;
92}
93
94#define gfs2_meta_check(sdp, bh) \
95gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
96
97
98int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
99 u16 type, u16 t,
100 const char *function,
101 char *file, unsigned int line);
102
103static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
104 struct buffer_head *bh,
105 u16 type,
106 const char *function,
107 char *file, unsigned int line)
108{
109 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
110 u32 magic = mh->mh_magic;
111 u16 t = be32_to_cpu(mh->mh_type);
112 magic = be32_to_cpu(magic);
113 if (unlikely(magic != GFS2_MAGIC))
114 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
115 file, line);
116 if (unlikely(t != type))
117 return gfs2_metatype_check_ii(sdp, bh, type, t, function,
118 file, line);
119 return 0;
120}
121
122#define gfs2_metatype_check(sdp, bh, type) \
123gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
124
125static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
126 u16 format)
127{
128 struct gfs2_meta_header *mh;
129 mh = (struct gfs2_meta_header *)bh->b_data;
130 mh->mh_type = cpu_to_be32(type);
131 mh->mh_format = cpu_to_be32(format);
132}
133
134
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);

/*
 * gfs2_io_error - report an I/O error on @sdp from the current call site.
 * Expands to a single gfs2_io_error_i() call expression, so its int result
 * can be used and it is safe as the body of an unbraced if/else.  The
 * definition deliberately carries no trailing semicolon; the caller
 * supplies it.
 */
#define gfs2_io_error(sdp) \
	gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)
140
141
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);

/*
 * gfs2_io_error_bh - report an I/O error on buffer @bh of @sdp from the
 * current call site.
 * Expands to a single gfs2_io_error_bh_i() call expression, so its int
 * result can be used and it is safe as the body of an unbraced if/else.
 * The definition deliberately carries no trailing semicolon; the caller
 * supplies it.
 */
#define gfs2_io_error_bh(sdp, bh) \
	gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
147
148
/*
 * kmem_cache_t pointers shared through this header.  These are extern
 * declarations only; the definitions are not in this header.
 * NOTE(review): presumably the caches for glocks, inodes and bufdata
 * objects, going by the names - confirm at the definitions.
 */
149extern kmem_cache_t *gfs2_glock_cachep;
150extern kmem_cache_t *gfs2_inode_cachep;
151extern kmem_cache_t *gfs2_bufdata_cachep;
152
153static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
154 unsigned int *p)
155{
156 unsigned int x;
157 spin_lock(&gt->gt_spin);
158 x = *p;
159 spin_unlock(&gt->gt_spin);
160 return x;
161}
162
163#define gfs2_tune_get(sdp, field) \
164gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
165
/*
 * gfs2_icbit_munge - set (new_value non-zero) or clear (new_value zero)
 * bit number @bit in @bitmap, an array of chunks of 8 * PAGE_SIZE bits
 * each.  The implementation asserts via gfs2_assert_withdraw() that the
 * bit changes state.
 */
166void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
167 unsigned int bit, int new_value);
168
169#endif /* __UTIL_DOT_H__ */
170