path: root/fs/ext2
author    Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/ext2
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/ext2')
-rw-r--r--  fs/ext2/CHANGES            157
-rw-r--r--  fs/ext2/Makefile            12
-rw-r--r--  fs/ext2/acl.c              518
-rw-r--r--  fs/ext2/acl.h               82
-rw-r--r--  fs/ext2/balloc.c           699
-rw-r--r--  fs/ext2/bitmap.c            25
-rw-r--r--  fs/ext2/dir.c              673
-rw-r--r--  fs/ext2/ext2.h             160
-rw-r--r--  fs/ext2/file.c              68
-rw-r--r--  fs/ext2/fsync.c             51
-rw-r--r--  fs/ext2/ialloc.c           735
-rw-r--r--  fs/ext2/inode.c           1276
-rw-r--r--  fs/ext2/ioctl.c             81
-rw-r--r--  fs/ext2/namei.c            418
-rw-r--r--  fs/ext2/super.c           1161
-rw-r--r--  fs/ext2/symlink.c           52
-rw-r--r--  fs/ext2/xattr.c           1043
-rw-r--r--  fs/ext2/xattr.h            118
-rw-r--r--  fs/ext2/xattr_security.c    53
-rw-r--r--  fs/ext2/xattr_trusted.c     64
-rw-r--r--  fs/ext2/xattr_user.c        77
21 files changed, 7523 insertions(+), 0 deletions(-)
diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES
new file mode 100644
index 000000000000..aa5aaf0e5911
--- /dev/null
+++ b/fs/ext2/CHANGES
@@ -0,0 +1,157 @@
1Changes from version 0.5a to version 0.5b
2=========================================
3 - Now that we have sysctl(), the immutable flag cannot be changed when
4 the system is running at security level > 0.
5 - Some cleanups in the code.
6 - More consistency checks on directories.
7 - The ext2.diff patch from Tom May <ftom@netcom.com> has been
8 integrated. This patch replaces expensive "/" and "%" with
9 cheap ">>" and "&" where possible.
10
11Changes from version 0.5 to version 0.5a
12========================================
13 - Zero the partial block following the end of the file when a file
14 is truncated.
15 - Dates updated in the copyright.
16 - More checks when the filesystem is mounted: the count of blocks,
17 fragments, and inodes per group is checked against the block size.
18 - The buffers used by the error routines are now static variables, to
19 avoid using space on the kernel stack, as requested by Linus.
20 - Some cleanups in the error messages (some versions of syslog contain
21 a bug which truncates an error message if it contains '\n').
22 - Check that no data can be written to a file past the 2GB limit.
23 - The famous readdir() bug has been fixed by Stephen Tweedie.
24 - Added a revision level in the superblock.
25 - Full support for O_SYNC flag of the open system call.
26 - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes
27 ext2fs to consider user #uid like root for the reserved blocks.
28 `resgid' acts the same way with group #gid. New fields in the
29 superblock contain default values for resuid and resgid and can
30 be modified by tune2fs.
31 Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>.
32 - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs
33 to remove the blocks used for FS structures from the total block
34 count in statfs. With `minixdf', ext2fs mimics Minix behavior
35 in statfs (i.e. it returns the total number of blocks on the
36 partition). This is intended to make bde happy :-)
37 - New file attributes:
38 - Immutable files cannot be modified. Data cannot be written to
39 these files. They cannot be removed, renamed and new links cannot
40 be created. Even root cannot modify the files. He has to remove
41 the immutable attribute first.
42 - Append-only files: can only be written in append-mode when writing.
43 They cannot be removed, renamed and new links cannot be created.
44 Note: files may only be added to an append-only directory.
45 - No-dump files: the attribute is not used by the kernel. My port
46 of dump uses it to avoid backing up files which are not important.
47 - New check in ext2_check_dir_entry: the inode number is checked.
48 - Support for big file systems: the copy of the FS descriptor is now
49 dynamically allocated (previous versions used a fixed size array).
50 This allows mounting 2GB+ filesystems.
51 - Reorganization of the ext2_inode structure to allow other operating
52 systems to create specific fields if they use ext2fs as their native
53 file system. Currently, ext2fs is only implemented in Linux but
54 will soon be part of Gnu Hurd and of Masix.
55
56Changes from version 0.4b to version 0.5
57========================================
58 - New superblock fields: s_lastcheck and s_checkinterval added
59 by Uwe Ohse <uwe@tirka.gun.de> to implement time-dependent checks
60 of the file system
61 - Real random numbers for secure rm added by Pierre del Perugia
62 <delperug@gla.ecoledoc.ibp.fr>
63 - The mount warnings related to the state of a fs are not printed
64 if the fs is mounted read-only, idea by Nick Holloway
65 <alfie@dcs.warwick.ac.uk>
66
67Changes from version 0.4a to version 0.4b
68=========================================
69 - Copyrights changed to include the name of my laboratory.
70 - Clean up of balloc.c and ialloc.c.
71 - More consistency checks.
72 - Block preallocation added by Stephen Tweedie.
73 - Direct reads of directories disallowed.
74 - Readahead implemented in readdir by Stephen Tweedie.
75 - Bugs in block and inodes allocation fixed.
76 - Readahead implemented in ext2_find_entry by Chip Salzenberg.
77 - New mount options:
78 `check=none|normal|strict'
79 `debug'
80 `errors=continue|remount-ro|panic'
81 `grpid', `bsdgroups'
82 `nocheck'
83 `nogrpid', `sysvgroups'
84 - truncate() now tries to deallocate contiguous blocks in a single call
85 to ext2_free_blocks().
86 - lots of cosmetic changes.
87
88Changes from version 0.4 to version 0.4a
89========================================
90 - the `sync' option support is now complete. Version 0.4 was not
91 supporting it when truncating a file. I have tested the synchronous
92 writes and they work but they make the system very slow :-( I have
93 to work again on this to make it faster.
94 - when detecting an error on a mounted filesystem, version 0.4 used
95 to try to write a flag in the super block even if the filesystem had
96 been mounted read-only. This is fixed.
97 - the `sb=#' option now causes the kernel code to use the filesystem
98 descriptors located at block #+1. Version 0.4 used the superblock
99 backup located at block # but used the main copy of the descriptors.
100 - a new file attribute `S' is supported. This attribute causes
101 synchronous writes but is applied to a file not to the entire file
102 system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for
103 suggesting it).
104 - the directory cache is inhibited by default. The cache management
105 code seems to be buggy and I have to look at it carefully before
106 using it again.
107 - deleting a file with the `s' attribute (secure deletion) causes its
108 blocks to be overwritten with random values not with zeros (thanks to
109 Michael A. Griffith <grif@cs.ucr.edu> for suggesting it).
110 - lots of cosmetic changes have been made.
111
112Changes from version 0.3 to version 0.4
113=======================================
114 - Three new mount options are supported: `check', `sync' and `sb=#'.
115 `check' tells the kernel code to make more consistency checks
116 when the file system is mounted. Currently, the kernel code checks
117 that the blocks and inodes bitmaps are consistent with the free
118 blocks and inodes counts. More checks will be added in future
119 releases.
120 `sync' tells the kernel code to use synchronous writes when updating
121 an inode, a bitmap, a directory entry or an indirect block. This
122 can make the file system much slower but can be a big win for files
123 recovery in case of a crash (and we can now say to the BSD folks
124 that Linux also supports synchronous updates :-).
125 `sb=#' tells the kernel code to use an alternate super block instead
126 of its master copy. `#' is the number of the block (counted in
127 1024 bytes blocks) which contains the alternate super block.
128 An ext2 file system typically contains backups of the super block
129 at blocks 8193, 16385, and so on.
130 - I have changed the meaning of the valid flag used by e2fsck. It
131 now contains the state of the file system. If the kernel code
132 detects an inconsistency while the file system is mounted, it flags
133 it as erroneous and e2fsck will detect that on next run.
134 - The super block now contains a mount counter. This counter is
135 incremented each time the file system is mounted read/write. When
136 this counter becomes bigger than the maximal mount count (also stored
137 in the super block), e2fsck checks the file system, even if it had
138 been unmounted cleanly, and resets this counter to 0.
139 - File attributes are now supported. One can associate a set of
140 attributes to a file. Three attributes are defined:
141 `c': the file is marked for automatic compression,
142 `s': the file is marked for secure deletion: when the file is
143 deleted, its blocks are zeroed and written back to the disk,
144 `u': the file is marked for undeletion: when the file is deleted,
145 its contents are saved to allow a future undeletion.
146 Currently, only the `s' attribute is implemented in the kernel
147 code. Support for the other attributes will be added in a future
148 release.
149 - a few bugs related to times updates have been fixed by Bruce
150 Evans and me.
151 - a bug related to the links count of deleted inodes has been fixed.
152 Previous versions used to keep the links count set to 1 when a file
153 was deleted. The new version now sets links_count to 0 when deleting
154 the last link.
155 - a race condition when deallocating an inode has been fixed by
156 Stephen Tweedie.
157
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
new file mode 100644
index 000000000000..ee240a14e70f
--- /dev/null
+++ b/fs/ext2/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the linux ext2-filesystem routines.
3#
4
5obj-$(CONFIG_EXT2_FS) += ext2.o
6
7ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o
9
10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
new file mode 100644
index 000000000000..8369ee8d28c4
--- /dev/null
+++ b/fs/ext2/acl.c
@@ -0,0 +1,518 @@
1/*
2 * linux/fs/ext2/acl.c
3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */
6
7#include <linux/init.h>
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/fs.h>
11#include "ext2.h"
12#include "xattr.h"
13#include "acl.h"
14
15/*
16 * Convert from filesystem to in-memory representation.
17 */
18static struct posix_acl *
19ext2_acl_from_disk(const void *value, size_t size)
20{
21 const char *end = (char *)value + size;
22 int n, count;
23 struct posix_acl *acl;
24
25 if (!value)
26 return NULL;
27 if (size < sizeof(ext2_acl_header))
28 return ERR_PTR(-EINVAL);
29 if (((ext2_acl_header *)value)->a_version !=
30 cpu_to_le32(EXT2_ACL_VERSION))
31 return ERR_PTR(-EINVAL);
32 value = (char *)value + sizeof(ext2_acl_header);
33 count = ext2_acl_count(size);
34 if (count < 0)
35 return ERR_PTR(-EINVAL);
36 if (count == 0)
37 return NULL;
38 acl = posix_acl_alloc(count, GFP_KERNEL);
39 if (!acl)
40 return ERR_PTR(-ENOMEM);
41 for (n=0; n < count; n++) {
42 ext2_acl_entry *entry =
43 (ext2_acl_entry *)value;
44 if ((char *)value + sizeof(ext2_acl_entry_short) > end)
45 goto fail;
46 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
47 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
48 switch(acl->a_entries[n].e_tag) {
49 case ACL_USER_OBJ:
50 case ACL_GROUP_OBJ:
51 case ACL_MASK:
52 case ACL_OTHER:
53 value = (char *)value +
54 sizeof(ext2_acl_entry_short);
55 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
56 break;
57
58 case ACL_USER:
59 case ACL_GROUP:
60 value = (char *)value + sizeof(ext2_acl_entry);
61 if ((char *)value > end)
62 goto fail;
63 acl->a_entries[n].e_id =
64 le32_to_cpu(entry->e_id);
65 break;
66
67 default:
68 goto fail;
69 }
70 }
71 if (value != end)
72 goto fail;
73 return acl;
74
75fail:
76 posix_acl_release(acl);
77 return ERR_PTR(-EINVAL);
78}
79
80/*
81 * Convert from in-memory to filesystem representation.
82 */
83static void *
84ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
85{
86 ext2_acl_header *ext_acl;
87 char *e;
88 size_t n;
89
90 *size = ext2_acl_size(acl->a_count);
91 ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) +
92 acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL);
93 if (!ext_acl)
94 return ERR_PTR(-ENOMEM);
95 ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION);
96 e = (char *)ext_acl + sizeof(ext2_acl_header);
97 for (n=0; n < acl->a_count; n++) {
98 ext2_acl_entry *entry = (ext2_acl_entry *)e;
99 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
100 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
101 switch(acl->a_entries[n].e_tag) {
102 case ACL_USER:
103 case ACL_GROUP:
104 entry->e_id =
105 cpu_to_le32(acl->a_entries[n].e_id);
106 e += sizeof(ext2_acl_entry);
107 break;
108
109 case ACL_USER_OBJ:
110 case ACL_GROUP_OBJ:
111 case ACL_MASK:
112 case ACL_OTHER:
113 e += sizeof(ext2_acl_entry_short);
114 break;
115
116 default:
117 goto fail;
118 }
119 }
120 return (char *)ext_acl;
121
122fail:
123 kfree(ext_acl);
124 return ERR_PTR(-EINVAL);
125}
126
127static inline struct posix_acl *
128ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
129{
130 struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
131
132 spin_lock(&inode->i_lock);
133 if (*i_acl != EXT2_ACL_NOT_CACHED)
134 acl = posix_acl_dup(*i_acl);
135 spin_unlock(&inode->i_lock);
136
137 return acl;
138}
139
140static inline void
141ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
142 struct posix_acl *acl)
143{
144 spin_lock(&inode->i_lock);
145 if (*i_acl != EXT2_ACL_NOT_CACHED)
146 posix_acl_release(*i_acl);
147 *i_acl = posix_acl_dup(acl);
148 spin_unlock(&inode->i_lock);
149}
150
151/*
152 * inode->i_sem: don't care
153 */
154static struct posix_acl *
155ext2_get_acl(struct inode *inode, int type)
156{
157 struct ext2_inode_info *ei = EXT2_I(inode);
158 int name_index;
159 char *value = NULL;
160 struct posix_acl *acl;
161 int retval;
162
163 if (!test_opt(inode->i_sb, POSIX_ACL))
164 return NULL;
165
166 switch(type) {
167 case ACL_TYPE_ACCESS:
168 acl = ext2_iget_acl(inode, &ei->i_acl);
169 if (acl != EXT2_ACL_NOT_CACHED)
170 return acl;
171 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
172 break;
173
174 case ACL_TYPE_DEFAULT:
175 acl = ext2_iget_acl(inode, &ei->i_default_acl);
176 if (acl != EXT2_ACL_NOT_CACHED)
177 return acl;
178 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
179 break;
180
181 default:
182 return ERR_PTR(-EINVAL);
183 }
184 retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
185 if (retval > 0) {
186 value = kmalloc(retval, GFP_KERNEL);
187 if (!value)
188 return ERR_PTR(-ENOMEM);
189 retval = ext2_xattr_get(inode, name_index, "", value, retval);
190 }
191 if (retval > 0)
192 acl = ext2_acl_from_disk(value, retval);
193 else if (retval == -ENODATA || retval == -ENOSYS)
194 acl = NULL;
195 else
196 acl = ERR_PTR(retval);
197 if (value)
198 kfree(value);
199
200 if (!IS_ERR(acl)) {
201 switch(type) {
202 case ACL_TYPE_ACCESS:
203 ext2_iset_acl(inode, &ei->i_acl, acl);
204 break;
205
206 case ACL_TYPE_DEFAULT:
207 ext2_iset_acl(inode, &ei->i_default_acl, acl);
208 break;
209 }
210 }
211 return acl;
212}
213
214/*
215 * inode->i_sem: down
216 */
217static int
218ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219{
220 struct ext2_inode_info *ei = EXT2_I(inode);
221 int name_index;
222 void *value = NULL;
223 size_t size;
224 int error;
225
226 if (S_ISLNK(inode->i_mode))
227 return -EOPNOTSUPP;
228 if (!test_opt(inode->i_sb, POSIX_ACL))
229 return 0;
230
231 switch(type) {
232 case ACL_TYPE_ACCESS:
233 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
234 if (acl) {
235 mode_t mode = inode->i_mode;
236 error = posix_acl_equiv_mode(acl, &mode);
237 if (error < 0)
238 return error;
239 else {
240 inode->i_mode = mode;
241 mark_inode_dirty(inode);
242 if (error == 0)
243 acl = NULL;
244 }
245 }
246 break;
247
248 case ACL_TYPE_DEFAULT:
249 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
250 if (!S_ISDIR(inode->i_mode))
251 return acl ? -EACCES : 0;
252 break;
253
254 default:
255 return -EINVAL;
256 }
257 if (acl) {
258 value = ext2_acl_to_disk(acl, &size);
259 if (IS_ERR(value))
260 return (int)PTR_ERR(value);
261 }
262
263 error = ext2_xattr_set(inode, name_index, "", value, size, 0);
264
265 if (value)
266 kfree(value);
267 if (!error) {
268 switch(type) {
269 case ACL_TYPE_ACCESS:
270 ext2_iset_acl(inode, &ei->i_acl, acl);
271 break;
272
273 case ACL_TYPE_DEFAULT:
274 ext2_iset_acl(inode, &ei->i_default_acl, acl);
275 break;
276 }
277 }
278 return error;
279}
280
281static int
282ext2_check_acl(struct inode *inode, int mask)
283{
284 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
285
286 if (acl) {
287 int error = posix_acl_permission(inode, acl, mask);
288 posix_acl_release(acl);
289 return error;
290 }
291
292 return -EAGAIN;
293}
294
295int
296ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
297{
298 return generic_permission(inode, mask, ext2_check_acl);
299}
300
301/*
302 * Initialize the ACLs of a new inode. Called from ext2_new_inode.
303 *
304 * dir->i_sem: down
305 * inode->i_sem: up (access to inode is still exclusive)
306 */
307int
308ext2_init_acl(struct inode *inode, struct inode *dir)
309{
310 struct posix_acl *acl = NULL;
311 int error = 0;
312
313 if (!S_ISLNK(inode->i_mode)) {
314 if (test_opt(dir->i_sb, POSIX_ACL)) {
315 acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT);
316 if (IS_ERR(acl))
317 return PTR_ERR(acl);
318 }
319 if (!acl)
320 inode->i_mode &= ~current->fs->umask;
321 }
322 if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
323 struct posix_acl *clone;
324 mode_t mode;
325
326 if (S_ISDIR(inode->i_mode)) {
327 error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
328 if (error)
329 goto cleanup;
330 }
331 clone = posix_acl_clone(acl, GFP_KERNEL);
332 error = -ENOMEM;
333 if (!clone)
334 goto cleanup;
335 mode = inode->i_mode;
336 error = posix_acl_create_masq(clone, &mode);
337 if (error >= 0) {
338 inode->i_mode = mode;
339 if (error > 0) {
340 /* This is an extended ACL */
341 error = ext2_set_acl(inode,
342 ACL_TYPE_ACCESS, clone);
343 }
344 }
345 posix_acl_release(clone);
346 }
347cleanup:
348 posix_acl_release(acl);
349 return error;
350}
351
352/*
353 * Does chmod for an inode that may have an Access Control List. The
354 * inode->i_mode field must be updated to the desired value by the caller
355 * before calling this function.
356 * Returns 0 on success, or a negative error number.
357 *
358 * We change the ACL rather than storing some ACL entries in the file
359 * mode permission bits (which would be more efficient), because that
360 * would break once additional permissions (like ACL_APPEND, ACL_DELETE
361 * for directories) are added. There are no more bits available in the
362 * file mode.
363 *
364 * inode->i_sem: down
365 */
366int
367ext2_acl_chmod(struct inode *inode)
368{
369 struct posix_acl *acl, *clone;
370 int error;
371
372 if (!test_opt(inode->i_sb, POSIX_ACL))
373 return 0;
374 if (S_ISLNK(inode->i_mode))
375 return -EOPNOTSUPP;
376 acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
377 if (IS_ERR(acl) || !acl)
378 return PTR_ERR(acl);
379 clone = posix_acl_clone(acl, GFP_KERNEL);
380 posix_acl_release(acl);
381 if (!clone)
382 return -ENOMEM;
383 error = posix_acl_chmod_masq(clone, inode->i_mode);
384 if (!error)
385 error = ext2_set_acl(inode, ACL_TYPE_ACCESS, clone);
386 posix_acl_release(clone);
387 return error;
388}
389
390/*
391 * Extended attribute handlers
392 */
393static size_t
394ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
395 const char *name, size_t name_len)
396{
397 const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
398
399 if (!test_opt(inode->i_sb, POSIX_ACL))
400 return 0;
401 if (list && size <= list_size)
402 memcpy(list, XATTR_NAME_ACL_ACCESS, size);
403 return size;
404}
405
406static size_t
407ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
408 const char *name, size_t name_len)
409{
410 const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
411
412 if (!test_opt(inode->i_sb, POSIX_ACL))
413 return 0;
414 if (list && size <= list_size)
415 memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
416 return size;
417}
418
419static int
420ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
421{
422 struct posix_acl *acl;
423 int error;
424
425 if (!test_opt(inode->i_sb, POSIX_ACL))
426 return -EOPNOTSUPP;
427
428 acl = ext2_get_acl(inode, type);
429 if (IS_ERR(acl))
430 return PTR_ERR(acl);
431 if (acl == NULL)
432 return -ENODATA;
433 error = posix_acl_to_xattr(acl, buffer, size);
434 posix_acl_release(acl);
435
436 return error;
437}
438
439static int
440ext2_xattr_get_acl_access(struct inode *inode, const char *name,
441 void *buffer, size_t size)
442{
443 if (strcmp(name, "") != 0)
444 return -EINVAL;
445 return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
446}
447
448static int
449ext2_xattr_get_acl_default(struct inode *inode, const char *name,
450 void *buffer, size_t size)
451{
452 if (strcmp(name, "") != 0)
453 return -EINVAL;
454 return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
455}
456
457static int
458ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
459 size_t size)
460{
461 struct posix_acl *acl;
462 int error;
463
464 if (!test_opt(inode->i_sb, POSIX_ACL))
465 return -EOPNOTSUPP;
466 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
467 return -EPERM;
468
469 if (value) {
470 acl = posix_acl_from_xattr(value, size);
471 if (IS_ERR(acl))
472 return PTR_ERR(acl);
473 else if (acl) {
474 error = posix_acl_valid(acl);
475 if (error)
476 goto release_and_out;
477 }
478 } else
479 acl = NULL;
480
481 error = ext2_set_acl(inode, type, acl);
482
483release_and_out:
484 posix_acl_release(acl);
485 return error;
486}
487
488static int
489ext2_xattr_set_acl_access(struct inode *inode, const char *name,
490 const void *value, size_t size, int flags)
491{
492 if (strcmp(name, "") != 0)
493 return -EINVAL;
494 return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
495}
496
497static int
498ext2_xattr_set_acl_default(struct inode *inode, const char *name,
499 const void *value, size_t size, int flags)
500{
501 if (strcmp(name, "") != 0)
502 return -EINVAL;
503 return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
504}
505
506struct xattr_handler ext2_xattr_acl_access_handler = {
507 .prefix = XATTR_NAME_ACL_ACCESS,
508 .list = ext2_xattr_list_acl_access,
509 .get = ext2_xattr_get_acl_access,
510 .set = ext2_xattr_set_acl_access,
511};
512
513struct xattr_handler ext2_xattr_acl_default_handler = {
514 .prefix = XATTR_NAME_ACL_DEFAULT,
515 .list = ext2_xattr_list_acl_default,
516 .get = ext2_xattr_get_acl_default,
517 .set = ext2_xattr_set_acl_default,
518};
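
The two xattr_handler structures above expose the access and default ACLs through the generic xattr interface. As a minimal userspace sketch (not part of this commit), the same attributes can be read back with getxattr(2); the attribute names below are the conventional values behind XATTR_NAME_ACL_ACCESS and XATTR_NAME_ACL_DEFAULT and are assumed here rather than taken from this diff.

/* Hypothetical userspace counterpart: fetch the raw POSIX ACL xattrs that
 * the ext2_xattr_acl_*_handler structures serve on the kernel side.
 * Assumes the conventional names "system.posix_acl_access" and
 * "system.posix_acl_default"; error handling is kept minimal. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : ".";
        char buf[4096];
        ssize_t len;

        len = getxattr(path, "system.posix_acl_access", buf, sizeof(buf));
        printf("access ACL xattr:  %zd bytes\n", len);  /* -1 means absent or error */

        len = getxattr(path, "system.posix_acl_default", buf, sizeof(buf));
        printf("default ACL xattr: %zd bytes\n", len);
        return 0;
}
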
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
new file mode 100644
index 000000000000..fed96ae81a7d
--- /dev/null
+++ b/fs/ext2/acl.h
@@ -0,0 +1,82 @@
1/*
2 File: fs/ext2/acl.h
3
4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
5*/
6
7#include <linux/xattr_acl.h>
8
9#define EXT2_ACL_VERSION 0x0001
10
11typedef struct {
12 __le16 e_tag;
13 __le16 e_perm;
14 __le32 e_id;
15} ext2_acl_entry;
16
17typedef struct {
18 __le16 e_tag;
19 __le16 e_perm;
20} ext2_acl_entry_short;
21
22typedef struct {
23 __le32 a_version;
24} ext2_acl_header;
25
26static inline size_t ext2_acl_size(int count)
27{
28 if (count <= 4) {
29 return sizeof(ext2_acl_header) +
30 count * sizeof(ext2_acl_entry_short);
31 } else {
32 return sizeof(ext2_acl_header) +
33 4 * sizeof(ext2_acl_entry_short) +
34 (count - 4) * sizeof(ext2_acl_entry);
35 }
36}
37
38static inline int ext2_acl_count(size_t size)
39{
40 ssize_t s;
41 size -= sizeof(ext2_acl_header);
42 s = size - 4 * sizeof(ext2_acl_entry_short);
43 if (s < 0) {
44 if (size % sizeof(ext2_acl_entry_short))
45 return -1;
46 return size / sizeof(ext2_acl_entry_short);
47 } else {
48 if (s % sizeof(ext2_acl_entry))
49 return -1;
50 return s / sizeof(ext2_acl_entry) + 4;
51 }
52}
53
54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55
56/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT2_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */
61extern int ext2_permission (struct inode *, int, struct nameidata *);
62extern int ext2_acl_chmod (struct inode *);
63extern int ext2_init_acl (struct inode *, struct inode *);
64
65#else
66#include <linux/sched.h>
67#define ext2_permission NULL
68#define ext2_get_acl NULL
69#define ext2_set_acl NULL
70
71static inline int
72ext2_acl_chmod (struct inode *inode)
73{
74 return 0;
75}
76
77static inline int ext2_init_acl (struct inode *inode, struct inode *dir)
78{
79 return 0;
80}
81#endif
82
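
The helpers in acl.h encode the on-disk layout: ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_MASK and ACL_OTHER entries are stored in the 4-byte short form without e_id, while ACL_USER and ACL_GROUP entries carry an e_id and take 8 bytes; ext2_acl_size() relies on a valid extended ACL containing exactly four short-form entries. A standalone sketch of that arithmetic, using plain integer types in place of __le16/__le32 purely for illustration:

/* Mirror of ext2_acl_size(): header plus up to four short entries, then
 * full entries for the remainder.  Sizes: header 4, short entry 4,
 * full entry 8 bytes. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

typedef struct { uint16_t e_tag, e_perm; uint32_t e_id; } acl_entry;        /* 8 bytes */
typedef struct { uint16_t e_tag, e_perm; } acl_entry_short;                 /* 4 bytes */
typedef struct { uint32_t a_version; } acl_header;                          /* 4 bytes */

static size_t acl_size(int count)
{
        if (count <= 4)
                return sizeof(acl_header) + count * sizeof(acl_entry_short);
        return sizeof(acl_header) + 4 * sizeof(acl_entry_short) +
               (count - 4) * sizeof(acl_entry);
}

int main(void)
{
        /* A typical access ACL: USER_OBJ, USER, GROUP_OBJ, MASK, OTHER. */
        printf("5-entry ACL occupies %zu bytes on disk\n", acl_size(5));   /* 4 + 4*4 + 8 = 28 */
        return 0;
}
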
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
new file mode 100644
index 000000000000..6591abef64d0
--- /dev/null
+++ b/fs/ext2/balloc.c
@@ -0,0 +1,699 @@
1/*
2 * linux/fs/ext2/balloc.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 * Big-endian to little-endian byte-swapping/bitmaps by
11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */
13
14#include <linux/config.h>
15#include "ext2.h"
16#include <linux/quotaops.h>
17#include <linux/sched.h>
18#include <linux/buffer_head.h>
19
20/*
21 * balloc.c contains the blocks allocation and deallocation routines
22 */
23
24/*
25 * The free blocks are managed by bitmaps. A file system contains several
26 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
27 * block for inodes, N blocks for the inode table and data blocks.
28 *
29 * The file system contains group descriptors which are located after the
30 * super block. Each descriptor contains the number of the bitmap block and
31 * the free blocks count in the block. The descriptors are loaded in memory
32 * when a file system is mounted (see ext2_read_super).
33 */
34
35
36#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
37
38struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
39 unsigned int block_group,
40 struct buffer_head ** bh)
41{
42 unsigned long group_desc;
43 unsigned long offset;
44 struct ext2_group_desc * desc;
45 struct ext2_sb_info *sbi = EXT2_SB(sb);
46
47 if (block_group >= sbi->s_groups_count) {
48 ext2_error (sb, "ext2_get_group_desc",
49 "block_group >= groups_count - "
50 "block_group = %d, groups_count = %lu",
51 block_group, sbi->s_groups_count);
52
53 return NULL;
54 }
55
56 group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
57 offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
58 if (!sbi->s_group_desc[group_desc]) {
59 ext2_error (sb, "ext2_get_group_desc",
60 "Group descriptor not loaded - "
61 "block_group = %d, group_desc = %lu, desc = %lu",
62 block_group, group_desc, offset);
63 return NULL;
64 }
65
66 desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
67 if (bh)
68 *bh = sbi->s_group_desc[group_desc];
69 return desc + offset;
70}
71
72/*
73 * Read the bitmap for a given block_group, reading into the specified
74 * slot in the superblock's bitmap cache.
75 *
76 * Return buffer_head on success or NULL in case of failure.
77 */
78static struct buffer_head *
79read_block_bitmap(struct super_block *sb, unsigned int block_group)
80{
81 struct ext2_group_desc * desc;
82 struct buffer_head * bh = NULL;
83
84 desc = ext2_get_group_desc (sb, block_group, NULL);
85 if (!desc)
86 goto error_out;
87 bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
88 if (!bh)
89 ext2_error (sb, "read_block_bitmap",
90 "Cannot read block bitmap - "
91 "block_group = %d, block_bitmap = %u",
92 block_group, le32_to_cpu(desc->bg_block_bitmap));
93error_out:
94 return bh;
95}
96
97/*
98 * Set sb->s_dirt here because the superblock was "logically" altered. We
99 * need to recalculate its free blocks count and flush it out.
100 */
101static int reserve_blocks(struct super_block *sb, int count)
102{
103 struct ext2_sb_info *sbi = EXT2_SB(sb);
104 struct ext2_super_block *es = sbi->s_es;
105 unsigned free_blocks;
106 unsigned root_blocks;
107
108 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
109 root_blocks = le32_to_cpu(es->s_r_blocks_count);
110
111 if (free_blocks < count)
112 count = free_blocks;
113
114 if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
115 sbi->s_resuid != current->fsuid &&
116 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
117 /*
118 * We are too close to reserve and we are not privileged.
119 * Can we allocate anything at all?
120 */
121 if (free_blocks > root_blocks)
122 count = free_blocks - root_blocks;
123 else
124 return 0;
125 }
126
127 percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
128 sb->s_dirt = 1;
129 return count;
130}
131
132static void release_blocks(struct super_block *sb, int count)
133{
134 if (count) {
135 struct ext2_sb_info *sbi = EXT2_SB(sb);
136
137 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
138 sb->s_dirt = 1;
139 }
140}
141
142static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
143 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
144{
145 unsigned free_blocks;
146
147 if (!desc->bg_free_blocks_count)
148 return 0;
149
150 spin_lock(sb_bgl_lock(sbi, group_no));
151 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
152 if (free_blocks < count)
153 count = free_blocks;
154 desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
155 spin_unlock(sb_bgl_lock(sbi, group_no));
156 mark_buffer_dirty(bh);
157 return count;
158}
159
160static void group_release_blocks(struct super_block *sb, int group_no,
161 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
162{
163 if (count) {
164 struct ext2_sb_info *sbi = EXT2_SB(sb);
165 unsigned free_blocks;
166
167 spin_lock(sb_bgl_lock(sbi, group_no));
168 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
169 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
170 spin_unlock(sb_bgl_lock(sbi, group_no));
171 sb->s_dirt = 1;
172 mark_buffer_dirty(bh);
173 }
174}
175
176/* Free given blocks, update quota and i_blocks field */
177void ext2_free_blocks (struct inode * inode, unsigned long block,
178 unsigned long count)
179{
180 struct buffer_head *bitmap_bh = NULL;
181 struct buffer_head * bh2;
182 unsigned long block_group;
183 unsigned long bit;
184 unsigned long i;
185 unsigned long overflow;
186 struct super_block * sb = inode->i_sb;
187 struct ext2_sb_info * sbi = EXT2_SB(sb);
188 struct ext2_group_desc * desc;
189 struct ext2_super_block * es = sbi->s_es;
190 unsigned freed = 0, group_freed;
191
192 if (block < le32_to_cpu(es->s_first_data_block) ||
193 block + count < block ||
194 block + count > le32_to_cpu(es->s_blocks_count)) {
195 ext2_error (sb, "ext2_free_blocks",
196 "Freeing blocks not in datazone - "
197 "block = %lu, count = %lu", block, count);
198 goto error_return;
199 }
200
201 ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
202
203do_more:
204 overflow = 0;
205 block_group = (block - le32_to_cpu(es->s_first_data_block)) /
206 EXT2_BLOCKS_PER_GROUP(sb);
207 bit = (block - le32_to_cpu(es->s_first_data_block)) %
208 EXT2_BLOCKS_PER_GROUP(sb);
209 /*
210 * Check to see if we are freeing blocks across a group
211 * boundary.
212 */
213 if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
214 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
215 count -= overflow;
216 }
217 brelse(bitmap_bh);
218 bitmap_bh = read_block_bitmap(sb, block_group);
219 if (!bitmap_bh)
220 goto error_return;
221
222 desc = ext2_get_group_desc (sb, block_group, &bh2);
223 if (!desc)
224 goto error_return;
225
226 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
227 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
228 in_range (block, le32_to_cpu(desc->bg_inode_table),
229 sbi->s_itb_per_group) ||
230 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
231 sbi->s_itb_per_group))
232 ext2_error (sb, "ext2_free_blocks",
233 "Freeing blocks in system zones - "
234 "Block = %lu, count = %lu",
235 block, count);
236
237 for (i = 0, group_freed = 0; i < count; i++) {
238 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
239 bit + i, bitmap_bh->b_data)) {
240 ext2_error(sb, __FUNCTION__,
241 "bit already cleared for block %lu", block + i);
242 } else {
243 group_freed++;
244 }
245 }
246
247 mark_buffer_dirty(bitmap_bh);
248 if (sb->s_flags & MS_SYNCHRONOUS)
249 sync_dirty_buffer(bitmap_bh);
250
251 group_release_blocks(sb, block_group, desc, bh2, group_freed);
252 freed += group_freed;
253
254 if (overflow) {
255 block += count;
256 count = overflow;
257 goto do_more;
258 }
259error_return:
260 brelse(bitmap_bh);
261 release_blocks(sb, freed);
262 DQUOT_FREE_BLOCK(inode, freed);
263}
264
265static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
266{
267 int k;
268 char *p, *r;
269
270 if (!ext2_test_bit(goal, map))
271 goto got_it;
272
273repeat:
274 if (goal) {
275 /*
276 * The goal was occupied; search forward for a free
277 * block within the next XX blocks.
278 *
279 * end_goal is more or less random, but it has to be
280 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
281 * next 64-bit boundary is simple..
282 */
283 k = (goal + 63) & ~63;
284 goal = ext2_find_next_zero_bit(map, k, goal);
285 if (goal < k)
286 goto got_it;
287 /*
288 * Search in the remainder of the current group.
289 */
290 }
291
292 p = map + (goal >> 3);
293 r = memscan(p, 0, (size - goal + 7) >> 3);
294 k = (r - map) << 3;
295 if (k < size) {
296 /*
297 * We have succeeded in finding a free byte in the block
298 * bitmap. Now search backwards to find the start of this
299 * group of free blocks - won't take more than 7 iterations.
300 */
301 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
302 ;
303 goto got_it;
304 }
305
306 k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
307 if (k < size) {
308 goal = k;
309 goto got_it;
310 }
311 return -1;
312got_it:
313 if (ext2_set_bit_atomic(lock, goal, (void *) map))
314 goto repeat;
315 return goal;
316}
317
318/*
319 * ext2_new_block uses a goal block to assist allocation. If the goal is
320 * free, or there is a free block within 32 blocks of the goal, that block
321 * is allocated. Otherwise a forward search is made for a free block; within
322 * each block group the search first looks for an entire free byte in the block
323 * bitmap, and then for any free bit if that fails.
324 * This function also updates quota and i_blocks field.
325 */
326int ext2_new_block(struct inode *inode, unsigned long goal,
327 u32 *prealloc_count, u32 *prealloc_block, int *err)
328{
329 struct buffer_head *bitmap_bh = NULL;
330 struct buffer_head *gdp_bh; /* bh2 */
331 struct ext2_group_desc *desc;
332 int group_no; /* i */
333 int ret_block; /* j */
334 int group_idx; /* k */
335 int target_block; /* tmp */
336 int block = 0;
337 struct super_block *sb = inode->i_sb;
338 struct ext2_sb_info *sbi = EXT2_SB(sb);
339 struct ext2_super_block *es = sbi->s_es;
340 unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
341 unsigned prealloc_goal = es->s_prealloc_blocks;
342 unsigned group_alloc = 0, es_alloc, dq_alloc;
343 int nr_scanned_groups;
344
345 if (!prealloc_goal--)
346 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
347 if (!prealloc_count || *prealloc_count)
348 prealloc_goal = 0;
349
350 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
351 *err = -EDQUOT;
352 goto out;
353 }
354
355 while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
356 prealloc_goal--;
357
358 dq_alloc = prealloc_goal + 1;
359 es_alloc = reserve_blocks(sb, dq_alloc);
360 if (!es_alloc) {
361 *err = -ENOSPC;
362 goto out_dquot;
363 }
364
365 ext2_debug ("goal=%lu.\n", goal);
366
367 if (goal < le32_to_cpu(es->s_first_data_block) ||
368 goal >= le32_to_cpu(es->s_blocks_count))
369 goal = le32_to_cpu(es->s_first_data_block);
370 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
371 desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
372 if (!desc) {
373 /*
374 * gdp_bh may still be uninitialised. But group_release_blocks
375 * will not touch it because group_alloc is zero.
376 */
377 goto io_error;
378 }
379
380 group_alloc = group_reserve_blocks(sbi, group_no, desc,
381 gdp_bh, es_alloc);
382 if (group_alloc) {
383 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
384 group_size);
385 brelse(bitmap_bh);
386 bitmap_bh = read_block_bitmap(sb, group_no);
387 if (!bitmap_bh)
388 goto io_error;
389
390 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
391
392 ret_block = grab_block(sb_bgl_lock(sbi, group_no),
393 bitmap_bh->b_data, group_size, ret_block);
394 if (ret_block >= 0)
395 goto got_block;
396 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
397 group_alloc = 0;
398 }
399
400 ext2_debug ("Bit not found in block group %d.\n", group_no);
401
402 /*
403 * Now search the rest of the groups. We assume that
404 * i and desc correctly point to the last group visited.
405 */
406 nr_scanned_groups = 0;
407retry:
408 for (group_idx = 0; !group_alloc &&
409 group_idx < sbi->s_groups_count; group_idx++) {
410 group_no++;
411 if (group_no >= sbi->s_groups_count)
412 group_no = 0;
413 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
414 if (!desc)
415 goto io_error;
416 group_alloc = group_reserve_blocks(sbi, group_no, desc,
417 gdp_bh, es_alloc);
418 }
419 if (!group_alloc) {
420 *err = -ENOSPC;
421 goto out_release;
422 }
423 brelse(bitmap_bh);
424 bitmap_bh = read_block_bitmap(sb, group_no);
425 if (!bitmap_bh)
426 goto io_error;
427
428 ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
429 group_size, 0);
430 if (ret_block < 0) {
431 /*
432 * If a free block counter is corrupted we can loop infinitely.
433 * Detect that here.
434 */
435 nr_scanned_groups++;
436 if (nr_scanned_groups > 2 * sbi->s_groups_count) {
437 ext2_error(sb, "ext2_new_block",
438 "corrupted free blocks counters");
439 goto io_error;
440 }
441 /*
442 * Someone else grabbed the last free block in this blockgroup
443 * before us. Retry the scan.
444 */
445 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
446 group_alloc = 0;
447 goto retry;
448 }
449
450got_block:
451 ext2_debug("using block group %d(%d)\n",
452 group_no, desc->bg_free_blocks_count);
453
454 target_block = ret_block + group_no * group_size +
455 le32_to_cpu(es->s_first_data_block);
456
457 if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
458 target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
459 in_range(target_block, le32_to_cpu(desc->bg_inode_table),
460 sbi->s_itb_per_group))
461 ext2_error (sb, "ext2_new_block",
462 "Allocating block in system zone - "
463 "block = %u", target_block);
464
465 if (target_block >= le32_to_cpu(es->s_blocks_count)) {
466 ext2_error (sb, "ext2_new_block",
467 "block(%d) >= blocks count(%d) - "
468 "block_group = %d, es == %p ", ret_block,
469 le32_to_cpu(es->s_blocks_count), group_no, es);
470 goto io_error;
471 }
472 block = target_block;
473
474 /* OK, we _had_ allocated something */
475 ext2_debug("found bit %d\n", ret_block);
476
477 dq_alloc--;
478 es_alloc--;
479 group_alloc--;
480
481 /*
482 * Do block preallocation now if required.
483 */
484 write_lock(&EXT2_I(inode)->i_meta_lock);
485 if (group_alloc && !*prealloc_count) {
486 unsigned n;
487
488 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
489 if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
490 ret_block,
491 (void*) bitmap_bh->b_data))
492 break;
493 }
494 *prealloc_block = block + 1;
495 *prealloc_count = n;
496 es_alloc -= n;
497 dq_alloc -= n;
498 group_alloc -= n;
499 }
500 write_unlock(&EXT2_I(inode)->i_meta_lock);
501
502 mark_buffer_dirty(bitmap_bh);
503 if (sb->s_flags & MS_SYNCHRONOUS)
504 sync_dirty_buffer(bitmap_bh);
505
506 ext2_debug ("allocating block %d. ", block);
507
508 *err = 0;
509out_release:
510 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
511 release_blocks(sb, es_alloc);
512out_dquot:
513 DQUOT_FREE_BLOCK(inode, dq_alloc);
514out:
515 brelse(bitmap_bh);
516 return block;
517
518io_error:
519 *err = -EIO;
520 goto out_release;
521}
522
523unsigned long ext2_count_free_blocks (struct super_block * sb)
524{
525 struct ext2_group_desc * desc;
526 unsigned long desc_count = 0;
527 int i;
528#ifdef EXT2FS_DEBUG
529 unsigned long bitmap_count, x;
530 struct ext2_super_block *es;
531
532 lock_super (sb);
533 es = EXT2_SB(sb)->s_es;
534 desc_count = 0;
535 bitmap_count = 0;
536 desc = NULL;
537 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
538 struct buffer_head *bitmap_bh;
539 desc = ext2_get_group_desc (sb, i, NULL);
540 if (!desc)
541 continue;
542 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
543 bitmap_bh = read_block_bitmap(sb, i);
544 if (!bitmap_bh)
545 continue;
546
547 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
548 printk ("group %d: stored = %d, counted = %lu\n",
549 i, le16_to_cpu(desc->bg_free_blocks_count), x);
550 bitmap_count += x;
551 brelse(bitmap_bh);
552 }
553 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
554 (long)le32_to_cpu(es->s_free_blocks_count),
555 desc_count, bitmap_count);
556 unlock_super (sb);
557 return bitmap_count;
558#else
559 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
560 desc = ext2_get_group_desc (sb, i, NULL);
561 if (!desc)
562 continue;
563 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
564 }
565 return desc_count;
566#endif
567}
568
569static inline int
570block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
571{
572 return ext2_test_bit ((block -
573 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
574 EXT2_BLOCKS_PER_GROUP(sb), map);
575}
576
577static inline int test_root(int a, int b)
578{
579 int num = b;
580
581 while (a > num)
582 num *= b;
583 return num == a;
584}
585
586static int ext2_group_sparse(int group)
587{
588 if (group <= 1)
589 return 1;
590 return (test_root(group, 3) || test_root(group, 5) ||
591 test_root(group, 7));
592}
593
594/**
595 * ext2_bg_has_super - number of blocks used by the superblock in group
596 * @sb: superblock for filesystem
597 * @group: group number to check
598 *
599 * Return the number of blocks used by the superblock (primary or backup)
600 * in this group. Currently this will be only 0 or 1.
601 */
602int ext2_bg_has_super(struct super_block *sb, int group)
603{
604 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
605 !ext2_group_sparse(group))
606 return 0;
607 return 1;
608}
609
610/**
611 * ext2_bg_num_gdb - number of blocks used by the group table in group
612 * @sb: superblock for filesystem
613 * @group: group number to check
614 *
615 * Return the number of blocks used by the group descriptor table
616 * (primary or backup) in this group. In the future there may be a
617 * different number of descriptor blocks in each group.
618 */
619unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
620{
621 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
622 !ext2_group_sparse(group))
623 return 0;
624 return EXT2_SB(sb)->s_gdb_count;
625}
626
627#ifdef CONFIG_EXT2_CHECK
628/* Called at mount-time, super-block is locked */
629void ext2_check_blocks_bitmap (struct super_block * sb)
630{
631 struct buffer_head *bitmap_bh = NULL;
632 struct ext2_super_block * es;
633 unsigned long desc_count, bitmap_count, x, j;
634 unsigned long desc_blocks;
635 struct ext2_group_desc * desc;
636 int i;
637
638 es = EXT2_SB(sb)->s_es;
639 desc_count = 0;
640 bitmap_count = 0;
641 desc = NULL;
642 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
643 desc = ext2_get_group_desc (sb, i, NULL);
644 if (!desc)
645 continue;
646 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
647 brelse(bitmap_bh);
648 bitmap_bh = read_block_bitmap(sb, i);
649 if (!bitmap_bh)
650 continue;
651
652 if (ext2_bg_has_super(sb, i) &&
653 !ext2_test_bit(0, bitmap_bh->b_data))
654 ext2_error(sb, __FUNCTION__,
655 "Superblock in group %d is marked free", i);
656
657 desc_blocks = ext2_bg_num_gdb(sb, i);
658 for (j = 0; j < desc_blocks; j++)
659 if (!ext2_test_bit(j + 1, bitmap_bh->b_data))
660 ext2_error(sb, __FUNCTION__,
661 "Descriptor block #%ld in group "
662 "%d is marked free", j, i);
663
664 if (!block_in_use(le32_to_cpu(desc->bg_block_bitmap),
665 sb, bitmap_bh->b_data))
666 ext2_error(sb, "ext2_check_blocks_bitmap",
667 "Block bitmap for group %d is marked free",
668 i);
669
670 if (!block_in_use(le32_to_cpu(desc->bg_inode_bitmap),
671 sb, bitmap_bh->b_data))
672 ext2_error(sb, "ext2_check_blocks_bitmap",
673 "Inode bitmap for group %d is marked free",
674 i);
675
676 for (j = 0; j < EXT2_SB(sb)->s_itb_per_group; j++)
677 if (!block_in_use(le32_to_cpu(desc->bg_inode_table) + j,
678 sb, bitmap_bh->b_data))
679 ext2_error (sb, "ext2_check_blocks_bitmap",
680 "Block #%ld of the inode table in "
681 "group %d is marked free", j, i);
682
683 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
684 if (le16_to_cpu(desc->bg_free_blocks_count) != x)
685 ext2_error (sb, "ext2_check_blocks_bitmap",
686 "Wrong free blocks count for group %d, "
687 "stored = %d, counted = %lu", i,
688 le16_to_cpu(desc->bg_free_blocks_count), x);
689 bitmap_count += x;
690 }
691 if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
692 ext2_error (sb, "ext2_check_blocks_bitmap",
693 "Wrong free blocks count in super block, "
694 "stored = %lu, counted = %lu",
695 (unsigned long)le32_to_cpu(es->s_free_blocks_count),
696 bitmap_count);
697 brelse(bitmap_bh);
698}
699#endif
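
Both ext2_free_blocks() and ext2_new_block() above convert an absolute block number into a (block group, bit-within-bitmap) pair relative to s_first_data_block. A small arithmetic sketch of that mapping; the constants assume the common 1 KiB-block layout (8192 blocks per group, first data block 1) and are illustrative rather than read from a real superblock:

/* block -> (group, bit) mapping as used in balloc.c.  The layout
 * constants are assumed values for a 1 KiB-block filesystem. */
#include <stdio.h>

#define FIRST_DATA_BLOCK 1UL
#define BLOCKS_PER_GROUP 8192UL

int main(void)
{
        unsigned long blocks[] = { 1, 8193, 16385, 20000 };

        for (int i = 0; i < 4; i++) {
                unsigned long b = blocks[i];
                unsigned long group = (b - FIRST_DATA_BLOCK) / BLOCKS_PER_GROUP;
                unsigned long bit   = (b - FIRST_DATA_BLOCK) % BLOCKS_PER_GROUP;
                printf("block %5lu -> group %lu, bit %lu\n", b, group, bit);
        }
        /* Blocks 1, 8193 and 16385 all map to bit 0 of groups 0, 1 and 2,
         * matching the superblock backup locations quoted in CHANGES. */
        return 0;
}
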
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
new file mode 100644
index 000000000000..20145b74623f
--- /dev/null
+++ b/fs/ext2/bitmap.c
@@ -0,0 +1,25 @@
1/*
2 * linux/fs/ext2/bitmap.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 */
9
10#include <linux/buffer_head.h>
11
12static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
13
14unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
15{
16 unsigned int i;
17 unsigned long sum = 0;
18
19 if (!map)
20 return (0);
21 for (i = 0; i < numchars; i++)
22 sum += nibblemap[map->b_data[i] & 0xf] +
23 nibblemap[(map->b_data[i] >> 4) & 0xf];
24 return (sum);
25}
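
The nibblemap table above holds, for each 4-bit value, the number of zero bits it contains, so ext2_count_free() tallies free bits two nibbles per byte without looping over individual bits. A self-contained sketch of the same counting on a made-up bitmap:

/* Nibble-map free-bit counting, as in ext2_count_free(): nibblemap[v]
 * is the number of zero bits in the 4-bit value v. */
#include <stdio.h>

static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

static unsigned long count_free(const unsigned char *map, unsigned int numchars)
{
        unsigned long sum = 0;

        for (unsigned int i = 0; i < numchars; i++)
                sum += nibblemap[map[i] & 0xf] + nibblemap[(map[i] >> 4) & 0xf];
        return sum;
}

int main(void)
{
        /* 0x00 contributes 8 free bits, 0xF0 contributes 4, 0xFF none. */
        unsigned char bitmap[] = { 0x00, 0xF0, 0xFF };

        printf("free bits: %lu\n", count_free(bitmap, sizeof(bitmap)));  /* 12 */
        return 0;
}
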
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
new file mode 100644
index 000000000000..5b5f52876b42
--- /dev/null
+++ b/fs/ext2/dir.c
@@ -0,0 +1,673 @@
1/*
2 * linux/fs/ext2/dir.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/fs/minix/dir.c
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * ext2 directory handling functions
16 *
17 * Big-endian to little-endian byte-swapping/bitmaps by
18 * David S. Miller (davem@caip.rutgers.edu), 1995
19 *
20 * All code that works with directory layout had been switched to pagecache
21 * and moved here. AV
22 */
23
24#include "ext2.h"
25#include <linux/pagemap.h>
26#include <linux/smp_lock.h>
27
28typedef struct ext2_dir_entry_2 ext2_dirent;
29
30/*
31 * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
32 * more robust, but we have what we have
33 */
34static inline unsigned ext2_chunk_size(struct inode *inode)
35{
36 return inode->i_sb->s_blocksize;
37}
38
39static inline void ext2_put_page(struct page *page)
40{
41 kunmap(page);
42 page_cache_release(page);
43}
44
45static inline unsigned long dir_pages(struct inode *inode)
46{
47 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
48}
49
50/*
51 * Return the offset into page `page_nr' of the last valid
52 * byte in that page, plus one.
53 */
54static unsigned
55ext2_last_byte(struct inode *inode, unsigned long page_nr)
56{
57 unsigned last_byte = inode->i_size;
58
59 last_byte -= page_nr << PAGE_CACHE_SHIFT;
60 if (last_byte > PAGE_CACHE_SIZE)
61 last_byte = PAGE_CACHE_SIZE;
62 return last_byte;
63}
64
65static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
66{
67 struct inode *dir = page->mapping->host;
68 int err = 0;
69 dir->i_version++;
70 page->mapping->a_ops->commit_write(NULL, page, from, to);
71 if (IS_DIRSYNC(dir))
72 err = write_one_page(page, 1);
73 else
74 unlock_page(page);
75 return err;
76}
77
78static void ext2_check_page(struct page *page)
79{
80 struct inode *dir = page->mapping->host;
81 struct super_block *sb = dir->i_sb;
82 unsigned chunk_size = ext2_chunk_size(dir);
83 char *kaddr = page_address(page);
84 u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
85 unsigned offs, rec_len;
86 unsigned limit = PAGE_CACHE_SIZE;
87 ext2_dirent *p;
88 char *error;
89
90 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
91 limit = dir->i_size & ~PAGE_CACHE_MASK;
92 if (limit & (chunk_size - 1))
93 goto Ebadsize;
94 if (!limit)
95 goto out;
96 }
97 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
98 p = (ext2_dirent *)(kaddr + offs);
99 rec_len = le16_to_cpu(p->rec_len);
100
101 if (rec_len < EXT2_DIR_REC_LEN(1))
102 goto Eshort;
103 if (rec_len & 3)
104 goto Ealign;
105 if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
106 goto Enamelen;
107 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
108 goto Espan;
109 if (le32_to_cpu(p->inode) > max_inumber)
110 goto Einumber;
111 }
112 if (offs != limit)
113 goto Eend;
114out:
115 SetPageChecked(page);
116 return;
117
118 /* Too bad, we had an error */
119
120Ebadsize:
121 ext2_error(sb, "ext2_check_page",
122 "size of directory #%lu is not a multiple of chunk size",
123 dir->i_ino
124 );
125 goto fail;
126Eshort:
127 error = "rec_len is smaller than minimal";
128 goto bad_entry;
129Ealign:
130 error = "unaligned directory entry";
131 goto bad_entry;
132Enamelen:
133 error = "rec_len is too small for name_len";
134 goto bad_entry;
135Espan:
136 error = "directory entry across blocks";
137 goto bad_entry;
138Einumber:
139 error = "inode out of bounds";
140bad_entry:
141 ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - "
142 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
143 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
144 (unsigned long) le32_to_cpu(p->inode),
145 rec_len, p->name_len);
146 goto fail;
147Eend:
148 p = (ext2_dirent *)(kaddr + offs);
149 ext2_error (sb, "ext2_check_page",
150 "entry in directory #%lu spans the page boundary"
151 "offset=%lu, inode=%lu",
152 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
153 (unsigned long) le32_to_cpu(p->inode));
154fail:
155 SetPageChecked(page);
156 SetPageError(page);
157}
158
159static struct page * ext2_get_page(struct inode *dir, unsigned long n)
160{
161 struct address_space *mapping = dir->i_mapping;
162 struct page *page = read_cache_page(mapping, n,
163 (filler_t*)mapping->a_ops->readpage, NULL);
164 if (!IS_ERR(page)) {
165 wait_on_page_locked(page);
166 kmap(page);
167 if (!PageUptodate(page))
168 goto fail;
169 if (!PageChecked(page))
170 ext2_check_page(page);
171 if (PageError(page))
172 goto fail;
173 }
174 return page;
175
176fail:
177 ext2_put_page(page);
178 return ERR_PTR(-EIO);
179}
180
181/*
182 * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
183 *
184 * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
185 */
186static inline int ext2_match (int len, const char * const name,
187 struct ext2_dir_entry_2 * de)
188{
189 if (len != de->name_len)
190 return 0;
191 if (!de->inode)
192 return 0;
193 return !memcmp(name, de->name, len);
194}
195
196/*
197 * p is at least 6 bytes before the end of page
198 */
199static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
200{
201 return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
202}
203
204static inline unsigned
205ext2_validate_entry(char *base, unsigned offset, unsigned mask)
206{
207 ext2_dirent *de = (ext2_dirent*)(base + offset);
208 ext2_dirent *p = (ext2_dirent*)(base + (offset&mask));
209 while ((char*)p < (char*)de) {
210 if (p->rec_len == 0)
211 break;
212 p = ext2_next_entry(p);
213 }
214 return (char *)p - base;
215}
216
217static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
218 [EXT2_FT_UNKNOWN] = DT_UNKNOWN,
219 [EXT2_FT_REG_FILE] = DT_REG,
220 [EXT2_FT_DIR] = DT_DIR,
221 [EXT2_FT_CHRDEV] = DT_CHR,
222 [EXT2_FT_BLKDEV] = DT_BLK,
223 [EXT2_FT_FIFO] = DT_FIFO,
224 [EXT2_FT_SOCK] = DT_SOCK,
225 [EXT2_FT_SYMLINK] = DT_LNK,
226};
227
228#define S_SHIFT 12
229static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
230 [S_IFREG >> S_SHIFT] = EXT2_FT_REG_FILE,
231 [S_IFDIR >> S_SHIFT] = EXT2_FT_DIR,
232 [S_IFCHR >> S_SHIFT] = EXT2_FT_CHRDEV,
233 [S_IFBLK >> S_SHIFT] = EXT2_FT_BLKDEV,
234 [S_IFIFO >> S_SHIFT] = EXT2_FT_FIFO,
235 [S_IFSOCK >> S_SHIFT] = EXT2_FT_SOCK,
236 [S_IFLNK >> S_SHIFT] = EXT2_FT_SYMLINK,
237};
238
239static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
240{
241 mode_t mode = inode->i_mode;
242 if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
243 de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
244 else
245 de->file_type = 0;
246}
247
248static int
249ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
250{
251 loff_t pos = filp->f_pos;
252 struct inode *inode = filp->f_dentry->d_inode;
253 struct super_block *sb = inode->i_sb;
254 unsigned int offset = pos & ~PAGE_CACHE_MASK;
255 unsigned long n = pos >> PAGE_CACHE_SHIFT;
256 unsigned long npages = dir_pages(inode);
257 unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
258 unsigned char *types = NULL;
259 int need_revalidate = (filp->f_version != inode->i_version);
260 int ret;
261
262 if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
263 goto success;
264
265 if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
266 types = ext2_filetype_table;
267
268 for ( ; n < npages; n++, offset = 0) {
269 char *kaddr, *limit;
270 ext2_dirent *de;
271 struct page *page = ext2_get_page(inode, n);
272
273 if (IS_ERR(page)) {
274 ext2_error(sb, __FUNCTION__,
275 "bad page in #%lu",
276 inode->i_ino);
277 filp->f_pos += PAGE_CACHE_SIZE - offset;
278 ret = -EIO;
279 goto done;
280 }
281 kaddr = page_address(page);
282 if (need_revalidate) {
283 offset = ext2_validate_entry(kaddr, offset, chunk_mask);
284 need_revalidate = 0;
285 }
286 de = (ext2_dirent *)(kaddr+offset);
287 limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1);
288 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
289 if (de->rec_len == 0) {
290 ext2_error(sb, __FUNCTION__,
291 "zero-length directory entry");
292 ret = -EIO;
293 ext2_put_page(page);
294 goto done;
295 }
296 if (de->inode) {
297 int over;
298 unsigned char d_type = DT_UNKNOWN;
299
300 if (types && de->file_type < EXT2_FT_MAX)
301 d_type = types[de->file_type];
302
303 offset = (char *)de - kaddr;
304 over = filldir(dirent, de->name, de->name_len,
305 (n<<PAGE_CACHE_SHIFT) | offset,
306 le32_to_cpu(de->inode), d_type);
307 if (over) {
308 ext2_put_page(page);
309 goto success;
310 }
311 }
312 filp->f_pos += le16_to_cpu(de->rec_len);
313 }
314 ext2_put_page(page);
315 }
316
317success:
318 ret = 0;
319done:
320 filp->f_version = inode->i_version;
321 return ret;
322}
323
324/*
325 * ext2_find_entry()
326 *
327 * finds an entry in the specified directory with the wanted name. It
328 * returns the page in which the entry was found, and the entry itself
329 * (as a parameter - res_dir). Page is returned mapped and unlocked.
330 * Entry is guaranteed to be valid.
331 */
332struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
333 struct dentry *dentry, struct page ** res_page)
334{
335 const char *name = dentry->d_name.name;
336 int namelen = dentry->d_name.len;
337 unsigned reclen = EXT2_DIR_REC_LEN(namelen);
338 unsigned long start, n;
339 unsigned long npages = dir_pages(dir);
340 struct page *page = NULL;
341 struct ext2_inode_info *ei = EXT2_I(dir);
342 ext2_dirent * de;
343
344 if (npages == 0)
345 goto out;
346
347 /* OFFSET_CACHE */
348 *res_page = NULL;
349
350 start = ei->i_dir_start_lookup;
351 if (start >= npages)
352 start = 0;
353 n = start;
354 do {
355 char *kaddr;
356 page = ext2_get_page(dir, n);
357 if (!IS_ERR(page)) {
358 kaddr = page_address(page);
359 de = (ext2_dirent *) kaddr;
360 kaddr += ext2_last_byte(dir, n) - reclen;
361 while ((char *) de <= kaddr) {
362 if (de->rec_len == 0) {
363 ext2_error(dir->i_sb, __FUNCTION__,
364 "zero-length directory entry");
365 ext2_put_page(page);
366 goto out;
367 }
368 if (ext2_match (namelen, name, de))
369 goto found;
370 de = ext2_next_entry(de);
371 }
372 ext2_put_page(page);
373 }
374 if (++n >= npages)
375 n = 0;
376 } while (n != start);
377out:
378 return NULL;
379
380found:
381 *res_page = page;
382 ei->i_dir_start_lookup = n;
383 return de;
384}
385
386struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
387{
388 struct page *page = ext2_get_page(dir, 0);
389 ext2_dirent *de = NULL;
390
391 if (!IS_ERR(page)) {
392 de = ext2_next_entry((ext2_dirent *) page_address(page));
393 *p = page;
394 }
395 return de;
396}
397
398ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
399{
400 ino_t res = 0;
401 struct ext2_dir_entry_2 * de;
402 struct page *page;
403
404 de = ext2_find_entry (dir, dentry, &page);
405 if (de) {
406 res = le32_to_cpu(de->inode);
407 kunmap(page);
408 page_cache_release(page);
409 }
410 return res;
411}
412
413/* Releases the page */
414void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
415 struct page *page, struct inode *inode)
416{
417 unsigned from = (char *) de - (char *) page_address(page);
418 unsigned to = from + le16_to_cpu(de->rec_len);
419 int err;
420
421 lock_page(page);
422 err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
423 if (err)
424 BUG();
425 de->inode = cpu_to_le32(inode->i_ino);
426 ext2_set_de_type (de, inode);
427 err = ext2_commit_chunk(page, from, to);
428 ext2_put_page(page);
429 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
430 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
431 mark_inode_dirty(dir);
432}
433
434/*
435 * Parent is locked.
436 */
437int ext2_add_link (struct dentry *dentry, struct inode *inode)
438{
439 struct inode *dir = dentry->d_parent->d_inode;
440 const char *name = dentry->d_name.name;
441 int namelen = dentry->d_name.len;
442 unsigned chunk_size = ext2_chunk_size(dir);
443 unsigned reclen = EXT2_DIR_REC_LEN(namelen);
444 unsigned short rec_len, name_len;
445 struct page *page = NULL;
446 ext2_dirent * de;
447 unsigned long npages = dir_pages(dir);
448 unsigned long n;
449 char *kaddr;
450 unsigned from, to;
451 int err;
452
453 /*
454 * We take care of directory expansion in the same loop.
455 * This code plays outside i_size, so it locks the page
456 * to protect that region.
457 */
458 for (n = 0; n <= npages; n++) {
459 char *dir_end;
460
461 page = ext2_get_page(dir, n);
462 err = PTR_ERR(page);
463 if (IS_ERR(page))
464 goto out;
465 lock_page(page);
466 kaddr = page_address(page);
467 dir_end = kaddr + ext2_last_byte(dir, n);
468 de = (ext2_dirent *)kaddr;
469 kaddr += PAGE_CACHE_SIZE - reclen;
470 while ((char *)de <= kaddr) {
471 if ((char *)de == dir_end) {
472 /* We hit i_size */
473 name_len = 0;
474 rec_len = chunk_size;
475 de->rec_len = cpu_to_le16(chunk_size);
476 de->inode = 0;
477 goto got_it;
478 }
479 if (de->rec_len == 0) {
480 ext2_error(dir->i_sb, __FUNCTION__,
481 "zero-length directory entry");
482 err = -EIO;
483 goto out_unlock;
484 }
485 err = -EEXIST;
486 if (ext2_match (namelen, name, de))
487 goto out_unlock;
488 name_len = EXT2_DIR_REC_LEN(de->name_len);
489 rec_len = le16_to_cpu(de->rec_len);
490 if (!de->inode && rec_len >= reclen)
491 goto got_it;
492 if (rec_len >= name_len + reclen)
493 goto got_it;
494 de = (ext2_dirent *) ((char *) de + rec_len);
495 }
496 unlock_page(page);
497 ext2_put_page(page);
498 }
499 BUG();
500 return -EINVAL;
501
502got_it:
503 from = (char*)de - (char*)page_address(page);
504 to = from + rec_len;
505 err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
506 if (err)
507 goto out_unlock;
508 if (de->inode) {
509 ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
510 de1->rec_len = cpu_to_le16(rec_len - name_len);
511 de->rec_len = cpu_to_le16(name_len);
512 de = de1;
513 }
514 de->name_len = namelen;
515 memcpy (de->name, name, namelen);
516 de->inode = cpu_to_le32(inode->i_ino);
517 ext2_set_de_type (de, inode);
518 err = ext2_commit_chunk(page, from, to);
519 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
520 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
521 mark_inode_dirty(dir);
522 /* OFFSET_CACHE */
523out_put:
524 ext2_put_page(page);
525out:
526 return err;
527out_unlock:
528 unlock_page(page);
529 goto out_put;
530}
531
532/*
533 * ext2_delete_entry deletes a directory entry by merging it with the
534 * previous entry. Page is up-to-date. Releases the page.
535 */
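/*
 * Editor's illustrative sketch (not part of the original source): the
 * "merge" works by growing the previous entry's rec_len so that it also
 * covers the deleted entry's space within the same chunk.  For example,
 * if a chunk holds
 *
 *     [ A  rec_len=16 ][ B  rec_len=24 ][ C ... ]
 *
 * and B is deleted, A's rec_len becomes 16 + 24 = 40 and B's inode field
 * is cleared.  If the deleted entry is the first one in its chunk, there
 * is no previous entry to grow, so only its inode field is cleared.
 */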
536int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
537{
538 struct address_space *mapping = page->mapping;
539 struct inode *inode = mapping->host;
540 char *kaddr = page_address(page);
541 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
542 unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
543 ext2_dirent * pde = NULL;
544 ext2_dirent * de = (ext2_dirent *) (kaddr + from);
545 int err;
546
547 while ((char*)de < (char*)dir) {
548 if (de->rec_len == 0) {
549 ext2_error(inode->i_sb, __FUNCTION__,
550 "zero-length directory entry");
551 err = -EIO;
552 goto out;
553 }
554 pde = de;
555 de = ext2_next_entry(de);
556 }
557 if (pde)
558 from = (char*)pde - (char*)page_address(page);
559 lock_page(page);
560 err = mapping->a_ops->prepare_write(NULL, page, from, to);
561 if (err)
562 BUG();
563 if (pde)
564 pde->rec_len = cpu_to_le16(to-from);
565 dir->inode = 0;
566 err = ext2_commit_chunk(page, from, to);
567 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
568 EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
569 mark_inode_dirty(inode);
570out:
571 ext2_put_page(page);
572 return err;
573}
574
575/*
576 * Set the first fragment of directory.
577 */
578int ext2_make_empty(struct inode *inode, struct inode *parent)
579{
580 struct address_space *mapping = inode->i_mapping;
581 struct page *page = grab_cache_page(mapping, 0);
582 unsigned chunk_size = ext2_chunk_size(inode);
583 struct ext2_dir_entry_2 * de;
584 int err;
585 void *kaddr;
586
587 if (!page)
588 return -ENOMEM;
589 err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
590 if (err) {
591 unlock_page(page);
592 goto fail;
593 }
594 kaddr = kmap_atomic(page, KM_USER0);
595 memset(kaddr, 0, chunk_size);
596 de = (struct ext2_dir_entry_2 *)kaddr;
597 de->name_len = 1;
598 de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
599 memcpy (de->name, ".\0\0", 4);
600 de->inode = cpu_to_le32(inode->i_ino);
601 ext2_set_de_type (de, inode);
602
603 de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1));
604 de->name_len = 2;
605 de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
606 de->inode = cpu_to_le32(parent->i_ino);
607 memcpy (de->name, "..\0", 4);
608 ext2_set_de_type (de, inode);
609 kunmap_atomic(kaddr, KM_USER0);
610 err = ext2_commit_chunk(page, 0, chunk_size);
611fail:
612 page_cache_release(page);
613 return err;
614}
615
616/*
617 * routine to check that the specified directory is empty (for rmdir)
618 */
619int ext2_empty_dir (struct inode * inode)
620{
621 struct page *page = NULL;
622 unsigned long i, npages = dir_pages(inode);
623
624 for (i = 0; i < npages; i++) {
625 char *kaddr;
626 ext2_dirent * de;
627 page = ext2_get_page(inode, i);
628
629 if (IS_ERR(page))
630 continue;
631
632 kaddr = page_address(page);
633 de = (ext2_dirent *)kaddr;
634 kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1);
635
636 while ((char *)de <= kaddr) {
637 if (de->rec_len == 0) {
638 ext2_error(inode->i_sb, __FUNCTION__,
639 "zero-length directory entry");
640 printk("kaddr=%p, de=%p\n", kaddr, de);
641 goto not_empty;
642 }
643 if (de->inode != 0) {
644 /* check for . and .. */
645 if (de->name[0] != '.')
646 goto not_empty;
647 if (de->name_len > 2)
648 goto not_empty;
649 if (de->name_len < 2) {
650 if (de->inode !=
651 cpu_to_le32(inode->i_ino))
652 goto not_empty;
653 } else if (de->name[1] != '.')
654 goto not_empty;
655 }
656 de = ext2_next_entry(de);
657 }
658 ext2_put_page(page);
659 }
660 return 1;
661
662not_empty:
663 ext2_put_page(page);
664 return 0;
665}
666
667struct file_operations ext2_dir_operations = {
668 .llseek = generic_file_llseek,
669 .read = generic_read_dir,
670 .readdir = ext2_readdir,
671 .ioctl = ext2_ioctl,
672 .fsync = ext2_sync_file,
673};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
new file mode 100644
index 000000000000..9f1a40e7945c
--- /dev/null
+++ b/fs/ext2/ext2.h
@@ -0,0 +1,160 @@
1#include <linux/fs.h>
2#include <linux/ext2_fs.h>
3
4/*
5 * second extended file system inode data in memory
6 */
7struct ext2_inode_info {
8 __le32 i_data[15];
9 __u32 i_flags;
10 __u32 i_faddr;
11 __u8 i_frag_no;
12 __u8 i_frag_size;
13 __u16 i_state;
14 __u32 i_file_acl;
15 __u32 i_dir_acl;
16 __u32 i_dtime;
17
18 /*
19 * i_block_group is the number of the block group which contains
20 * this file's inode. Constant across the lifetime of the inode,
21 * it is used for making block allocation decisions - we try to
22 * place a file's data blocks near its inode block, and new inodes
23 * near to their parent directory's inode.
24 */
25 __u32 i_block_group;
26
27 /*
28 * i_next_alloc_block is the logical (file-relative) number of the
29 * most-recently-allocated block in this file. Yes, it is misnamed.
30 * We use this for detecting linearly ascending allocation requests.
31 */
32 __u32 i_next_alloc_block;
33
34 /*
35 * i_next_alloc_goal is the *physical* companion to i_next_alloc_block.
36 * it is the physical block number of the block which was most recently
37 * allocated to this file. This gives us the goal (target) for the next
38 * allocation when we detect linearly ascending requests.
39 */
40 __u32 i_next_alloc_goal;
41 __u32 i_prealloc_block;
42 __u32 i_prealloc_count;
43 __u32 i_dir_start_lookup;
44#ifdef CONFIG_EXT2_FS_XATTR
45 /*
46 * Extended attributes can be read independently of the main file
47 * data. Taking i_sem even when reading would cause contention
48 * between readers of EAs and writers of regular file data, so
49 * instead we synchronize on xattr_sem when reading or changing
50 * EAs.
51 */
52 struct rw_semaphore xattr_sem;
53#endif
54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 struct posix_acl *i_acl;
56 struct posix_acl *i_default_acl;
57#endif
58 rwlock_t i_meta_lock;
59 struct inode vfs_inode;
60};
61
62/*
63 * Inode dynamic state flags
64 */
65#define EXT2_STATE_NEW 0x00000001 /* inode is newly created */
66
67
68/*
69 * Function prototypes
70 */
71
72/*
73 * Ok, these declarations are also in <linux/kernel.h> but none of the
74 * ext2 source programs needs to include it so they are duplicated here.
75 */
76
77static inline struct ext2_inode_info *EXT2_I(struct inode *inode)
78{
79 return container_of(inode, struct ext2_inode_info, vfs_inode);
80}
81
82/* balloc.c */
83extern int ext2_bg_has_super(struct super_block *sb, int group);
84extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group);
85extern int ext2_new_block (struct inode *, unsigned long,
86 __u32 *, __u32 *, int *);
87extern void ext2_free_blocks (struct inode *, unsigned long,
88 unsigned long);
89extern unsigned long ext2_count_free_blocks (struct super_block *);
90extern unsigned long ext2_count_dirs (struct super_block *);
91extern void ext2_check_blocks_bitmap (struct super_block *);
92extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
93 unsigned int block_group,
94 struct buffer_head ** bh);
95
96/* dir.c */
97extern int ext2_add_link (struct dentry *, struct inode *);
98extern ino_t ext2_inode_by_name(struct inode *, struct dentry *);
99extern int ext2_make_empty(struct inode *, struct inode *);
100extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **);
101extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
102extern int ext2_empty_dir (struct inode *);
103extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
104extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
105
106/* fsync.c */
107extern int ext2_sync_file (struct file *, struct dentry *, int);
108
109/* ialloc.c */
110extern struct inode * ext2_new_inode (struct inode *, int);
111extern void ext2_free_inode (struct inode *);
112extern unsigned long ext2_count_free_inodes (struct super_block *);
113extern void ext2_check_inodes_bitmap (struct super_block *);
114extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
115
116/* inode.c */
117extern void ext2_read_inode (struct inode *);
118extern int ext2_write_inode (struct inode *, int);
119extern void ext2_delete_inode (struct inode *);
120extern int ext2_sync_inode (struct inode *);
121extern void ext2_discard_prealloc (struct inode *);
122extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
123extern void ext2_truncate (struct inode *);
124extern int ext2_setattr (struct dentry *, struct iattr *);
125extern void ext2_set_inode_flags(struct inode *inode);
126
127/* ioctl.c */
128extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
129 unsigned long);
130
131/* super.c */
132extern void ext2_error (struct super_block *, const char *, const char *, ...)
133 __attribute__ ((format (printf, 3, 4)));
134extern void ext2_warning (struct super_block *, const char *, const char *, ...)
135 __attribute__ ((format (printf, 3, 4)));
136extern void ext2_update_dynamic_rev (struct super_block *sb);
137extern void ext2_write_super (struct super_block *);
138
139/*
140 * Inodes and files operations
141 */
142
143/* dir.c */
144extern struct file_operations ext2_dir_operations;
145
146/* file.c */
147extern struct inode_operations ext2_file_inode_operations;
148extern struct file_operations ext2_file_operations;
149
150/* inode.c */
151extern struct address_space_operations ext2_aops;
152extern struct address_space_operations ext2_nobh_aops;
153
154/* namei.c */
155extern struct inode_operations ext2_dir_inode_operations;
156extern struct inode_operations ext2_special_inode_operations;
157
158/* symlink.c */
159extern struct inode_operations ext2_fast_symlink_inode_operations;
160extern struct inode_operations ext2_symlink_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
new file mode 100644
index 000000000000..f5e86141ec54
--- /dev/null
+++ b/fs/ext2/file.c
@@ -0,0 +1,68 @@
1/*
2 * linux/fs/ext2/file.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/fs/minix/file.c
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * ext2 fs regular file handling primitives
16 *
17 * 64-bit file support on 64-bit platforms by Jakub Jelinek
18 * (jj@sunsite.ms.mff.cuni.cz)
19 */
20
21#include <linux/time.h>
22#include "ext2.h"
23#include "xattr.h"
24#include "acl.h"
25
26/*
27 * Called when an inode is released. Note that this is different
28 * from ext2_open_file: open gets called at every open, but release
29 * gets called only when /all/ the files are closed.
30 */
31static int ext2_release_file (struct inode * inode, struct file * filp)
32{
33 if (filp->f_mode & FMODE_WRITE)
34 ext2_discard_prealloc (inode);
35 return 0;
36}
37
38/*
39 * We have mostly NULL's here: the current defaults are ok for
40 * the ext2 filesystem.
41 */
42struct file_operations ext2_file_operations = {
43 .llseek = generic_file_llseek,
44 .read = generic_file_read,
45 .write = generic_file_write,
46 .aio_read = generic_file_aio_read,
47 .aio_write = generic_file_aio_write,
48 .ioctl = ext2_ioctl,
49 .mmap = generic_file_mmap,
50 .open = generic_file_open,
51 .release = ext2_release_file,
52 .fsync = ext2_sync_file,
53 .readv = generic_file_readv,
54 .writev = generic_file_writev,
55 .sendfile = generic_file_sendfile,
56};
57
58struct inode_operations ext2_file_inode_operations = {
59 .truncate = ext2_truncate,
60#ifdef CONFIG_EXT2_FS_XATTR
61 .setxattr = generic_setxattr,
62 .getxattr = generic_getxattr,
63 .listxattr = ext2_listxattr,
64 .removexattr = generic_removexattr,
65#endif
66 .setattr = ext2_setattr,
67 .permission = ext2_permission,
68};
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
new file mode 100644
index 000000000000..c9c2e5ffa48e
--- /dev/null
+++ b/fs/ext2/fsync.c
@@ -0,0 +1,51 @@
1/*
2 * linux/fs/ext2/fsync.c
3 *
4 * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
5 * from
6 * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)
9 * from
10 * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
11 *
12 * ext2fs fsync primitive
13 *
14 * Big-endian to little-endian byte-swapping/bitmaps by
15 * David S. Miller (davem@caip.rutgers.edu), 1995
16 *
17 * Removed unnecessary code duplication for little endian machines
18 * and excessive __inline__s.
19 * Andi Kleen, 1997
20 *
21 * Major simplifications and cleanup - we only need to do the metadata, because
22 * we can depend on generic_block_fdatasync() to sync the data blocks.
23 */
24
25#include "ext2.h"
26#include <linux/smp_lock.h>
27#include <linux/buffer_head.h> /* for fsync_inode_buffers() */
28
29
30/*
31 * File may be NULL when we are called. Perhaps we shouldn't
32 * even pass file to fsync?
33 */
34
35int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
36{
37 struct inode *inode = dentry->d_inode;
38 int err;
39 int ret;
40
41 ret = sync_mapping_buffers(inode->i_mapping);
42 if (!(inode->i_state & I_DIRTY))
43 return ret;
44 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
45 return ret;
46
47 err = ext2_sync_inode(inode);
48 if (ret == 0)
49 ret = err;
50 return ret;
51}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
new file mode 100644
index 000000000000..77e059149212
--- /dev/null
+++ b/fs/ext2/ialloc.c
@@ -0,0 +1,735 @@
1/*
2 * linux/fs/ext2/ialloc.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * BSD ufs-inspired inode and directory allocation by
10 * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
11 * Big-endian to little-endian byte-swapping/bitmaps by
12 * David S. Miller (davem@caip.rutgers.edu), 1995
13 */
14
15#include <linux/config.h>
16#include <linux/quotaops.h>
17#include <linux/sched.h>
18#include <linux/backing-dev.h>
19#include <linux/buffer_head.h>
20#include <linux/random.h>
21#include "ext2.h"
22#include "xattr.h"
23#include "acl.h"
24
25/*
26 * ialloc.c contains the inodes allocation and deallocation routines
27 */
28
29/*
30 * The free inodes are managed by bitmaps. A file system contains several
31 * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap
32 * block for inodes, N blocks for the inode table and data blocks.
33 *
34 * The file system contains group descriptors which are located after the
35 * super block. Each descriptor contains the number of the bitmap block and
36 * the free blocks count in the block.
37 */
38
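/*
 * Editor's sketch of the per-group on-disk layout assumed above (this is
 * the standard ext2 layout, not taken from this file):
 *
 *   [ superblock copy | group descriptors | block bitmap |
 *     inode bitmap    | inode table       | data blocks ... ]
 *
 * The superblock/descriptor copies are present only in groups that carry
 * a backup (see ext2_bg_has_super() in balloc.c).
 */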
39
40/*
41 * Read the inode allocation bitmap for a given block_group, reading
42 * into the specified slot in the superblock's bitmap cache.
43 *
44 * Return buffer_head of bitmap on success or NULL.
45 */
46static struct buffer_head *
47read_inode_bitmap(struct super_block * sb, unsigned long block_group)
48{
49 struct ext2_group_desc *desc;
50 struct buffer_head *bh = NULL;
51
52 desc = ext2_get_group_desc(sb, block_group, NULL);
53 if (!desc)
54 goto error_out;
55
56 bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
57 if (!bh)
58 ext2_error(sb, "read_inode_bitmap",
59 "Cannot read inode bitmap - "
60 "block_group = %lu, inode_bitmap = %u",
61 block_group, le32_to_cpu(desc->bg_inode_bitmap));
62error_out:
63 return bh;
64}
65
66static void ext2_release_inode(struct super_block *sb, int group, int dir)
67{
68 struct ext2_group_desc * desc;
69 struct buffer_head *bh;
70
71 desc = ext2_get_group_desc(sb, group, &bh);
72 if (!desc) {
73 ext2_error(sb, "ext2_release_inode",
74 "can't get descriptor for group %d", group);
75 return;
76 }
77
78 spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
79 desc->bg_free_inodes_count =
80 cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
81 if (dir)
82 desc->bg_used_dirs_count =
83 cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
84 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
85 if (dir)
86 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
87 sb->s_dirt = 1;
88 mark_buffer_dirty(bh);
89}
90
91/*
92 * NOTE! When we get the inode, we're the only people
93 * that have access to it, and as such there are no
94 * race conditions we have to worry about. The inode
95 * is not on the hash-lists, and it cannot be reached
96 * through the filesystem because the directory entry
97 * has been deleted earlier.
98 *
99 * HOWEVER: we must make sure that we get no aliases,
100 * which means that we have to call "clear_inode()"
101 * _before_ we mark the inode not in use in the inode
102 * bitmaps. Otherwise a newly created file might use
103 * the same inode number (not actually the same pointer
104 * though), and then we'd have two inodes sharing the
105 * same inode number and space on the harddisk.
106 */
107void ext2_free_inode (struct inode * inode)
108{
109 struct super_block * sb = inode->i_sb;
110 int is_directory;
111 unsigned long ino;
112 struct buffer_head *bitmap_bh = NULL;
113 unsigned long block_group;
114 unsigned long bit;
115 struct ext2_super_block * es;
116
117 ino = inode->i_ino;
118 ext2_debug ("freeing inode %lu\n", ino);
119
120 /*
121 * Note: we must free any quota before locking the superblock,
122 * as writing the quota to disk may need the lock as well.
123 */
124 if (!is_bad_inode(inode)) {
125 /* Quota is already initialized in iput() */
126 ext2_xattr_delete_inode(inode);
127 DQUOT_FREE_INODE(inode);
128 DQUOT_DROP(inode);
129 }
130
131 es = EXT2_SB(sb)->s_es;
132 is_directory = S_ISDIR(inode->i_mode);
133
134 /* Do this BEFORE marking the inode not in use or returning an error */
135 clear_inode (inode);
136
137 if (ino < EXT2_FIRST_INO(sb) ||
138 ino > le32_to_cpu(es->s_inodes_count)) {
139 ext2_error (sb, "ext2_free_inode",
140 "reserved or nonexistent inode %lu", ino);
141 goto error_return;
142 }
143 block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
144 bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
145 brelse(bitmap_bh);
146 bitmap_bh = read_inode_bitmap(sb, block_group);
147 if (!bitmap_bh)
148 goto error_return;
149
150 /* Ok, now we can actually update the inode bitmaps.. */
151 if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
152 bit, (void *) bitmap_bh->b_data))
153 ext2_error (sb, "ext2_free_inode",
154 "bit already cleared for inode %lu", ino);
155 else
156 ext2_release_inode(sb, block_group, is_directory);
157 mark_buffer_dirty(bitmap_bh);
158 if (sb->s_flags & MS_SYNCHRONOUS)
159 sync_dirty_buffer(bitmap_bh);
160error_return:
161 brelse(bitmap_bh);
162}
163
164/*
165 * We perform asynchronous prereading of the new inode's inode block when
166 * we create the inode, in the expectation that the inode will be written
167 * back soon. There are two reasons:
168 *
169 * - When creating a large number of files, the async prereads will be
170 * nicely merged into large reads
171 * - When writing out a large number of inodes, we don't need to keep on
172 * stalling the writes while we read the inode block.
173 *
174 * FIXME: ext2_get_group_desc() needs to be simplified.
175 */
176static void ext2_preread_inode(struct inode *inode)
177{
178 unsigned long block_group;
179 unsigned long offset;
180 unsigned long block;
181 struct buffer_head *bh;
182 struct ext2_group_desc * gdp;
183 struct backing_dev_info *bdi;
184
185 bdi = inode->i_mapping->backing_dev_info;
186 if (bdi_read_congested(bdi))
187 return;
188 if (bdi_write_congested(bdi))
189 return;
190
191 block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
192 gdp = ext2_get_group_desc(inode->i_sb, block_group, &bh);
193 if (gdp == NULL)
194 return;
195
196 /*
197 * Figure out the offset within the block group inode table
198 */
199 offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
200 EXT2_INODE_SIZE(inode->i_sb);
201 block = le32_to_cpu(gdp->bg_inode_table) +
202 (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
203 sb_breadahead(inode->i_sb, block);
204}
205
206/*
207 * There are two policies for allocating an inode. If the new inode is
208 * a directory, then a forward search is made for a block group with both
209 * free space and a low directory-to-inode ratio; if that fails, then, of
210 * the groups with above-average free space, the one with the fewest
211 * directories already in it is chosen.
212 *
213 * For other inodes, search forward from the parent directory's block
214 * group to find a free inode.
215 */
216static int find_group_dir(struct super_block *sb, struct inode *parent)
217{
218 int ngroups = EXT2_SB(sb)->s_groups_count;
219 int avefreei = ext2_count_free_inodes(sb) / ngroups;
220 struct ext2_group_desc *desc, *best_desc = NULL;
221 struct buffer_head *bh, *best_bh = NULL;
222 int group, best_group = -1;
223
224 for (group = 0; group < ngroups; group++) {
225 desc = ext2_get_group_desc (sb, group, &bh);
226 if (!desc || !desc->bg_free_inodes_count)
227 continue;
228 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
229 continue;
230 if (!best_desc ||
231 (le16_to_cpu(desc->bg_free_blocks_count) >
232 le16_to_cpu(best_desc->bg_free_blocks_count))) {
233 best_group = group;
234 best_desc = desc;
235 best_bh = bh;
236 }
237 }
238 if (!best_desc)
239 return -1;
240
241 return best_group;
242}
243
244/*
245 * Orlov's allocator for directories.
246 *
247 * We always try to spread first-level directories.
248 *
249 * If there are block groups with both free inode and free block counts
250 * no worse than average, we return the one with the smallest directory count.
251 * Otherwise we simply return a random group.
252 *
253 * For the rest, the rules are as follows:
254 *
255 * It's OK to put a directory into a group unless
256 * it has too many directories already (max_dirs) or
257 * it has too few free inodes left (min_inodes) or
258 * it has too few free blocks left (min_blocks) or
259 * it's already running too large a debt (max_debt).
260 * Parent's group is preferred; if it doesn't satisfy these
261 * conditions, we search cyclically through the rest. If none
262 * of the groups look good we just look for a group with more
263 * free inodes than average (starting at parent's group).
264 *
265 * Debt is incremented each time we allocate a directory and decremented
266 * when we allocate an inode, within 0--255.
267 */
268
269#define INODE_COST 64
270#define BLOCK_COST 256
271
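/*
 * Editor's worked example (assumed numbers, for illustration only): on a
 * filesystem with 32768 blocks and 8192 inodes per group, and an observed
 * blocks_per_dir of 512, the code below computes
 *
 *     max_debt = 32768 / max(512, BLOCK_COST) = 64
 *
 * and since 64 * INODE_COST = 4096 does not exceed inodes_per_group, the
 * debt cap stays at 64 (it would otherwise be clamped into the 1..255 range).
 */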
272static int find_group_orlov(struct super_block *sb, struct inode *parent)
273{
274 int parent_group = EXT2_I(parent)->i_block_group;
275 struct ext2_sb_info *sbi = EXT2_SB(sb);
276 struct ext2_super_block *es = sbi->s_es;
277 int ngroups = sbi->s_groups_count;
278 int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
279 int freei;
280 int avefreei;
281 int free_blocks;
282 int avefreeb;
283 int blocks_per_dir;
284 int ndirs;
285 int max_debt, max_dirs, min_blocks, min_inodes;
286 int group = -1, i;
287 struct ext2_group_desc *desc;
288 struct buffer_head *bh;
289
290 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
291 avefreei = freei / ngroups;
292 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
293 avefreeb = free_blocks / ngroups;
294 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
295
296 if ((parent == sb->s_root->d_inode) ||
297 (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
298 struct ext2_group_desc *best_desc = NULL;
299 struct buffer_head *best_bh = NULL;
300 int best_ndir = inodes_per_group;
301 int best_group = -1;
302
303 get_random_bytes(&group, sizeof(group));
304 parent_group = (unsigned)group % ngroups;
305 for (i = 0; i < ngroups; i++) {
306 group = (parent_group + i) % ngroups;
307 desc = ext2_get_group_desc (sb, group, &bh);
308 if (!desc || !desc->bg_free_inodes_count)
309 continue;
310 if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
311 continue;
312 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
313 continue;
314 if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
315 continue;
316 best_group = group;
317 best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
318 best_desc = desc;
319 best_bh = bh;
320 }
321 if (best_group >= 0) {
322 desc = best_desc;
323 bh = best_bh;
324 group = best_group;
325 goto found;
326 }
327 goto fallback;
328 }
329
330 if (ndirs == 0)
331 ndirs = 1; /* percpu_counters are approximate... */
332
333 blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs;
334
335 max_dirs = ndirs / ngroups + inodes_per_group / 16;
336 min_inodes = avefreei - inodes_per_group / 4;
337 min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4;
338
339 max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
340 if (max_debt * INODE_COST > inodes_per_group)
341 max_debt = inodes_per_group / INODE_COST;
342 if (max_debt > 255)
343 max_debt = 255;
344 if (max_debt == 0)
345 max_debt = 1;
346
347 for (i = 0; i < ngroups; i++) {
348 group = (parent_group + i) % ngroups;
349 desc = ext2_get_group_desc (sb, group, &bh);
350 if (!desc || !desc->bg_free_inodes_count)
351 continue;
352 if (sbi->s_debts[group] >= max_debt)
353 continue;
354 if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
355 continue;
356 if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
357 continue;
358 if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
359 continue;
360 goto found;
361 }
362
363fallback:
364 for (i = 0; i < ngroups; i++) {
365 group = (parent_group + i) % ngroups;
366 desc = ext2_get_group_desc (sb, group, &bh);
367 if (!desc || !desc->bg_free_inodes_count)
368 continue;
369 if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
370 goto found;
371 }
372
373 if (avefreei) {
374 /*
375 * The free-inodes counter is approximate, and for really small
376 * filesystems the above test can fail to find any blockgroups
377 */
378 avefreei = 0;
379 goto fallback;
380 }
381
382 return -1;
383
384found:
385 return group;
386}
387
388static int find_group_other(struct super_block *sb, struct inode *parent)
389{
390 int parent_group = EXT2_I(parent)->i_block_group;
391 int ngroups = EXT2_SB(sb)->s_groups_count;
392 struct ext2_group_desc *desc;
393 struct buffer_head *bh;
394 int group, i;
395
396 /*
397 * Try to place the inode in its parent directory
398 */
399 group = parent_group;
400 desc = ext2_get_group_desc (sb, group, &bh);
401 if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
402 le16_to_cpu(desc->bg_free_blocks_count))
403 goto found;
404
405 /*
406 * We're going to place this inode in a different blockgroup from its
407 * parent. We want to cause files in a common directory to all land in
408 * the same blockgroup. But we want files which are in a different
409 * directory which shares a blockgroup with our parent to land in a
410 * different blockgroup.
411 *
412 * So add our directory's i_ino into the starting point for the hash.
413 */
414 group = (group + parent->i_ino) % ngroups;
415
416 /*
417 * Use a quadratic hash to find a group with a free inode and some
418 * free blocks.
419 */
420 for (i = 1; i < ngroups; i <<= 1) {
421 group += i;
422 if (group >= ngroups)
423 group -= ngroups;
424 desc = ext2_get_group_desc (sb, group, &bh);
425 if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
426 le16_to_cpu(desc->bg_free_blocks_count))
427 goto found;
428 }
429
430 /*
431 * That failed: try linear search for a free inode, even if that group
432 * has no free blocks.
433 */
434 group = parent_group;
435 for (i = 0; i < ngroups; i++) {
436 if (++group >= ngroups)
437 group = 0;
438 desc = ext2_get_group_desc (sb, group, &bh);
439 if (desc && le16_to_cpu(desc->bg_free_inodes_count))
440 goto found;
441 }
442
443 return -1;
444
445found:
446 return group;
447}
448
449struct inode *ext2_new_inode(struct inode *dir, int mode)
450{
451 struct super_block *sb;
452 struct buffer_head *bitmap_bh = NULL;
453 struct buffer_head *bh2;
454 int group, i;
455 ino_t ino = 0;
456 struct inode * inode;
457 struct ext2_group_desc *gdp;
458 struct ext2_super_block *es;
459 struct ext2_inode_info *ei;
460 struct ext2_sb_info *sbi;
461 int err;
462
463 sb = dir->i_sb;
464 inode = new_inode(sb);
465 if (!inode)
466 return ERR_PTR(-ENOMEM);
467
468 ei = EXT2_I(inode);
469 sbi = EXT2_SB(sb);
470 es = sbi->s_es;
471 if (S_ISDIR(mode)) {
472 if (test_opt(sb, OLDALLOC))
473 group = find_group_dir(sb, dir);
474 else
475 group = find_group_orlov(sb, dir);
476 } else
477 group = find_group_other(sb, dir);
478
479 if (group == -1) {
480 err = -ENOSPC;
481 goto fail;
482 }
483
484 for (i = 0; i < sbi->s_groups_count; i++) {
485 gdp = ext2_get_group_desc(sb, group, &bh2);
486 brelse(bitmap_bh);
487 bitmap_bh = read_inode_bitmap(sb, group);
488 if (!bitmap_bh) {
489 err = -EIO;
490 goto fail;
491 }
492 ino = 0;
493
494repeat_in_this_group:
495 ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,
496 EXT2_INODES_PER_GROUP(sb), ino);
497 if (ino >= EXT2_INODES_PER_GROUP(sb)) {
498 /*
499 * Rare race: find_group_xx() decided that there were
500 * free inodes in this group, but by the time we tried
501 * to allocate one, they're all gone. This can also
502 * occur because the counters which find_group_orlov()
503 * uses are approximate. So just go and search the
504 * next block group.
505 */
506 if (++group == sbi->s_groups_count)
507 group = 0;
508 continue;
509 }
510 if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
511 ino, bitmap_bh->b_data)) {
512 /* we lost this inode */
513 if (++ino >= EXT2_INODES_PER_GROUP(sb)) {
514 /* this group is exhausted, try next group */
515 if (++group == sbi->s_groups_count)
516 group = 0;
517 continue;
518 }
519 /* try to find free inode in the same group */
520 goto repeat_in_this_group;
521 }
522 goto got;
523 }
524
525 /*
526 * Scanned all blockgroups.
527 */
528 err = -ENOSPC;
529 goto fail;
530got:
531 mark_buffer_dirty(bitmap_bh);
532 if (sb->s_flags & MS_SYNCHRONOUS)
533 sync_dirty_buffer(bitmap_bh);
534 brelse(bitmap_bh);
535
536 ino += group * EXT2_INODES_PER_GROUP(sb) + 1;
537 if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
538 ext2_error (sb, "ext2_new_inode",
539 "reserved inode or inode > inodes count - "
540 "block_group = %d,inode=%lu", group,
541 (unsigned long) ino);
542 err = -EIO;
543 goto fail;
544 }
545
546 percpu_counter_mod(&sbi->s_freeinodes_counter, -1);
547 if (S_ISDIR(mode))
548 percpu_counter_inc(&sbi->s_dirs_counter);
549
550 spin_lock(sb_bgl_lock(sbi, group));
551 gdp->bg_free_inodes_count =
552 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
553 if (S_ISDIR(mode)) {
554 if (sbi->s_debts[group] < 255)
555 sbi->s_debts[group]++;
556 gdp->bg_used_dirs_count =
557 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
558 } else {
559 if (sbi->s_debts[group])
560 sbi->s_debts[group]--;
561 }
562 spin_unlock(sb_bgl_lock(sbi, group));
563
564 sb->s_dirt = 1;
565 mark_buffer_dirty(bh2);
566 inode->i_uid = current->fsuid;
567 if (test_opt (sb, GRPID))
568 inode->i_gid = dir->i_gid;
569 else if (dir->i_mode & S_ISGID) {
570 inode->i_gid = dir->i_gid;
571 if (S_ISDIR(mode))
572 mode |= S_ISGID;
573 } else
574 inode->i_gid = current->fsgid;
575 inode->i_mode = mode;
576
577 inode->i_ino = ino;
578 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
579 inode->i_blocks = 0;
580 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
581 memset(ei->i_data, 0, sizeof(ei->i_data));
582 ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
583 if (S_ISLNK(mode))
584 ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
585 /* dirsync is only applied to directories */
586 if (!S_ISDIR(mode))
587 ei->i_flags &= ~EXT2_DIRSYNC_FL;
588 ei->i_faddr = 0;
589 ei->i_frag_no = 0;
590 ei->i_frag_size = 0;
591 ei->i_file_acl = 0;
592 ei->i_dir_acl = 0;
593 ei->i_dtime = 0;
594 ei->i_block_group = group;
595 ei->i_next_alloc_block = 0;
596 ei->i_next_alloc_goal = 0;
597 ei->i_prealloc_block = 0;
598 ei->i_prealloc_count = 0;
599 ei->i_dir_start_lookup = 0;
600 ei->i_state = EXT2_STATE_NEW;
601 ext2_set_inode_flags(inode);
602 spin_lock(&sbi->s_next_gen_lock);
603 inode->i_generation = sbi->s_next_generation++;
604 spin_unlock(&sbi->s_next_gen_lock);
605 insert_inode_hash(inode);
606
607 if (DQUOT_ALLOC_INODE(inode)) {
608 DQUOT_DROP(inode);
609 err = -ENOSPC;
610 goto fail2;
611 }
612 err = ext2_init_acl(inode, dir);
613 if (err) {
614 DQUOT_FREE_INODE(inode);
615 goto fail2;
616 }
617 mark_inode_dirty(inode);
618 ext2_debug("allocating inode %lu\n", inode->i_ino);
619 ext2_preread_inode(inode);
620 return inode;
621
622fail2:
623 inode->i_flags |= S_NOQUOTA;
624 inode->i_nlink = 0;
625 iput(inode);
626 return ERR_PTR(err);
627
628fail:
629 make_bad_inode(inode);
630 iput(inode);
631 return ERR_PTR(err);
632}
633
634unsigned long ext2_count_free_inodes (struct super_block * sb)
635{
636 struct ext2_group_desc *desc;
637 unsigned long desc_count = 0;
638 int i;
639
640#ifdef EXT2FS_DEBUG
641 struct ext2_super_block *es;
642 unsigned long bitmap_count = 0;
643 struct buffer_head *bitmap_bh = NULL;
644
645 lock_super (sb);
646 es = EXT2_SB(sb)->s_es;
647 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
648 unsigned x;
649
650 desc = ext2_get_group_desc (sb, i, NULL);
651 if (!desc)
652 continue;
653 desc_count += le16_to_cpu(desc->bg_free_inodes_count);
654 brelse(bitmap_bh);
655 bitmap_bh = read_inode_bitmap(sb, i);
656 if (!bitmap_bh)
657 continue;
658
659 x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
660 printk("group %d: stored = %d, counted = %u\n",
661 i, le16_to_cpu(desc->bg_free_inodes_count), x);
662 bitmap_count += x;
663 }
664 brelse(bitmap_bh);
665 printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
666 percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter),
667 desc_count, bitmap_count);
668 unlock_super(sb);
669 return desc_count;
670#else
671 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
672 desc = ext2_get_group_desc (sb, i, NULL);
673 if (!desc)
674 continue;
675 desc_count += le16_to_cpu(desc->bg_free_inodes_count);
676 }
677 return desc_count;
678#endif
679}
680
681/* Called at mount-time, super-block is locked */
682unsigned long ext2_count_dirs (struct super_block * sb)
683{
684 unsigned long count = 0;
685 int i;
686
687 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
688 struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL);
689 if (!gdp)
690 continue;
691 count += le16_to_cpu(gdp->bg_used_dirs_count);
692 }
693 return count;
694}
695
696#ifdef CONFIG_EXT2_CHECK
697/* Called at mount-time, super-block is locked */
698void ext2_check_inodes_bitmap (struct super_block * sb)
699{
700 struct ext2_super_block * es = EXT2_SB(sb)->s_es;
701 unsigned long desc_count = 0, bitmap_count = 0;
702 struct buffer_head *bitmap_bh = NULL;
703 int i;
704
705 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
706 struct ext2_group_desc *desc;
707 unsigned x;
708
709 desc = ext2_get_group_desc(sb, i, NULL);
710 if (!desc)
711 continue;
712 desc_count += le16_to_cpu(desc->bg_free_inodes_count);
713 brelse(bitmap_bh);
714 bitmap_bh = read_inode_bitmap(sb, i);
715 if (!bitmap_bh)
716 continue;
717
718 x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
719 if (le16_to_cpu(desc->bg_free_inodes_count) != x)
720 ext2_error (sb, "ext2_check_inodes_bitmap",
721 "Wrong free inodes count in group %d, "
722 "stored = %d, counted = %lu", i,
723 le16_to_cpu(desc->bg_free_inodes_count), x);
724 bitmap_count += x;
725 }
726 brelse(bitmap_bh);
727 if (percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter) !=
728 bitmap_count)
729 ext2_error(sb, "ext2_check_inodes_bitmap",
730 "Wrong free inodes count in super block, "
731 "stored = %lu, counted = %lu",
732 (unsigned long)le32_to_cpu(es->s_free_inodes_count),
733 bitmap_count);
734}
735#endif
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
new file mode 100644
index 000000000000..b890be022496
--- /dev/null
+++ b/fs/ext2/inode.c
@@ -0,0 +1,1276 @@
1/*
2 * linux/fs/ext2/inode.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/fs/minix/inode.c
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * Goal-directed block allocation by Stephen Tweedie
16 * (sct@dcs.ed.ac.uk), 1993, 1998
17 * Big-endian to little-endian byte-swapping/bitmaps by
18 * David S. Miller (davem@caip.rutgers.edu), 1995
19 * 64-bit file support on 64-bit platforms by Jakub Jelinek
20 * (jj@sunsite.ms.mff.cuni.cz)
21 *
22 * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
23 */
24
25#include <linux/smp_lock.h>
26#include <linux/time.h>
27#include <linux/highuid.h>
28#include <linux/pagemap.h>
29#include <linux/quotaops.h>
30#include <linux/module.h>
31#include <linux/writeback.h>
32#include <linux/buffer_head.h>
33#include <linux/mpage.h>
34#include "ext2.h"
35#include "acl.h"
36
37MODULE_AUTHOR("Remy Card and others");
38MODULE_DESCRIPTION("Second Extended Filesystem");
39MODULE_LICENSE("GPL");
40
41static int ext2_update_inode(struct inode * inode, int do_sync);
42
43/*
44 * Test whether an inode is a fast symlink.
45 */
46static inline int ext2_inode_is_fast_symlink(struct inode *inode)
47{
48 int ea_blocks = EXT2_I(inode)->i_file_acl ?
49 (inode->i_sb->s_blocksize >> 9) : 0;
50
51 return (S_ISLNK(inode->i_mode) &&
52 inode->i_blocks - ea_blocks == 0);
53}
54
55/*
56 * Called at the last iput() if i_nlink is zero.
57 */
58void ext2_delete_inode (struct inode * inode)
59{
60 if (is_bad_inode(inode))
61 goto no_delete;
62 EXT2_I(inode)->i_dtime = get_seconds();
63 mark_inode_dirty(inode);
64 ext2_update_inode(inode, inode_needs_sync(inode));
65
66 inode->i_size = 0;
67 if (inode->i_blocks)
68 ext2_truncate (inode);
69 ext2_free_inode (inode);
70
71 return;
72no_delete:
73 clear_inode(inode); /* We must guarantee clearing of inode... */
74}
75
76void ext2_discard_prealloc (struct inode * inode)
77{
78#ifdef EXT2_PREALLOCATE
79 struct ext2_inode_info *ei = EXT2_I(inode);
80 write_lock(&ei->i_meta_lock);
81 if (ei->i_prealloc_count) {
82 unsigned short total = ei->i_prealloc_count;
83 unsigned long block = ei->i_prealloc_block;
84 ei->i_prealloc_count = 0;
85 ei->i_prealloc_block = 0;
86 write_unlock(&ei->i_meta_lock);
87 ext2_free_blocks (inode, block, total);
88 return;
89 } else
90 write_unlock(&ei->i_meta_lock);
91#endif
92}
93
94static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
95{
96#ifdef EXT2FS_DEBUG
97 static unsigned long alloc_hits, alloc_attempts;
98#endif
99 unsigned long result;
100
101
102#ifdef EXT2_PREALLOCATE
103 struct ext2_inode_info *ei = EXT2_I(inode);
104 write_lock(&ei->i_meta_lock);
105 if (ei->i_prealloc_count &&
106 (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block))
107 {
108 result = ei->i_prealloc_block++;
109 ei->i_prealloc_count--;
110 write_unlock(&ei->i_meta_lock);
111 ext2_debug ("preallocation hit (%lu/%lu).\n",
112 ++alloc_hits, ++alloc_attempts);
113 } else {
114 write_unlock(&ei->i_meta_lock);
115 ext2_discard_prealloc (inode);
116 ext2_debug ("preallocation miss (%lu/%lu).\n",
117 alloc_hits, ++alloc_attempts);
118 if (S_ISREG(inode->i_mode))
119 result = ext2_new_block (inode, goal,
120 &ei->i_prealloc_count,
121 &ei->i_prealloc_block, err);
122 else
123 result = ext2_new_block(inode, goal, NULL, NULL, err);
124 }
125#else
126 result = ext2_new_block (inode, goal, 0, 0, err);
127#endif
128 return result;
129}
130
131typedef struct {
132 __le32 *p;
133 __le32 key;
134 struct buffer_head *bh;
135} Indirect;
136
137static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
138{
139 p->key = *(p->p = v);
140 p->bh = bh;
141}
142
143static inline int verify_chain(Indirect *from, Indirect *to)
144{
145 while (from <= to && from->key == *from->p)
146 from++;
147 return (from > to);
148}
149
150/**
151 * ext2_block_to_path - parse the block number into array of offsets
152 * @inode: inode in question (we are only interested in its superblock)
153 * @i_block: block number to be parsed
154 * @offsets: array to store the offsets in
155 * @boundary: set this non-zero if the referred-to block is likely to be
156 * followed (on disk) by an indirect block.
157 * To store the locations of a file's data, ext2 uses a data structure common
158 * to UNIX filesystems - a tree of pointers anchored in the inode, with
159 * data blocks at the leaves and indirect blocks in the intermediate nodes.
160 * This function translates the block number into a path in that tree -
161 * the return value is the path length and @offsets[n] is the offset of
162 * the pointer to the (n+1)th node in the nth one. If @block is out of range
163 * (negative or too large), a warning is printed and zero is returned.
164 *
165 * Note: function doesn't find node addresses, so no IO is needed. All
166 * we need to know is the capacity of indirect blocks (taken from the
167 * inode->i_sb).
168 */
169
170/*
171 * Portability note: the last comparison (check that we fit into triple
172 * indirect block) is spelled differently, because otherwise on an
173 * architecture with 32-bit longs and 8Kb pages we might get into trouble
174 * if our filesystem had 8Kb blocks. We might use long long, but that would
175 * kill us on x86. Oh, well, at least the sign propagation does not matter -
176 * i_block would have to be negative in the very beginning, so we would not
177 * get there at all.
178 */
179
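/*
 * Editor's worked example (assumed 1KiB block size, so
 * EXT2_ADDR_PER_BLOCK = 256 and ptrs_bits = 8; not part of the original):
 *
 *     i_block = 5     -> offsets = { 5 }                      (depth 1)
 *     i_block = 100   -> offsets = { EXT2_IND_BLOCK, 88 }     (depth 2)
 *     i_block = 10000 -> offsets = { EXT2_DIND_BLOCK, 38, 4 } (depth 3)
 *
 * i.e. block 10000 lies 9732 blocks past the direct and single-indirect
 * ranges: slot 38 of the double-indirect block, then slot 4 within that
 * indirect block.
 */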
180static int ext2_block_to_path(struct inode *inode,
181 long i_block, int offsets[4], int *boundary)
182{
183 int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
184 int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
185 const long direct_blocks = EXT2_NDIR_BLOCKS,
186 indirect_blocks = ptrs,
187 double_blocks = (1 << (ptrs_bits * 2));
188 int n = 0;
189 int final = 0;
190
191 if (i_block < 0) {
192 ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0");
193 } else if (i_block < direct_blocks) {
194 offsets[n++] = i_block;
195 final = direct_blocks;
196 } else if ( (i_block -= direct_blocks) < indirect_blocks) {
197 offsets[n++] = EXT2_IND_BLOCK;
198 offsets[n++] = i_block;
199 final = ptrs;
200 } else if ((i_block -= indirect_blocks) < double_blocks) {
201 offsets[n++] = EXT2_DIND_BLOCK;
202 offsets[n++] = i_block >> ptrs_bits;
203 offsets[n++] = i_block & (ptrs - 1);
204 final = ptrs;
205 } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
206 offsets[n++] = EXT2_TIND_BLOCK;
207 offsets[n++] = i_block >> (ptrs_bits * 2);
208 offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
209 offsets[n++] = i_block & (ptrs - 1);
210 final = ptrs;
211 } else {
212 ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big");
213 }
214 if (boundary)
215 *boundary = (i_block & (ptrs - 1)) == (final - 1);
216 return n;
217}
218
219/**
220 * ext2_get_branch - read the chain of indirect blocks leading to data
221 * @inode: inode in question
222 * @depth: depth of the chain (1 - direct pointer, etc.)
223 * @offsets: offsets of pointers in inode/indirect blocks
224 * @chain: place to store the result
225 * @err: here we store the error value
226 *
227 * Function fills the array of triples <key, p, bh> and returns %NULL
228 * if everything went OK or the pointer to the last filled triple
229 * (incomplete one) otherwise. Upon the return chain[i].key contains
230 * the number of (i+1)-th block in the chain (as it is stored in memory,
231 * i.e. little-endian 32-bit), chain[i].p contains the address of that
232 * number (it points into struct inode for i==0 and into the bh->b_data
233 * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
234 * block for i>0 and NULL for i==0. In other words, it holds the block
235 * numbers of the chain, addresses they were taken from (and where we can
236 * verify that chain did not change) and buffer_heads hosting these
237 * numbers.
238 *
239 * Function stops when it stumbles upon zero pointer (absent block)
240 * (pointer to last triple returned, *@err == 0)
241 * or when it gets an IO error reading an indirect block
242 * (ditto, *@err == -EIO)
243 * or when it notices that chain had been changed while it was reading
244 * (ditto, *@err == -EAGAIN)
245 * or when it reads all @depth-1 indirect blocks successfully and finds
246 * the whole chain, all way to the data (returns %NULL, *err == 0).
247 */
248static Indirect *ext2_get_branch(struct inode *inode,
249 int depth,
250 int *offsets,
251 Indirect chain[4],
252 int *err)
253{
254 struct super_block *sb = inode->i_sb;
255 Indirect *p = chain;
256 struct buffer_head *bh;
257
258 *err = 0;
259 /* i_data is not going away, no lock needed */
260 add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets);
261 if (!p->key)
262 goto no_block;
263 while (--depth) {
264 bh = sb_bread(sb, le32_to_cpu(p->key));
265 if (!bh)
266 goto failure;
267 read_lock(&EXT2_I(inode)->i_meta_lock);
268 if (!verify_chain(chain, p))
269 goto changed;
270 add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
271 read_unlock(&EXT2_I(inode)->i_meta_lock);
272 if (!p->key)
273 goto no_block;
274 }
275 return NULL;
276
277changed:
278 read_unlock(&EXT2_I(inode)->i_meta_lock);
279 brelse(bh);
280 *err = -EAGAIN;
281 goto no_block;
282failure:
283 *err = -EIO;
284no_block:
285 return p;
286}
287
288/**
289 * ext2_find_near - find a place for allocation with sufficient locality
290 * @inode: owner
291 * @ind: descriptor of indirect block.
292 *
293 * This function returns the preferred place for block allocation.
294 * It is used when heuristic for sequential allocation fails.
295 * Rules are:
296 * + if there is a block to the left of our position - allocate near it.
297 * + if pointer will live in indirect block - allocate near that block.
298 * + if pointer will live in inode - allocate in the same cylinder group.
299 *
300 * In the latter case we colour the starting block by the caller's PID to
301 * prevent it from clashing with concurrent allocations for a different inode
302 * in the same block group. The PID is used here so that functionally related
303 * files will be close-by on-disk.
304 *
305 * Caller must make sure that @ind is valid and will stay that way.
306 */
307
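/*
 * Editor's worked example of the colouring above (assumed numbers): with
 * 16384 blocks per group and current->pid % 16 == 5, the colour is
 * 5 * (16384 / 16) = 5120, so the suggested goal is 5120 blocks into the
 * inode's own block group.
 */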
308static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
309{
310 struct ext2_inode_info *ei = EXT2_I(inode);
311 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
312 __le32 *p;
313 unsigned long bg_start;
314 unsigned long colour;
315
316 /* Try to find previous block */
317 for (p = ind->p - 1; p >= start; p--)
318 if (*p)
319 return le32_to_cpu(*p);
320
321 /* No such thing, so let's try location of indirect block */
322 if (ind->bh)
323 return ind->bh->b_blocknr;
324
325 /*
326 * Is it going to be referred to from the inode itself? OK, just put it into
327 * the same cylinder group then.
328 */
329 bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
330 le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block);
331 colour = (current->pid % 16) *
332 (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16);
333 return bg_start + colour;
334}
335
336/**
337 * ext2_find_goal - find a preferred place for allocation.
338 * @inode: owner
339 * @block: block we want
340 * @chain: chain of indirect blocks
341 * @partial: pointer to the last triple within a chain
342 * @goal: place to store the result.
343 *
344 * Normally this function finds the preferred place for block allocation,
345 * stores it in *@goal and returns zero. If the branch had been changed
346 * under us we return -EAGAIN.
347 */
348
349static inline int ext2_find_goal(struct inode *inode,
350 long block,
351 Indirect chain[4],
352 Indirect *partial,
353 unsigned long *goal)
354{
355 struct ext2_inode_info *ei = EXT2_I(inode);
356 write_lock(&ei->i_meta_lock);
357 if ((block == ei->i_next_alloc_block + 1) && ei->i_next_alloc_goal) {
358 ei->i_next_alloc_block++;
359 ei->i_next_alloc_goal++;
360 }
361 if (verify_chain(chain, partial)) {
362 /*
363 * try the heuristic for sequential allocation,
364 * failing that at least try to get decent locality.
365 */
366 if (block == ei->i_next_alloc_block)
367 *goal = ei->i_next_alloc_goal;
368 if (!*goal)
369 *goal = ext2_find_near(inode, partial);
370 write_unlock(&ei->i_meta_lock);
371 return 0;
372 }
373 write_unlock(&ei->i_meta_lock);
374 return -EAGAIN;
375}
376
377/**
378 * ext2_alloc_branch - allocate and set up a chain of blocks.
379 * @inode: owner
380 * @num: depth of the chain (number of blocks to allocate)
381 * @offsets: offsets (in the blocks) to store the pointers to next.
382 * @branch: place to store the chain in.
383 *
384 * This function allocates @num blocks, zeroes out all but the last one,
385 * links them into chain and (if we are synchronous) writes them to disk.
386 * In other words, it prepares a branch that can be spliced onto the
387 * inode. It stores the information about that chain in the branch[], in
388 * the same format as ext2_get_branch() would do. We are calling it after
389 * we had read the existing part of chain and partial points to the last
390 * triple of that (one with zero ->key). Upon the exit we have the same
391 * picture as after the successful ext2_get_block(), except that in one
392 * place chain is disconnected - *branch->p is still zero (we did not
393 * set the last link), but branch->key contains the number that should
394 * be placed into *branch->p to fill that gap.
395 *
396 * If allocation fails we free all blocks we've allocated (and forget
397 * their buffer_heads) and return the error value from the failed
398 * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain
399 * as described above and return 0.
400 */
401
402static int ext2_alloc_branch(struct inode *inode,
403 int num,
404 unsigned long goal,
405 int *offsets,
406 Indirect *branch)
407{
408 int blocksize = inode->i_sb->s_blocksize;
409 int n = 0;
410 int err;
411 int i;
412 int parent = ext2_alloc_block(inode, goal, &err);
413
414 branch[0].key = cpu_to_le32(parent);
415 if (parent) for (n = 1; n < num; n++) {
416 struct buffer_head *bh;
417 /* Allocate the next block */
418 int nr = ext2_alloc_block(inode, parent, &err);
419 if (!nr)
420 break;
421 branch[n].key = cpu_to_le32(nr);
422 /*
423 * Get buffer_head for parent block, zero it out and set
424 * the pointer to new one, then send parent to disk.
425 */
426 bh = sb_getblk(inode->i_sb, parent);
427 lock_buffer(bh);
428 memset(bh->b_data, 0, blocksize);
429 branch[n].bh = bh;
430 branch[n].p = (__le32 *) bh->b_data + offsets[n];
431 *branch[n].p = branch[n].key;
432 set_buffer_uptodate(bh);
433 unlock_buffer(bh);
434 mark_buffer_dirty_inode(bh, inode);
435 /* We used to sync bh here if IS_SYNC(inode).
436 * But we now rely upon generic_osync_inode()
437 * and b_inode_buffers. But not for directories.
438 */
439 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
440 sync_dirty_buffer(bh);
441 parent = nr;
442 }
443 if (n == num)
444 return 0;
445
446 /* Allocation failed, free what we already allocated */
447 for (i = 1; i < n; i++)
448 bforget(branch[i].bh);
449 for (i = 0; i < n; i++)
450 ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1);
451 return err;
452}
453
454/**
455 * ext2_splice_branch - splice the allocated branch onto inode.
456 * @inode: owner
457 * @block: (logical) number of block we are adding
458 * @chain: chain of indirect blocks (with a missing link - see
459 * ext2_alloc_branch)
460 * @where: location of missing link
461 * @num: number of blocks we are adding
462 *
463 * This function verifies that chain (up to the missing link) had not
464 * changed, fills the missing link and does all housekeeping needed in
465 * inode (->i_blocks, etc.). In case of success we end up with the full
466 * chain to new block and return 0. Otherwise (== chain had been changed)
467 * we free the new blocks (forgetting their buffer_heads, indeed) and
468 * return -EAGAIN.
469 */
470
471static inline int ext2_splice_branch(struct inode *inode,
472 long block,
473 Indirect chain[4],
474 Indirect *where,
475 int num)
476{
477 struct ext2_inode_info *ei = EXT2_I(inode);
478 int i;
479
480 /* Verify that place we are splicing to is still there and vacant */
481
482 write_lock(&ei->i_meta_lock);
483 if (!verify_chain(chain, where-1) || *where->p)
484 goto changed;
485
486 /* That's it */
487
488 *where->p = where->key;
489 ei->i_next_alloc_block = block;
490 ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key);
491
492 write_unlock(&ei->i_meta_lock);
493
494 /* We are done with atomic stuff, now do the rest of housekeeping */
495
496 inode->i_ctime = CURRENT_TIME_SEC;
497
498 /* had we spliced it onto indirect block? */
499 if (where->bh)
500 mark_buffer_dirty_inode(where->bh, inode);
501
502 mark_inode_dirty(inode);
503 return 0;
504
505changed:
506 write_unlock(&ei->i_meta_lock);
507 for (i = 1; i < num; i++)
508 bforget(where[i].bh);
509 for (i = 0; i < num; i++)
510 ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1);
511 return -EAGAIN;
512}
513
514/*
515 * Allocation strategy is simple: if we have to allocate something, we will
516 * have to go the whole way to leaf. So let's do it before attaching anything
517 * to tree, set linkage between the newborn blocks, write them if sync is
518 * required, recheck the path, free and repeat if check fails, otherwise
519 * set the last missing link (that will protect us from any truncate-generated
520 * removals - all blocks on the path are immune now) and possibly force the
521 * write on the parent block.
522 * That has a nice additional property: no special recovery from the failed
523 * allocations is needed - we simply release blocks and do not touch anything
524 * reachable from inode.
525 */
526
527int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
528{
529 int err = -EIO;
530 int offsets[4];
531 Indirect chain[4];
532 Indirect *partial;
533 unsigned long goal;
534 int left;
535 int boundary = 0;
536 int depth = ext2_block_to_path(inode, iblock, offsets, &boundary);
537
538 if (depth == 0)
539 goto out;
540
541reread:
542 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
543
544 /* Simplest case - block found, no allocation needed */
545 if (!partial) {
546got_it:
547 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
548 if (boundary)
549 set_buffer_boundary(bh_result);
550 /* Clean up and exit */
551 partial = chain+depth-1; /* the whole chain */
552 goto cleanup;
553 }
554
555 /* Next simple case - plain lookup or failed read of indirect block */
556 if (!create || err == -EIO) {
557cleanup:
558 while (partial > chain) {
559 brelse(partial->bh);
560 partial--;
561 }
562out:
563 return err;
564 }
565
566 /*
567 * Indirect block might be removed by truncate while we were
568 * reading it. Handling of that case (forget what we've got and
569 * reread) is taken out of the main path.
570 */
571 if (err == -EAGAIN)
572 goto changed;
573
574 goal = 0;
575 if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0)
576 goto changed;
577
578 left = (chain + depth) - partial;
579 err = ext2_alloc_branch(inode, left, goal,
580 offsets+(partial-chain), partial);
581 if (err)
582 goto cleanup;
583
584 if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
585 goto changed;
586
587 set_buffer_new(bh_result);
588 goto got_it;
589
590changed:
591 while (partial > chain) {
592 brelse(partial->bh);
593 partial--;
594 }
595 goto reread;
596}
597
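As a hedged illustration of the block-to-path mapping that ext2_get_block() relies on (a self-contained sketch, not part of this file; the helper name and the 1 KiB block size, i.e. 256 block numbers per indirect block, are assumptions chosen only for the example):

/*
 * Illustrative sketch, not part of fs/ext2/inode.c.  It mirrors the
 * classic ext2 layout: 12 direct blocks in the inode, then one single,
 * one double and one triple indirect block.  With the assumed 1 KiB
 * block size (256 addresses per indirect block), logical blocks 0-11
 * are direct, 12-267 need one indirect block, 268-65803 go through the
 * double indirect chain, and anything larger through the triple chain.
 */
static int example_chain_depth(unsigned long iblock)
{
	const unsigned long direct_blocks = 12;	/* EXT2_NDIR_BLOCKS */
	const unsigned long ptrs = 256;		/* 1024 / sizeof(__le32), assumed */

	if (iblock < direct_blocks)
		return 1;			/* inode->i_data[] entry only */
	iblock -= direct_blocks;
	if (iblock < ptrs)
		return 2;			/* single indirect */
	iblock -= ptrs;
	if (iblock < ptrs * ptrs)
		return 3;			/* double indirect */
	return 4;				/* triple indirect */
}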
598static int ext2_writepage(struct page *page, struct writeback_control *wbc)
599{
600 return block_write_full_page(page, ext2_get_block, wbc);
601}
602
603static int ext2_readpage(struct file *file, struct page *page)
604{
605 return mpage_readpage(page, ext2_get_block);
606}
607
608static int
609ext2_readpages(struct file *file, struct address_space *mapping,
610 struct list_head *pages, unsigned nr_pages)
611{
612 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
613}
614
615static int
616ext2_prepare_write(struct file *file, struct page *page,
617 unsigned from, unsigned to)
618{
619 return block_prepare_write(page,from,to,ext2_get_block);
620}
621
622static int
623ext2_nobh_prepare_write(struct file *file, struct page *page,
624 unsigned from, unsigned to)
625{
626 return nobh_prepare_write(page,from,to,ext2_get_block);
627}
628
629static int ext2_nobh_writepage(struct page *page,
630 struct writeback_control *wbc)
631{
632 return nobh_writepage(page, ext2_get_block, wbc);
633}
634
635static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
636{
637 return generic_block_bmap(mapping,block,ext2_get_block);
638}
639
640static int
641ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
642 struct buffer_head *bh_result, int create)
643{
644 int ret;
645
646 ret = ext2_get_block(inode, iblock, bh_result, create);
647 if (ret == 0)
648 bh_result->b_size = (1 << inode->i_blkbits);
649 return ret;
650}
651
652static ssize_t
653ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
654 loff_t offset, unsigned long nr_segs)
655{
656 struct file *file = iocb->ki_filp;
657 struct inode *inode = file->f_mapping->host;
658
659 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
660 offset, nr_segs, ext2_get_blocks, NULL);
661}
662
663static int
664ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
665{
666 return mpage_writepages(mapping, wbc, ext2_get_block);
667}
668
669struct address_space_operations ext2_aops = {
670 .readpage = ext2_readpage,
671 .readpages = ext2_readpages,
672 .writepage = ext2_writepage,
673 .sync_page = block_sync_page,
674 .prepare_write = ext2_prepare_write,
675 .commit_write = generic_commit_write,
676 .bmap = ext2_bmap,
677 .direct_IO = ext2_direct_IO,
678 .writepages = ext2_writepages,
679};
680
681struct address_space_operations ext2_nobh_aops = {
682 .readpage = ext2_readpage,
683 .readpages = ext2_readpages,
684 .writepage = ext2_nobh_writepage,
685 .sync_page = block_sync_page,
686 .prepare_write = ext2_nobh_prepare_write,
687 .commit_write = nobh_commit_write,
688 .bmap = ext2_bmap,
689 .direct_IO = ext2_direct_IO,
690 .writepages = ext2_writepages,
691};
692
693/*
694 * Probably it should be a library function... search for first non-zero word
695 * or memcmp with zero_page, whatever is better for a particular architecture.
696 * Linus?
697 */
698static inline int all_zeroes(__le32 *p, __le32 *q)
699{
700 while (p < q)
701 if (*p++)
702 return 0;
703 return 1;
704}
705
706/**
707 * ext2_find_shared - find the indirect blocks for partial truncation.
708 * @inode: inode in question
709 * @depth: depth of the affected branch
710 * @offsets: offsets of pointers in that branch (see ext2_block_to_path)
711 * @chain: place to store the pointers to partial indirect blocks
712 * @top: place to the (detached) top of branch
713 *
714 * This is a helper function used by ext2_truncate().
715 *
716 * When we do truncate() we may have to clean the ends of several indirect
717 * blocks but leave the blocks themselves alive. A block is partially
718 * truncated if some data below the new i_size is referenced from it (and
719 * it is on the path to the first completely truncated data block, indeed).
720 * We have to free the top of that path along with everything to the right
721 * of the path. Since no allocation past the truncation point is possible
722 * until ext2_truncate() finishes, we may safely do the latter, but the top
723 * of the branch may require special attention - pageout below the truncation
724 * point might try to populate it.
725 *
726 * We atomically detach the top of branch from the tree, store the block
727 * number of its root in *@top, pointers to buffer_heads of partially
728 * truncated blocks - in @chain[].bh and pointers to their last elements
729 * that should not be removed - in @chain[].p. Return value is the pointer
730 * to last filled element of @chain.
731 *
732 * The work left to the caller is the actual freeing of subtrees:
733 * a) free the subtree starting from *@top
734 * b) free the subtrees whose roots are stored in
735 * (@chain[i].p+1 .. end of @chain[i].bh->b_data)
736 * c) free the subtrees growing from the inode past the @chain[0].p
737 * (no partially truncated stuff there).
738 */
739
740static Indirect *ext2_find_shared(struct inode *inode,
741 int depth,
742 int offsets[4],
743 Indirect chain[4],
744 __le32 *top)
745{
746 Indirect *partial, *p;
747 int k, err;
748
749 *top = 0;
750 for (k = depth; k > 1 && !offsets[k-1]; k--)
751 ;
752 partial = ext2_get_branch(inode, k, offsets, chain, &err);
753 if (!partial)
754 partial = chain + k-1;
755 /*
756 * If the branch has acquired a continuation since we last looked at it -
757 * fine, it should all survive and the (new) top doesn't belong to us.
758 */
759 write_lock(&EXT2_I(inode)->i_meta_lock);
760 if (!partial->key && *partial->p) {
761 write_unlock(&EXT2_I(inode)->i_meta_lock);
762 goto no_top;
763 }
764 for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
765 ;
766 /*
767 * OK, we've found the last block that must survive. The rest of our
768 * branch should be detached before unlocking. However, if that rest
769 * of branch is all ours and does not grow immediately from the inode
770 * it's easier to cheat and just decrement partial->p.
771 */
772 if (p == chain + k - 1 && p > chain) {
773 p->p--;
774 } else {
775 *top = *p->p;
776 *p->p = 0;
777 }
778 write_unlock(&EXT2_I(inode)->i_meta_lock);
779
780 while(partial > p)
781 {
782 brelse(partial->bh);
783 partial--;
784 }
785no_top:
786 return partial;
787}
788
789/**
790 * ext2_free_data - free a list of data blocks
791 * @inode: inode we are dealing with
792 * @p: array of block numbers
793 * @q: points immediately past the end of array
794 *
795 * We are freeing all blocks referenced from that array (numbers are
796 * stored as little-endian 32-bit) and updating @inode->i_blocks
797 * appropriately.
798 */
799static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
800{
801 unsigned long block_to_free = 0, count = 0;
802 unsigned long nr;
803
804 for ( ; p < q ; p++) {
805 nr = le32_to_cpu(*p);
806 if (nr) {
807 *p = 0;
808 /* accumulate blocks to free if they're contiguous */
809 if (count == 0)
810 goto free_this;
811 else if (block_to_free == nr - count)
812 count++;
813 else {
814 mark_inode_dirty(inode);
815 ext2_free_blocks (inode, block_to_free, count);
816 free_this:
817 block_to_free = nr;
818 count = 1;
819 }
820 }
821 }
822 if (count > 0) {
823 mark_inode_dirty(inode);
824 ext2_free_blocks (inode, block_to_free, count);
825 }
826}
827
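A hedged worked example of the coalescing above (the block numbers are made up for illustration): if the pointer array holds 100, 101, 102, 0, 200, the loop accumulates the contiguous run starting at 100, issues ext2_free_blocks(inode, 100, 3) when it reaches 200, and the trailing ext2_free_blocks(inode, 200, 1) is emitted by the final check after the loop.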
828/**
829 * ext2_free_branches - free an array of branches
830 * @inode: inode we are dealing with
831 * @p: array of block numbers
832 * @q: pointer immediately past the end of array
833 * @depth: depth of the branches to free
834 *
835 * We are freeing all blocks referenced from these branches (numbers are
836 * stored as little-endian 32-bit) and updating @inode->i_blocks
837 * appropriately.
838 */
839static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth)
840{
841 struct buffer_head * bh;
842 unsigned long nr;
843
844 if (depth--) {
845 int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
846 for ( ; p < q ; p++) {
847 nr = le32_to_cpu(*p);
848 if (!nr)
849 continue;
850 *p = 0;
851 bh = sb_bread(inode->i_sb, nr);
852 /*
853 * A read failure? Report error and clear slot
854 * (should be rare).
855 */
856 if (!bh) {
857 ext2_error(inode->i_sb, "ext2_free_branches",
858 "Read failure, inode=%ld, block=%ld",
859 inode->i_ino, nr);
860 continue;
861 }
862 ext2_free_branches(inode,
863 (__le32*)bh->b_data,
864 (__le32*)bh->b_data + addr_per_block,
865 depth);
866 bforget(bh);
867 ext2_free_blocks(inode, nr, 1);
868 mark_inode_dirty(inode);
869 }
870 } else
871 ext2_free_data(inode, p, q);
872}
873
874void ext2_truncate (struct inode * inode)
875{
876 __le32 *i_data = EXT2_I(inode)->i_data;
877 int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
878 int offsets[4];
879 Indirect chain[4];
880 Indirect *partial;
881 __le32 nr = 0;
882 int n;
883 long iblock;
884 unsigned blocksize;
885
886 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
887 S_ISLNK(inode->i_mode)))
888 return;
889 if (ext2_inode_is_fast_symlink(inode))
890 return;
891 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
892 return;
893
894 ext2_discard_prealloc(inode);
895
896 blocksize = inode->i_sb->s_blocksize;
897 iblock = (inode->i_size + blocksize-1)
898 >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
899
900 if (test_opt(inode->i_sb, NOBH))
901 nobh_truncate_page(inode->i_mapping, inode->i_size);
902 else
903 block_truncate_page(inode->i_mapping,
904 inode->i_size, ext2_get_block);
905
906 n = ext2_block_to_path(inode, iblock, offsets, NULL);
907 if (n == 0)
908 return;
909
910 if (n == 1) {
911 ext2_free_data(inode, i_data+offsets[0],
912 i_data + EXT2_NDIR_BLOCKS);
913 goto do_indirects;
914 }
915
916 partial = ext2_find_shared(inode, n, offsets, chain, &nr);
917 /* Kill the top of shared branch (already detached) */
918 if (nr) {
919 if (partial == chain)
920 mark_inode_dirty(inode);
921 else
922 mark_buffer_dirty_inode(partial->bh, inode);
923 ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
924 }
925 /* Clear the ends of indirect blocks on the shared branch */
926 while (partial > chain) {
927 ext2_free_branches(inode,
928 partial->p + 1,
929 (__le32*)partial->bh->b_data+addr_per_block,
930 (chain+n-1) - partial);
931 mark_buffer_dirty_inode(partial->bh, inode);
932 brelse (partial->bh);
933 partial--;
934 }
935do_indirects:
936 /* Kill the remaining (whole) subtrees */
937 switch (offsets[0]) {
938 default:
939 nr = i_data[EXT2_IND_BLOCK];
940 if (nr) {
941 i_data[EXT2_IND_BLOCK] = 0;
942 mark_inode_dirty(inode);
943 ext2_free_branches(inode, &nr, &nr+1, 1);
944 }
945 case EXT2_IND_BLOCK:
946 nr = i_data[EXT2_DIND_BLOCK];
947 if (nr) {
948 i_data[EXT2_DIND_BLOCK] = 0;
949 mark_inode_dirty(inode);
950 ext2_free_branches(inode, &nr, &nr+1, 2);
951 }
952 case EXT2_DIND_BLOCK:
953 nr = i_data[EXT2_TIND_BLOCK];
954 if (nr) {
955 i_data[EXT2_TIND_BLOCK] = 0;
956 mark_inode_dirty(inode);
957 ext2_free_branches(inode, &nr, &nr+1, 3);
958 }
959 case EXT2_TIND_BLOCK:
960 ;
961 }
962 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
963 if (inode_needs_sync(inode)) {
964 sync_mapping_buffers(inode->i_mapping);
965 ext2_sync_inode (inode);
966 } else {
967 mark_inode_dirty(inode);
968 }
969}
970
971static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
972 struct buffer_head **p)
973{
974 struct buffer_head * bh;
975 unsigned long block_group;
976 unsigned long block;
977 unsigned long offset;
978 struct ext2_group_desc * gdp;
979
980 *p = NULL;
981 if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) ||
982 ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
983 goto Einval;
984
985 block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
986 gdp = ext2_get_group_desc(sb, block_group, &bh);
987 if (!gdp)
988 goto Egdp;
989 /*
990 * Figure out the offset within the block group inode table
991 */
992 offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb);
993 block = le32_to_cpu(gdp->bg_inode_table) +
994 (offset >> EXT2_BLOCK_SIZE_BITS(sb));
995 if (!(bh = sb_bread(sb, block)))
996 goto Eio;
997
998 *p = bh;
999 offset &= (EXT2_BLOCK_SIZE(sb) - 1);
1000 return (struct ext2_inode *) (bh->b_data + offset);
1001
1002Einval:
1003 ext2_error(sb, "ext2_get_inode", "bad inode number: %lu",
1004 (unsigned long) ino);
1005 return ERR_PTR(-EINVAL);
1006Eio:
1007 ext2_error(sb, "ext2_get_inode",
1008 "unable to read inode block - inode=%lu, block=%lu",
1009 (unsigned long) ino, block);
1010Egdp:
1011 return ERR_PTR(-EIO);
1012}
1013
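To make the index arithmetic above concrete, here is a hedged sketch (self-contained, not part of this file; the sample geometry of 8192 inodes per group, 128-byte inodes and 4 KiB blocks is assumed purely for illustration - the real values come from the superblock and group descriptor):

/*
 * Illustrative sketch only.  ext2_get_inode() performs the same three
 * steps with on-disk values: pick the block group, compute the byte
 * offset into that group's inode table, then split it into a block
 * (relative to bg_inode_table) and an offset within that block.
 */
static void example_locate_inode(unsigned long ino)
{
	const unsigned long inodes_per_group = 8192;	/* assumed geometry */
	const unsigned long inode_size = 128;		/* EXT2_GOOD_OLD_INODE_SIZE */
	const unsigned long block_size = 4096;		/* assumed 4 KiB blocks */

	unsigned long group = (ino - 1) / inodes_per_group;
	unsigned long offset = ((ino - 1) % inodes_per_group) * inode_size;
	unsigned long block_in_table = offset / block_size;	/* added to bg_inode_table */
	unsigned long offset_in_block = offset % block_size;

	(void)group;
	(void)block_in_table;
	(void)offset_in_block;
}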
1014void ext2_set_inode_flags(struct inode *inode)
1015{
1016 unsigned int flags = EXT2_I(inode)->i_flags;
1017
1018 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
1019 if (flags & EXT2_SYNC_FL)
1020 inode->i_flags |= S_SYNC;
1021 if (flags & EXT2_APPEND_FL)
1022 inode->i_flags |= S_APPEND;
1023 if (flags & EXT2_IMMUTABLE_FL)
1024 inode->i_flags |= S_IMMUTABLE;
1025 if (flags & EXT2_NOATIME_FL)
1026 inode->i_flags |= S_NOATIME;
1027 if (flags & EXT2_DIRSYNC_FL)
1028 inode->i_flags |= S_DIRSYNC;
1029}
1030
1031void ext2_read_inode (struct inode * inode)
1032{
1033 struct ext2_inode_info *ei = EXT2_I(inode);
1034 ino_t ino = inode->i_ino;
1035 struct buffer_head * bh;
1036 struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
1037 int n;
1038
1039#ifdef CONFIG_EXT2_FS_POSIX_ACL
1040 ei->i_acl = EXT2_ACL_NOT_CACHED;
1041 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
1042#endif
1043 if (IS_ERR(raw_inode))
1044 goto bad_inode;
1045
1046 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
1047 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
1048 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
1049 if (!(test_opt (inode->i_sb, NO_UID32))) {
1050 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
1051 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
1052 }
1053 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
1054 inode->i_size = le32_to_cpu(raw_inode->i_size);
1055 inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
1056 inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
1057 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
1058 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1059 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
1060 /* We now have enough fields to check if the inode was active or not.
1061	 * This is needed because nfsd might try to access dead inodes;
1062	 * the test is the same one that e2fsck uses.
1063	 * NeilBrown 1999oct15
1064 */
1065 if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
1066 /* this inode is deleted */
1067 brelse (bh);
1068 goto bad_inode;
1069 }
1070 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
1071 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
1072 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
1073 ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
1074 ei->i_frag_no = raw_inode->i_frag;
1075 ei->i_frag_size = raw_inode->i_fsize;
1076 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
1077 ei->i_dir_acl = 0;
1078 if (S_ISREG(inode->i_mode))
1079 inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
1080 else
1081 ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
1082 ei->i_dtime = 0;
1083 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
1084 ei->i_state = 0;
1085 ei->i_next_alloc_block = 0;
1086 ei->i_next_alloc_goal = 0;
1087 ei->i_prealloc_count = 0;
1088 ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
1089 ei->i_dir_start_lookup = 0;
1090
1091 /*
1092 * NOTE! The in-memory inode i_data array is in little-endian order
1093 * even on big-endian machines: we do NOT byteswap the block numbers!
1094 */
1095 for (n = 0; n < EXT2_N_BLOCKS; n++)
1096 ei->i_data[n] = raw_inode->i_block[n];
1097
1098 if (S_ISREG(inode->i_mode)) {
1099 inode->i_op = &ext2_file_inode_operations;
1100 inode->i_fop = &ext2_file_operations;
1101 if (test_opt(inode->i_sb, NOBH))
1102 inode->i_mapping->a_ops = &ext2_nobh_aops;
1103 else
1104 inode->i_mapping->a_ops = &ext2_aops;
1105 } else if (S_ISDIR(inode->i_mode)) {
1106 inode->i_op = &ext2_dir_inode_operations;
1107 inode->i_fop = &ext2_dir_operations;
1108 if (test_opt(inode->i_sb, NOBH))
1109 inode->i_mapping->a_ops = &ext2_nobh_aops;
1110 else
1111 inode->i_mapping->a_ops = &ext2_aops;
1112 } else if (S_ISLNK(inode->i_mode)) {
1113 if (ext2_inode_is_fast_symlink(inode))
1114 inode->i_op = &ext2_fast_symlink_inode_operations;
1115 else {
1116 inode->i_op = &ext2_symlink_inode_operations;
1117 if (test_opt(inode->i_sb, NOBH))
1118 inode->i_mapping->a_ops = &ext2_nobh_aops;
1119 else
1120 inode->i_mapping->a_ops = &ext2_aops;
1121 }
1122 } else {
1123 inode->i_op = &ext2_special_inode_operations;
1124 if (raw_inode->i_block[0])
1125 init_special_inode(inode, inode->i_mode,
1126 old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
1127 else
1128 init_special_inode(inode, inode->i_mode,
1129 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
1130 }
1131 brelse (bh);
1132 ext2_set_inode_flags(inode);
1133 return;
1134
1135bad_inode:
1136 make_bad_inode(inode);
1137 return;
1138}
1139
1140static int ext2_update_inode(struct inode * inode, int do_sync)
1141{
1142 struct ext2_inode_info *ei = EXT2_I(inode);
1143 struct super_block *sb = inode->i_sb;
1144 ino_t ino = inode->i_ino;
1145 uid_t uid = inode->i_uid;
1146 gid_t gid = inode->i_gid;
1147 struct buffer_head * bh;
1148 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
1149 int n;
1150 int err = 0;
1151
1152 if (IS_ERR(raw_inode))
1153 return -EIO;
1154
1155	/* For fields not tracked in the in-memory inode,
1156 * initialise them to zero for new inodes. */
1157 if (ei->i_state & EXT2_STATE_NEW)
1158 memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);
1159
1160 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
1161 if (!(test_opt(sb, NO_UID32))) {
1162 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
1163 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
1164/*
1165 * Fix up interoperability with old kernels. Otherwise, old inodes get
1166 * re-used with the upper 16 bits of the uid/gid intact
1167 */
1168 if (!ei->i_dtime) {
1169 raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid));
1170 raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid));
1171 } else {
1172 raw_inode->i_uid_high = 0;
1173 raw_inode->i_gid_high = 0;
1174 }
1175 } else {
1176 raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid));
1177 raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid));
1178 raw_inode->i_uid_high = 0;
1179 raw_inode->i_gid_high = 0;
1180 }
1181 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
1182 raw_inode->i_size = cpu_to_le32(inode->i_size);
1183 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
1184 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
1185 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
1186
1187 raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
1188 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
1189 raw_inode->i_flags = cpu_to_le32(ei->i_flags);
1190 raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
1191 raw_inode->i_frag = ei->i_frag_no;
1192 raw_inode->i_fsize = ei->i_frag_size;
1193 raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
1194 if (!S_ISREG(inode->i_mode))
1195 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
1196 else {
1197 raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32);
1198 if (inode->i_size > 0x7fffffffULL) {
1199 if (!EXT2_HAS_RO_COMPAT_FEATURE(sb,
1200 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) ||
1201 EXT2_SB(sb)->s_es->s_rev_level ==
1202 cpu_to_le32(EXT2_GOOD_OLD_REV)) {
1203 /* If this is the first large file
1204 * created, add a flag to the superblock.
1205 */
1206 lock_kernel();
1207 ext2_update_dynamic_rev(sb);
1208 EXT2_SET_RO_COMPAT_FEATURE(sb,
1209 EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
1210 unlock_kernel();
1211 ext2_write_super(sb);
1212 }
1213 }
1214 }
1215
1216 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
1217 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
1218 if (old_valid_dev(inode->i_rdev)) {
1219 raw_inode->i_block[0] =
1220 cpu_to_le32(old_encode_dev(inode->i_rdev));
1221 raw_inode->i_block[1] = 0;
1222 } else {
1223 raw_inode->i_block[0] = 0;
1224 raw_inode->i_block[1] =
1225 cpu_to_le32(new_encode_dev(inode->i_rdev));
1226 raw_inode->i_block[2] = 0;
1227 }
1228 } else for (n = 0; n < EXT2_N_BLOCKS; n++)
1229 raw_inode->i_block[n] = ei->i_data[n];
1230 mark_buffer_dirty(bh);
1231 if (do_sync) {
1232 sync_dirty_buffer(bh);
1233 if (buffer_req(bh) && !buffer_uptodate(bh)) {
1234 printk ("IO error syncing ext2 inode [%s:%08lx]\n",
1235 sb->s_id, (unsigned long) ino);
1236 err = -EIO;
1237 }
1238 }
1239 ei->i_state &= ~EXT2_STATE_NEW;
1240 brelse (bh);
1241 return err;
1242}
1243
1244int ext2_write_inode(struct inode *inode, int wait)
1245{
1246 return ext2_update_inode(inode, wait);
1247}
1248
1249int ext2_sync_inode(struct inode *inode)
1250{
1251 struct writeback_control wbc = {
1252 .sync_mode = WB_SYNC_ALL,
1253 .nr_to_write = 0, /* sys_fsync did this */
1254 };
1255 return sync_inode(inode, &wbc);
1256}
1257
1258int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1259{
1260 struct inode *inode = dentry->d_inode;
1261 int error;
1262
1263 error = inode_change_ok(inode, iattr);
1264 if (error)
1265 return error;
1266 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
1267 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
1268 error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0;
1269 if (error)
1270 return error;
1271 }
1272 error = inode_setattr(inode, iattr);
1273 if (!error && (iattr->ia_valid & ATTR_MODE))
1274 error = ext2_acl_chmod(inode);
1275 return error;
1276}
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
new file mode 100644
index 000000000000..709d8676b962
--- /dev/null
+++ b/fs/ext2/ioctl.c
@@ -0,0 +1,81 @@
1/*
2 * linux/fs/ext2/ioctl.c
3 *
4 * Copyright (C) 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 */
9
10#include "ext2.h"
11#include <linux/time.h>
12#include <linux/sched.h>
13#include <asm/current.h>
14#include <asm/uaccess.h>
15
16
17int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
18 unsigned long arg)
19{
20 struct ext2_inode_info *ei = EXT2_I(inode);
21 unsigned int flags;
22
23 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
24
25 switch (cmd) {
26 case EXT2_IOC_GETFLAGS:
27 flags = ei->i_flags & EXT2_FL_USER_VISIBLE;
28 return put_user(flags, (int __user *) arg);
29 case EXT2_IOC_SETFLAGS: {
30 unsigned int oldflags;
31
32 if (IS_RDONLY(inode))
33 return -EROFS;
34
35 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
36 return -EACCES;
37
38 if (get_user(flags, (int __user *) arg))
39 return -EFAULT;
40
41 if (!S_ISDIR(inode->i_mode))
42 flags &= ~EXT2_DIRSYNC_FL;
43
44 oldflags = ei->i_flags;
45
46 /*
47		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
48		 * a process with the relevant capability.
49 *
50 * This test looks nicer. Thanks to Pauline Middelink
51 */
52 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
53 if (!capable(CAP_LINUX_IMMUTABLE))
54 return -EPERM;
55 }
56
57 flags = flags & EXT2_FL_USER_MODIFIABLE;
58 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
59 ei->i_flags = flags;
60
61 ext2_set_inode_flags(inode);
62 inode->i_ctime = CURRENT_TIME_SEC;
63 mark_inode_dirty(inode);
64 return 0;
65 }
66 case EXT2_IOC_GETVERSION:
67 return put_user(inode->i_generation, (int __user *) arg);
68 case EXT2_IOC_SETVERSION:
69 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
70 return -EPERM;
71 if (IS_RDONLY(inode))
72 return -EROFS;
73 if (get_user(inode->i_generation, (int __user *) arg))
74 return -EFAULT;
75 inode->i_ctime = CURRENT_TIME_SEC;
76 mark_inode_dirty(inode);
77 return 0;
78 default:
79 return -ENOTTY;
80 }
81}
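For context, a hedged user-space sketch of how these ioctls are typically exercised (this is the mechanism behind tools such as lsattr; the availability of EXT2_IOC_GETFLAGS and EXT2_IMMUTABLE_FL via <linux/ext2_fs.h> in the caller's environment is an assumption, and error handling is trimmed):

/* Hedged user-space sketch, not kernel code. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ext2_fs.h>	/* assumed to provide EXT2_IOC_GETFLAGS */

int example_is_immutable(const char *path)
{
	int flags = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (ioctl(fd, EXT2_IOC_GETFLAGS, &flags) < 0)
		flags = 0;
	close(fd);
	return (flags & EXT2_IMMUTABLE_FL) != 0;
}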
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
new file mode 100644
index 000000000000..3176b3d3ffa8
--- /dev/null
+++ b/fs/ext2/namei.c
@@ -0,0 +1,418 @@
1/*
2 * linux/fs/ext2/namei.c
3 *
4 * Rewrite to pagecache. Almost all code had been changed, so blame me
5 * if things go wrong. Please send bug reports to
6 * viro@parcelfarce.linux.theplanet.co.uk
7 *
8 * Stuff here is basically a glue between the VFS and generic UNIXish
9 * filesystem that keeps everything in pagecache. All knowledge of the
10 * directory layout is in fs/ext2/dir.c - it turned out to be easily separable
11 * and it's easier to debug that way. In principle we might want to
12 * generalize that a bit and turn it into a library. Or not.
13 *
14 * The only non-static object here is ext2_dir_inode_operations.
15 *
16 * TODO: get rid of kmap() use, add readahead.
17 *
18 * Copyright (C) 1992, 1993, 1994, 1995
19 * Remy Card (card@masi.ibp.fr)
20 * Laboratoire MASI - Institut Blaise Pascal
21 * Universite Pierre et Marie Curie (Paris VI)
22 *
23 * from
24 *
25 * linux/fs/minix/namei.c
26 *
27 * Copyright (C) 1991, 1992 Linus Torvalds
28 *
29 * Big-endian to little-endian byte-swapping/bitmaps by
30 * David S. Miller (davem@caip.rutgers.edu), 1995
31 */
32
33#include <linux/pagemap.h>
34#include "ext2.h"
35#include "xattr.h"
36#include "acl.h"
37
38/*
39 * A couple of helper functions - they make the code slightly cleaner.
40 */
41
42static inline void ext2_inc_count(struct inode *inode)
43{
44 inode->i_nlink++;
45 mark_inode_dirty(inode);
46}
47
48static inline void ext2_dec_count(struct inode *inode)
49{
50 inode->i_nlink--;
51 mark_inode_dirty(inode);
52}
53
54static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
55{
56 int err = ext2_add_link(dentry, inode);
57 if (!err) {
58 d_instantiate(dentry, inode);
59 return 0;
60 }
61 ext2_dec_count(inode);
62 iput(inode);
63 return err;
64}
65
66/*
67 * Methods themselves.
68 */
69
70static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
71{
72 struct inode * inode;
73 ino_t ino;
74
75 if (dentry->d_name.len > EXT2_NAME_LEN)
76 return ERR_PTR(-ENAMETOOLONG);
77
78 ino = ext2_inode_by_name(dir, dentry);
79 inode = NULL;
80 if (ino) {
81 inode = iget(dir->i_sb, ino);
82 if (!inode)
83 return ERR_PTR(-EACCES);
84 }
85 if (inode)
86 return d_splice_alias(inode, dentry);
87 d_add(dentry, inode);
88 return NULL;
89}
90
91struct dentry *ext2_get_parent(struct dentry *child)
92{
93 unsigned long ino;
94 struct dentry *parent;
95 struct inode *inode;
96 struct dentry dotdot;
97
98 dotdot.d_name.name = "..";
99 dotdot.d_name.len = 2;
100
101 ino = ext2_inode_by_name(child->d_inode, &dotdot);
102 if (!ino)
103 return ERR_PTR(-ENOENT);
104 inode = iget(child->d_inode->i_sb, ino);
105
106 if (!inode)
107 return ERR_PTR(-EACCES);
108 parent = d_alloc_anon(inode);
109 if (!parent) {
110 iput(inode);
111 parent = ERR_PTR(-ENOMEM);
112 }
113 return parent;
114}
115
116/*
117 * By the time this is called, we already have created
118 * the directory cache entry for the new file, but it
119 * is so far negative - it has no inode.
120 *
121 * If the create succeeds, we fill in the inode information
122 * with d_instantiate().
123 */
124static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
125{
126 struct inode * inode = ext2_new_inode (dir, mode);
127 int err = PTR_ERR(inode);
128 if (!IS_ERR(inode)) {
129 inode->i_op = &ext2_file_inode_operations;
130 inode->i_fop = &ext2_file_operations;
131 if (test_opt(inode->i_sb, NOBH))
132 inode->i_mapping->a_ops = &ext2_nobh_aops;
133 else
134 inode->i_mapping->a_ops = &ext2_aops;
135 mark_inode_dirty(inode);
136 err = ext2_add_nondir(dentry, inode);
137 }
138 return err;
139}
140
141static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
142{
143 struct inode * inode;
144 int err;
145
146 if (!new_valid_dev(rdev))
147 return -EINVAL;
148
149 inode = ext2_new_inode (dir, mode);
150 err = PTR_ERR(inode);
151 if (!IS_ERR(inode)) {
152 init_special_inode(inode, inode->i_mode, rdev);
153#ifdef CONFIG_EXT2_FS_XATTR
154 inode->i_op = &ext2_special_inode_operations;
155#endif
156 mark_inode_dirty(inode);
157 err = ext2_add_nondir(dentry, inode);
158 }
159 return err;
160}
161
162static int ext2_symlink (struct inode * dir, struct dentry * dentry,
163 const char * symname)
164{
165 struct super_block * sb = dir->i_sb;
166 int err = -ENAMETOOLONG;
167 unsigned l = strlen(symname)+1;
168 struct inode * inode;
169
170 if (l > sb->s_blocksize)
171 goto out;
172
173 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
174 err = PTR_ERR(inode);
175 if (IS_ERR(inode))
176 goto out;
177
178 if (l > sizeof (EXT2_I(inode)->i_data)) {
179 /* slow symlink */
180 inode->i_op = &ext2_symlink_inode_operations;
181 if (test_opt(inode->i_sb, NOBH))
182 inode->i_mapping->a_ops = &ext2_nobh_aops;
183 else
184 inode->i_mapping->a_ops = &ext2_aops;
185 err = page_symlink(inode, symname, l);
186 if (err)
187 goto out_fail;
188 } else {
189 /* fast symlink */
190 inode->i_op = &ext2_fast_symlink_inode_operations;
191 memcpy((char*)(EXT2_I(inode)->i_data),symname,l);
192 inode->i_size = l-1;
193 }
194 mark_inode_dirty(inode);
195
196 err = ext2_add_nondir(dentry, inode);
197out:
198 return err;
199
200out_fail:
201 ext2_dec_count(inode);
202 iput (inode);
203 goto out;
204}
205
206static int ext2_link (struct dentry * old_dentry, struct inode * dir,
207 struct dentry *dentry)
208{
209 struct inode *inode = old_dentry->d_inode;
210
211 if (inode->i_nlink >= EXT2_LINK_MAX)
212 return -EMLINK;
213
214 inode->i_ctime = CURRENT_TIME_SEC;
215 ext2_inc_count(inode);
216 atomic_inc(&inode->i_count);
217
218 return ext2_add_nondir(dentry, inode);
219}
220
221static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
222{
223 struct inode * inode;
224 int err = -EMLINK;
225
226 if (dir->i_nlink >= EXT2_LINK_MAX)
227 goto out;
228
229 ext2_inc_count(dir);
230
231 inode = ext2_new_inode (dir, S_IFDIR | mode);
232 err = PTR_ERR(inode);
233 if (IS_ERR(inode))
234 goto out_dir;
235
236 inode->i_op = &ext2_dir_inode_operations;
237 inode->i_fop = &ext2_dir_operations;
238 if (test_opt(inode->i_sb, NOBH))
239 inode->i_mapping->a_ops = &ext2_nobh_aops;
240 else
241 inode->i_mapping->a_ops = &ext2_aops;
242
243 ext2_inc_count(inode);
244
245 err = ext2_make_empty(inode, dir);
246 if (err)
247 goto out_fail;
248
249 err = ext2_add_link(dentry, inode);
250 if (err)
251 goto out_fail;
252
253 d_instantiate(dentry, inode);
254out:
255 return err;
256
257out_fail:
258 ext2_dec_count(inode);
259 ext2_dec_count(inode);
260 iput(inode);
261out_dir:
262 ext2_dec_count(dir);
263 goto out;
264}
265
266static int ext2_unlink(struct inode * dir, struct dentry *dentry)
267{
268 struct inode * inode = dentry->d_inode;
269 struct ext2_dir_entry_2 * de;
270 struct page * page;
271 int err = -ENOENT;
272
273 de = ext2_find_entry (dir, dentry, &page);
274 if (!de)
275 goto out;
276
277 err = ext2_delete_entry (de, page);
278 if (err)
279 goto out;
280
281 inode->i_ctime = dir->i_ctime;
282 ext2_dec_count(inode);
283 err = 0;
284out:
285 return err;
286}
287
288static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
289{
290 struct inode * inode = dentry->d_inode;
291 int err = -ENOTEMPTY;
292
293 if (ext2_empty_dir(inode)) {
294 err = ext2_unlink(dir, dentry);
295 if (!err) {
296 inode->i_size = 0;
297 ext2_dec_count(inode);
298 ext2_dec_count(dir);
299 }
300 }
301 return err;
302}
303
304static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
305 struct inode * new_dir, struct dentry * new_dentry )
306{
307 struct inode * old_inode = old_dentry->d_inode;
308 struct inode * new_inode = new_dentry->d_inode;
309 struct page * dir_page = NULL;
310 struct ext2_dir_entry_2 * dir_de = NULL;
311 struct page * old_page;
312 struct ext2_dir_entry_2 * old_de;
313 int err = -ENOENT;
314
315 old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
316 if (!old_de)
317 goto out;
318
319 if (S_ISDIR(old_inode->i_mode)) {
320 err = -EIO;
321 dir_de = ext2_dotdot(old_inode, &dir_page);
322 if (!dir_de)
323 goto out_old;
324 }
325
326 if (new_inode) {
327 struct page *new_page;
328 struct ext2_dir_entry_2 *new_de;
329
330 err = -ENOTEMPTY;
331 if (dir_de && !ext2_empty_dir (new_inode))
332 goto out_dir;
333
334 err = -ENOENT;
335 new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
336 if (!new_de)
337 goto out_dir;
338 ext2_inc_count(old_inode);
339 ext2_set_link(new_dir, new_de, new_page, old_inode);
340 new_inode->i_ctime = CURRENT_TIME_SEC;
341 if (dir_de)
342 new_inode->i_nlink--;
343 ext2_dec_count(new_inode);
344 } else {
345 if (dir_de) {
346 err = -EMLINK;
347 if (new_dir->i_nlink >= EXT2_LINK_MAX)
348 goto out_dir;
349 }
350 ext2_inc_count(old_inode);
351 err = ext2_add_link(new_dentry, old_inode);
352 if (err) {
353 ext2_dec_count(old_inode);
354 goto out_dir;
355 }
356 if (dir_de)
357 ext2_inc_count(new_dir);
358 }
359
360 /*
361 * Like most other Unix systems, set the ctime for inodes on a
362 * rename.
363 * ext2_dec_count() will mark the inode dirty.
364 */
365 old_inode->i_ctime = CURRENT_TIME_SEC;
366
367 ext2_delete_entry (old_de, old_page);
368 ext2_dec_count(old_inode);
369
370 if (dir_de) {
371 ext2_set_link(old_inode, dir_de, dir_page, new_dir);
372 ext2_dec_count(old_dir);
373 }
374 return 0;
375
376
377out_dir:
378 if (dir_de) {
379 kunmap(dir_page);
380 page_cache_release(dir_page);
381 }
382out_old:
383 kunmap(old_page);
384 page_cache_release(old_page);
385out:
386 return err;
387}
388
389struct inode_operations ext2_dir_inode_operations = {
390 .create = ext2_create,
391 .lookup = ext2_lookup,
392 .link = ext2_link,
393 .unlink = ext2_unlink,
394 .symlink = ext2_symlink,
395 .mkdir = ext2_mkdir,
396 .rmdir = ext2_rmdir,
397 .mknod = ext2_mknod,
398 .rename = ext2_rename,
399#ifdef CONFIG_EXT2_FS_XATTR
400 .setxattr = generic_setxattr,
401 .getxattr = generic_getxattr,
402 .listxattr = ext2_listxattr,
403 .removexattr = generic_removexattr,
404#endif
405 .setattr = ext2_setattr,
406 .permission = ext2_permission,
407};
408
409struct inode_operations ext2_special_inode_operations = {
410#ifdef CONFIG_EXT2_FS_XATTR
411 .setxattr = generic_setxattr,
412 .getxattr = generic_getxattr,
413 .listxattr = ext2_listxattr,
414 .removexattr = generic_removexattr,
415#endif
416 .setattr = ext2_setattr,
417 .permission = ext2_permission,
418};
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
new file mode 100644
index 000000000000..37ca77a157ba
--- /dev/null
+++ b/fs/ext2/super.c
@@ -0,0 +1,1161 @@
1/*
2 * linux/fs/ext2/super.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/fs/minix/inode.c
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */
18
19#include <linux/config.h>
20#include <linux/module.h>
21#include <linux/string.h>
22#include <linux/slab.h>
23#include <linux/init.h>
24#include <linux/blkdev.h>
25#include <linux/parser.h>
26#include <linux/random.h>
27#include <linux/buffer_head.h>
28#include <linux/smp_lock.h>
29#include <linux/vfs.h>
30#include <asm/uaccess.h>
31#include "ext2.h"
32#include "xattr.h"
33#include "acl.h"
34
35static void ext2_sync_super(struct super_block *sb,
36 struct ext2_super_block *es);
37static int ext2_remount (struct super_block * sb, int * flags, char * data);
38static int ext2_statfs (struct super_block * sb, struct kstatfs * buf);
39
40void ext2_error (struct super_block * sb, const char * function,
41 const char * fmt, ...)
42{
43 va_list args;
44 struct ext2_sb_info *sbi = EXT2_SB(sb);
45 struct ext2_super_block *es = sbi->s_es;
46
47 if (!(sb->s_flags & MS_RDONLY)) {
48 sbi->s_mount_state |= EXT2_ERROR_FS;
49 es->s_state =
50 cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS);
51 ext2_sync_super(sb, es);
52 }
53
54 va_start(args, fmt);
55 printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function);
56 vprintk(fmt, args);
57 printk("\n");
58 va_end(args);
59
60 if (test_opt(sb, ERRORS_PANIC))
61 panic("EXT2-fs panic from previous error\n");
62 if (test_opt(sb, ERRORS_RO)) {
63 printk("Remounting filesystem read-only\n");
64 sb->s_flags |= MS_RDONLY;
65 }
66}
67
68void ext2_warning (struct super_block * sb, const char * function,
69 const char * fmt, ...)
70{
71 va_list args;
72
73 va_start(args, fmt);
74 printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ",
75 sb->s_id, function);
76 vprintk(fmt, args);
77 printk("\n");
78 va_end(args);
79}
80
81void ext2_update_dynamic_rev(struct super_block *sb)
82{
83 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
84
85 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
86 return;
87
88 ext2_warning(sb, __FUNCTION__,
89 "updating to rev %d because of new feature flag, "
90 "running e2fsck is recommended",
91 EXT2_DYNAMIC_REV);
92
93 es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
94 es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE);
95 es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV);
96 /* leave es->s_feature_*compat flags alone */
97 /* es->s_uuid will be set by e2fsck if empty */
98
99 /*
100 * The rest of the superblock fields should be zero, and if not it
101 * means they are likely already in use, so leave them alone. We
102 * can leave it up to e2fsck to clean up any inconsistencies there.
103 */
104}
105
106static void ext2_put_super (struct super_block * sb)
107{
108 int db_count;
109 int i;
110 struct ext2_sb_info *sbi = EXT2_SB(sb);
111
112 ext2_xattr_put_super(sb);
113 if (!(sb->s_flags & MS_RDONLY)) {
114 struct ext2_super_block *es = sbi->s_es;
115
116 es->s_state = cpu_to_le16(sbi->s_mount_state);
117 ext2_sync_super(sb, es);
118 }
119 db_count = sbi->s_gdb_count;
120 for (i = 0; i < db_count; i++)
121 if (sbi->s_group_desc[i])
122 brelse (sbi->s_group_desc[i]);
123 kfree(sbi->s_group_desc);
124 kfree(sbi->s_debts);
125 percpu_counter_destroy(&sbi->s_freeblocks_counter);
126 percpu_counter_destroy(&sbi->s_freeinodes_counter);
127 percpu_counter_destroy(&sbi->s_dirs_counter);
128 brelse (sbi->s_sbh);
129 sb->s_fs_info = NULL;
130 kfree(sbi);
131
132 return;
133}
134
135static kmem_cache_t * ext2_inode_cachep;
136
137static struct inode *ext2_alloc_inode(struct super_block *sb)
138{
139 struct ext2_inode_info *ei;
140 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL);
141 if (!ei)
142 return NULL;
143#ifdef CONFIG_EXT2_FS_POSIX_ACL
144 ei->i_acl = EXT2_ACL_NOT_CACHED;
145 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
146#endif
147 ei->vfs_inode.i_version = 1;
148 return &ei->vfs_inode;
149}
150
151static void ext2_destroy_inode(struct inode *inode)
152{
153 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
154}
155
156static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
157{
158 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
159
160 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
161 SLAB_CTOR_CONSTRUCTOR) {
162 rwlock_init(&ei->i_meta_lock);
163#ifdef CONFIG_EXT2_FS_XATTR
164 init_rwsem(&ei->xattr_sem);
165#endif
166 inode_init_once(&ei->vfs_inode);
167 }
168}
169
170static int init_inodecache(void)
171{
172 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
173 sizeof(struct ext2_inode_info),
174 0, SLAB_RECLAIM_ACCOUNT,
175 init_once, NULL);
176 if (ext2_inode_cachep == NULL)
177 return -ENOMEM;
178 return 0;
179}
180
181static void destroy_inodecache(void)
182{
183 if (kmem_cache_destroy(ext2_inode_cachep))
184 printk(KERN_INFO "ext2_inode_cache: not all structures were freed\n");
185}
186
187static void ext2_clear_inode(struct inode *inode)
188{
189#ifdef CONFIG_EXT2_FS_POSIX_ACL
190 struct ext2_inode_info *ei = EXT2_I(inode);
191
192 if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
193 posix_acl_release(ei->i_acl);
194 ei->i_acl = EXT2_ACL_NOT_CACHED;
195 }
196 if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
197 posix_acl_release(ei->i_default_acl);
198 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
199 }
200#endif
201 if (!is_bad_inode(inode))
202 ext2_discard_prealloc(inode);
203}
204
205
206#ifdef CONFIG_QUOTA
207static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
208static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
209#endif
210
211static struct super_operations ext2_sops = {
212 .alloc_inode = ext2_alloc_inode,
213 .destroy_inode = ext2_destroy_inode,
214 .read_inode = ext2_read_inode,
215 .write_inode = ext2_write_inode,
216 .delete_inode = ext2_delete_inode,
217 .put_super = ext2_put_super,
218 .write_super = ext2_write_super,
219 .statfs = ext2_statfs,
220 .remount_fs = ext2_remount,
221 .clear_inode = ext2_clear_inode,
222#ifdef CONFIG_QUOTA
223 .quota_read = ext2_quota_read,
224 .quota_write = ext2_quota_write,
225#endif
226};
227
228/* Yes, most of these are left as NULL!!
229 * A NULL value implies the default, which works with ext2-like file
230 * systems, but can be improved upon.
231 * Currently only get_parent is required.
232 */
233struct dentry *ext2_get_parent(struct dentry *child);
234static struct export_operations ext2_export_ops = {
235 .get_parent = ext2_get_parent,
236};
237
238static unsigned long get_sb_block(void **data)
239{
240 unsigned long sb_block;
241 char *options = (char *) *data;
242
243 if (!options || strncmp(options, "sb=", 3) != 0)
244 return 1; /* Default location */
245 options += 3;
246 sb_block = simple_strtoul(options, &options, 0);
247 if (*options && *options != ',') {
248 printk("EXT2-fs: Invalid sb specification: %s\n",
249 (char *) *data);
250 return 1;
251 }
252 if (*options == ',')
253 options++;
254 *data = (void *) options;
255 return sb_block;
256}
257
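A hedged usage note: the `sb=' mount option selects an alternate superblock when the primary one is damaged, e.g. `sb=8193' for the first backup on a filesystem with 1 KiB blocks (with 4 KiB blocks the first backup normally sits at block 32768); get_sb_block() strips the option and hands the remainder of the string back through *data.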
258enum {
259 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
260 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
261 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
262 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
263 Opt_ignore, Opt_err,
264};
265
266static match_table_t tokens = {
267 {Opt_bsd_df, "bsddf"},
268 {Opt_minix_df, "minixdf"},
269 {Opt_grpid, "grpid"},
270 {Opt_grpid, "bsdgroups"},
271 {Opt_nogrpid, "nogrpid"},
272 {Opt_nogrpid, "sysvgroups"},
273 {Opt_resgid, "resgid=%u"},
274 {Opt_resuid, "resuid=%u"},
275 {Opt_sb, "sb=%u"},
276 {Opt_err_cont, "errors=continue"},
277 {Opt_err_panic, "errors=panic"},
278 {Opt_err_ro, "errors=remount-ro"},
279 {Opt_nouid32, "nouid32"},
280 {Opt_nocheck, "check=none"},
281 {Opt_nocheck, "nocheck"},
282 {Opt_check, "check"},
283 {Opt_debug, "debug"},
284 {Opt_oldalloc, "oldalloc"},
285 {Opt_orlov, "orlov"},
286 {Opt_nobh, "nobh"},
287 {Opt_user_xattr, "user_xattr"},
288 {Opt_nouser_xattr, "nouser_xattr"},
289 {Opt_acl, "acl"},
290 {Opt_noacl, "noacl"},
291 {Opt_ignore, "grpquota"},
292 {Opt_ignore, "noquota"},
293 {Opt_ignore, "quota"},
294 {Opt_ignore, "usrquota"},
295 {Opt_err, NULL}
296};
297
298static int parse_options (char * options,
299 struct ext2_sb_info *sbi)
300{
301 char * p;
302 substring_t args[MAX_OPT_ARGS];
303 unsigned long kind = EXT2_MOUNT_ERRORS_CONT;
304 int option;
305
306 if (!options)
307 return 1;
308
309 while ((p = strsep (&options, ",")) != NULL) {
310 int token;
311 if (!*p)
312 continue;
313
314 token = match_token(p, tokens, args);
315 switch (token) {
316 case Opt_bsd_df:
317 clear_opt (sbi->s_mount_opt, MINIX_DF);
318 break;
319 case Opt_minix_df:
320 set_opt (sbi->s_mount_opt, MINIX_DF);
321 break;
322 case Opt_grpid:
323 set_opt (sbi->s_mount_opt, GRPID);
324 break;
325 case Opt_nogrpid:
326 clear_opt (sbi->s_mount_opt, GRPID);
327 break;
328 case Opt_resuid:
329 if (match_int(&args[0], &option))
330 return 0;
331 sbi->s_resuid = option;
332 break;
333 case Opt_resgid:
334 if (match_int(&args[0], &option))
335 return 0;
336 sbi->s_resgid = option;
337 break;
338 case Opt_sb:
339 /* handled by get_sb_block() instead of here */
340 /* *sb_block = match_int(&args[0]); */
341 break;
342 case Opt_err_panic:
343 kind = EXT2_MOUNT_ERRORS_PANIC;
344 break;
345 case Opt_err_ro:
346 kind = EXT2_MOUNT_ERRORS_RO;
347 break;
348 case Opt_err_cont:
349 kind = EXT2_MOUNT_ERRORS_CONT;
350 break;
351 case Opt_nouid32:
352 set_opt (sbi->s_mount_opt, NO_UID32);
353 break;
354 case Opt_check:
355#ifdef CONFIG_EXT2_CHECK
356 set_opt (sbi->s_mount_opt, CHECK);
357#else
358 printk("EXT2 Check option not supported\n");
359#endif
360 break;
361 case Opt_nocheck:
362 clear_opt (sbi->s_mount_opt, CHECK);
363 break;
364 case Opt_debug:
365 set_opt (sbi->s_mount_opt, DEBUG);
366 break;
367 case Opt_oldalloc:
368 set_opt (sbi->s_mount_opt, OLDALLOC);
369 break;
370 case Opt_orlov:
371 clear_opt (sbi->s_mount_opt, OLDALLOC);
372 break;
373 case Opt_nobh:
374 set_opt (sbi->s_mount_opt, NOBH);
375 break;
376#ifdef CONFIG_EXT2_FS_XATTR
377 case Opt_user_xattr:
378 set_opt (sbi->s_mount_opt, XATTR_USER);
379 break;
380 case Opt_nouser_xattr:
381 clear_opt (sbi->s_mount_opt, XATTR_USER);
382 break;
383#else
384 case Opt_user_xattr:
385 case Opt_nouser_xattr:
386 printk("EXT2 (no)user_xattr options not supported\n");
387 break;
388#endif
389#ifdef CONFIG_EXT2_FS_POSIX_ACL
390 case Opt_acl:
391 set_opt(sbi->s_mount_opt, POSIX_ACL);
392 break;
393 case Opt_noacl:
394 clear_opt(sbi->s_mount_opt, POSIX_ACL);
395 break;
396#else
397 case Opt_acl:
398 case Opt_noacl:
399 printk("EXT2 (no)acl options not supported\n");
400 break;
401#endif
402 case Opt_ignore:
403 break;
404 default:
405 return 0;
406 }
407 }
408 sbi->s_mount_opt |= kind;
409 return 1;
410}
411
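As a hedged example of the parsing above: a mount data string such as "nouid32,errors=remount-ro,acl" sets NO_UID32 in sbi->s_mount_opt, records EXT2_MOUNT_ERRORS_RO in `kind' (OR-ed in at the end), and sets POSIX_ACL provided CONFIG_EXT2_FS_POSIX_ACL is enabled; any token that matches nothing in the table reaches the default case and makes parse_options() return 0, failing the mount.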
412static int ext2_setup_super (struct super_block * sb,
413 struct ext2_super_block * es,
414 int read_only)
415{
416 int res = 0;
417 struct ext2_sb_info *sbi = EXT2_SB(sb);
418
419 if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
420 printk ("EXT2-fs warning: revision level too high, "
421 "forcing read-only mode\n");
422 res = MS_RDONLY;
423 }
424 if (read_only)
425 return res;
426 if (!(sbi->s_mount_state & EXT2_VALID_FS))
427 printk ("EXT2-fs warning: mounting unchecked fs, "
428 "running e2fsck is recommended\n");
429 else if ((sbi->s_mount_state & EXT2_ERROR_FS))
430 printk ("EXT2-fs warning: mounting fs with errors, "
431 "running e2fsck is recommended\n");
432 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
433 le16_to_cpu(es->s_mnt_count) >=
434 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
435 printk ("EXT2-fs warning: maximal mount count reached, "
436 "running e2fsck is recommended\n");
437 else if (le32_to_cpu(es->s_checkinterval) &&
438 (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds()))
439 printk ("EXT2-fs warning: checktime reached, "
440 "running e2fsck is recommended\n");
441 if (!le16_to_cpu(es->s_max_mnt_count))
442 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
443 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
444 ext2_write_super(sb);
445 if (test_opt (sb, DEBUG))
446 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
447 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
448 EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
449 sbi->s_frag_size,
450 sbi->s_groups_count,
451 EXT2_BLOCKS_PER_GROUP(sb),
452 EXT2_INODES_PER_GROUP(sb),
453 sbi->s_mount_opt);
454#ifdef CONFIG_EXT2_CHECK
455 if (test_opt (sb, CHECK)) {
456 ext2_check_blocks_bitmap (sb);
457 ext2_check_inodes_bitmap (sb);
458 }
459#endif
460 return res;
461}
462
463static int ext2_check_descriptors (struct super_block * sb)
464{
465 int i;
466 int desc_block = 0;
467 struct ext2_sb_info *sbi = EXT2_SB(sb);
468 unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
469 struct ext2_group_desc * gdp = NULL;
470
471 ext2_debug ("Checking group descriptors");
472
473 for (i = 0; i < sbi->s_groups_count; i++)
474 {
475 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
476 gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data;
477 if (le32_to_cpu(gdp->bg_block_bitmap) < block ||
478 le32_to_cpu(gdp->bg_block_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb))
479 {
480 ext2_error (sb, "ext2_check_descriptors",
481 "Block bitmap for group %d"
482 " not in group (block %lu)!",
483 i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap));
484 return 0;
485 }
486 if (le32_to_cpu(gdp->bg_inode_bitmap) < block ||
487 le32_to_cpu(gdp->bg_inode_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb))
488 {
489 ext2_error (sb, "ext2_check_descriptors",
490 "Inode bitmap for group %d"
491 " not in group (block %lu)!",
492 i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap));
493 return 0;
494 }
495 if (le32_to_cpu(gdp->bg_inode_table) < block ||
496 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >=
497 block + EXT2_BLOCKS_PER_GROUP(sb))
498 {
499 ext2_error (sb, "ext2_check_descriptors",
500 "Inode table for group %d"
501 " not in group (block %lu)!",
502 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
503 return 0;
504 }
505 block += EXT2_BLOCKS_PER_GROUP(sb);
506 gdp++;
507 }
508 return 1;
509}
510
511#define log2(n) ffz(~(n))
512
513/*
514 * Maximal file size. There is a direct, and {,double-,triple-}indirect
515 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
516 * We need to be 1 filesystem block less than the 2^32 sector limit.
517 */
518static loff_t ext2_max_size(int bits)
519{
520 loff_t res = EXT2_NDIR_BLOCKS;
521 /* This constant is calculated to be the largest file size for a
522 * dense, 4k-blocksize file such that the total number of
523 * sectors in the file, including data and all indirect blocks,
524 * does not exceed 2^32. */
525 const loff_t upper_limit = 0x1ff7fffd000LL;
526
527 res += 1LL << (bits-2);
528 res += 1LL << (2*(bits-2));
529 res += 1LL << (3*(bits-2));
530 res <<= bits;
531 if (res > upper_limit)
532 res = upper_limit;
533 return res;
534}
535
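A hedged worked example of the formula above (the arithmetic follows directly from the code; byte figures are rounded): with 1 KiB blocks (bits = 10) the sum is 12 + 256 + 65536 + 16777216 = 16843020 blocks, about 16 GiB, comfortably below upper_limit; with 4 KiB blocks (bits = 12) the raw sum works out to roughly 4 TiB, so the result is clamped to upper_limit, i.e. just under 2 TiB.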
536static unsigned long descriptor_loc(struct super_block *sb,
537 unsigned long logic_sb_block,
538 int nr)
539{
540 struct ext2_sb_info *sbi = EXT2_SB(sb);
541 unsigned long bg, first_data_block, first_meta_bg;
542 int has_super = 0;
543
544 first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
545 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
546
547 if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) ||
548 nr < first_meta_bg)
549 return (logic_sb_block + nr + 1);
550 bg = sbi->s_desc_per_block * nr;
551 if (ext2_bg_has_super(sb, bg))
552 has_super = 1;
553 return (first_data_block + has_super + (bg * sbi->s_blocks_per_group));
554}
555
556static int ext2_fill_super(struct super_block *sb, void *data, int silent)
557{
558 struct buffer_head * bh;
559 struct ext2_sb_info * sbi;
560 struct ext2_super_block * es;
561 struct inode *root;
562 unsigned long block;
563 unsigned long sb_block = get_sb_block(&data);
564 unsigned long logic_sb_block;
565 unsigned long offset = 0;
566 unsigned long def_mount_opts;
567 int blocksize = BLOCK_SIZE;
568 int db_count;
569 int i, j;
570 __le32 features;
571
572 sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
573 if (!sbi)
574 return -ENOMEM;
575 sb->s_fs_info = sbi;
576 memset(sbi, 0, sizeof(*sbi));
577
578 /*
579	 * See what the current blocksize for the device is and use
580	 * that as the blocksize, unless it is smaller than the default,
581	 * in which case use the default.
582 * This is important for devices that have a hardware
583 * sectorsize that is larger than the default.
584 */
585 blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
586 if (!blocksize) {
587 printk ("EXT2-fs: unable to set blocksize\n");
588 goto failed_sbi;
589 }
590
591 /*
592 * If the superblock doesn't start on a hardware sector boundary,
593 * calculate the offset.
594 */
595 if (blocksize != BLOCK_SIZE) {
596 logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
597 offset = (sb_block*BLOCK_SIZE) % blocksize;
598 } else {
599 logic_sb_block = sb_block;
600 }
601
602 if (!(bh = sb_bread(sb, logic_sb_block))) {
603 printk ("EXT2-fs: unable to read superblock\n");
604 goto failed_sbi;
605 }
606 /*
607 * Note: s_es must be initialized as soon as possible because
608 * some ext2 macro-instructions depend on its value
609 */
610 es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
611 sbi->s_es = es;
612 sb->s_magic = le16_to_cpu(es->s_magic);
613
614 if (sb->s_magic != EXT2_SUPER_MAGIC)
615 goto cantfind_ext2;
616
617 /* Set defaults before we parse the mount options */
618 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
619 if (def_mount_opts & EXT2_DEFM_DEBUG)
620 set_opt(sbi->s_mount_opt, DEBUG);
621 if (def_mount_opts & EXT2_DEFM_BSDGROUPS)
622 set_opt(sbi->s_mount_opt, GRPID);
623 if (def_mount_opts & EXT2_DEFM_UID16)
624 set_opt(sbi->s_mount_opt, NO_UID32);
625 if (def_mount_opts & EXT2_DEFM_XATTR_USER)
626 set_opt(sbi->s_mount_opt, XATTR_USER);
627 if (def_mount_opts & EXT2_DEFM_ACL)
628 set_opt(sbi->s_mount_opt, POSIX_ACL);
629
630 if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
631 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
632 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
633 set_opt(sbi->s_mount_opt, ERRORS_RO);
634
635 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
636 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
637
638 if (!parse_options ((char *) data, sbi))
639 goto failed_mount;
640
641 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
642 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
643 MS_POSIXACL : 0);
644
645 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
646 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
647 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
648 EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
649 printk("EXT2-fs warning: feature flags set on rev 0 fs, "
650 "running e2fsck is recommended\n");
651 /*
652 * Check feature flags regardless of the revision level, since we
653 * previously didn't change the revision level when setting the flags,
654 * so there is a chance incompat flags are set on a rev 0 filesystem.
655 */
656 features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
657 if (features) {
658 printk("EXT2-fs: %s: couldn't mount because of "
659 "unsupported optional features (%x).\n",
660 sb->s_id, le32_to_cpu(features));
661 goto failed_mount;
662 }
663 if (!(sb->s_flags & MS_RDONLY) &&
664 (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
665 printk("EXT2-fs: %s: couldn't mount RDWR because of "
666 "unsupported optional features (%x).\n",
667 sb->s_id, le32_to_cpu(features));
668 goto failed_mount;
669 }
670
671 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
672
673 /* If the blocksize doesn't match, re-read the thing.. */
674 if (sb->s_blocksize != blocksize) {
675 brelse(bh);
676
677 if (!sb_set_blocksize(sb, blocksize)) {
678 printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
679 goto failed_sbi;
680 }
681
682 logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
683 offset = (sb_block*BLOCK_SIZE) % blocksize;
684 bh = sb_bread(sb, logic_sb_block);
685 if(!bh) {
686 printk("EXT2-fs: Couldn't read superblock on "
687 "2nd try.\n");
688 goto failed_sbi;
689 }
690 es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
691 sbi->s_es = es;
692 if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
693 printk ("EXT2-fs: Magic mismatch, very weird !\n");
694 goto failed_mount;
695 }
696 }
697
698 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
699
700 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
701 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
702 sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
703 } else {
704 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
705 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
706 if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
707 (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
708 (sbi->s_inode_size > blocksize)) {
709 printk ("EXT2-fs: unsupported inode size: %d\n",
710 sbi->s_inode_size);
711 goto failed_mount;
712 }
713 }
714
715 sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
716 le32_to_cpu(es->s_log_frag_size);
717 if (sbi->s_frag_size == 0)
718 goto cantfind_ext2;
719 sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
720
721 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
722 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
723 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
724
725 if (EXT2_INODE_SIZE(sb) == 0)
726 goto cantfind_ext2;
727 sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
728 if (sbi->s_inodes_per_block == 0)
729 goto cantfind_ext2;
730 sbi->s_itb_per_group = sbi->s_inodes_per_group /
731 sbi->s_inodes_per_block;
732 sbi->s_desc_per_block = sb->s_blocksize /
733 sizeof (struct ext2_group_desc);
734 sbi->s_sbh = bh;
735 sbi->s_mount_state = le16_to_cpu(es->s_state);
736 sbi->s_addr_per_block_bits =
737 log2 (EXT2_ADDR_PER_BLOCK(sb));
738 sbi->s_desc_per_block_bits =
739 log2 (EXT2_DESC_PER_BLOCK(sb));
740
741 if (sb->s_magic != EXT2_SUPER_MAGIC)
742 goto cantfind_ext2;
743
744 if (sb->s_blocksize != bh->b_size) {
745 if (!silent)
746 printk ("VFS: Unsupported blocksize on dev "
747 "%s.\n", sb->s_id);
748 goto failed_mount;
749 }
750
751 if (sb->s_blocksize != sbi->s_frag_size) {
752 printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n",
753 sbi->s_frag_size, sb->s_blocksize);
754 goto failed_mount;
755 }
756
757 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
758 printk ("EXT2-fs: #blocks per group too big: %lu\n",
759 sbi->s_blocks_per_group);
760 goto failed_mount;
761 }
762 if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
763 printk ("EXT2-fs: #fragments per group too big: %lu\n",
764 sbi->s_frags_per_group);
765 goto failed_mount;
766 }
767 if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
768 printk ("EXT2-fs: #inodes per group too big: %lu\n",
769 sbi->s_inodes_per_group);
770 goto failed_mount;
771 }
772
773 if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
774 goto cantfind_ext2;
775 sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
776 le32_to_cpu(es->s_first_data_block) +
777 EXT2_BLOCKS_PER_GROUP(sb) - 1) /
778 EXT2_BLOCKS_PER_GROUP(sb);
779 db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
780 EXT2_DESC_PER_BLOCK(sb);
781 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
782 if (sbi->s_group_desc == NULL) {
783 printk ("EXT2-fs: not enough memory\n");
784 goto failed_mount;
785 }
786 percpu_counter_init(&sbi->s_freeblocks_counter);
787 percpu_counter_init(&sbi->s_freeinodes_counter);
788 percpu_counter_init(&sbi->s_dirs_counter);
789 bgl_lock_init(&sbi->s_blockgroup_lock);
790 sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
791 GFP_KERNEL);
792 if (!sbi->s_debts) {
793 printk ("EXT2-fs: not enough memory\n");
794 goto failed_mount_group_desc;
795 }
796 memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
797 for (i = 0; i < db_count; i++) {
798 block = descriptor_loc(sb, logic_sb_block, i);
799 sbi->s_group_desc[i] = sb_bread(sb, block);
800 if (!sbi->s_group_desc[i]) {
801 for (j = 0; j < i; j++)
802 brelse (sbi->s_group_desc[j]);
803 printk ("EXT2-fs: unable to read group descriptors\n");
804 goto failed_mount_group_desc;
805 }
806 }
807 if (!ext2_check_descriptors (sb)) {
808 printk ("EXT2-fs: group descriptors corrupted!\n");
809 db_count = i;
810 goto failed_mount2;
811 }
812 sbi->s_gdb_count = db_count;
813 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
814 spin_lock_init(&sbi->s_next_gen_lock);
815 /*
816 * set up enough so that it can read an inode
817 */
818 sb->s_op = &ext2_sops;
819 sb->s_export_op = &ext2_export_ops;
820 sb->s_xattr = ext2_xattr_handlers;
821 root = iget(sb, EXT2_ROOT_INO);
822 sb->s_root = d_alloc_root(root);
823 if (!sb->s_root) {
824 iput(root);
825 printk(KERN_ERR "EXT2-fs: get root inode failed\n");
826 goto failed_mount2;
827 }
828 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
829 dput(sb->s_root);
830 sb->s_root = NULL;
831 printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
832 goto failed_mount2;
833 }
834 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
835 ext2_warning(sb, __FUNCTION__,
836 "mounting ext3 filesystem as ext2\n");
837 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
838 percpu_counter_mod(&sbi->s_freeblocks_counter,
839 ext2_count_free_blocks(sb));
840 percpu_counter_mod(&sbi->s_freeinodes_counter,
841 ext2_count_free_inodes(sb));
842 percpu_counter_mod(&sbi->s_dirs_counter,
843 ext2_count_dirs(sb));
844 return 0;
845
846cantfind_ext2:
847 if (!silent)
848 printk("VFS: Can't find an ext2 filesystem on dev %s.\n",
849 sb->s_id);
850 goto failed_mount;
851
852failed_mount2:
853 for (i = 0; i < db_count; i++)
854 brelse(sbi->s_group_desc[i]);
855failed_mount_group_desc:
856 kfree(sbi->s_group_desc);
857 kfree(sbi->s_debts);
858failed_mount:
859 brelse(bh);
860failed_sbi:
861 sb->s_fs_info = NULL;
862 kfree(sbi);
863 return -EINVAL;
864}
865
866static void ext2_commit_super (struct super_block * sb,
867 struct ext2_super_block * es)
868{
869 es->s_wtime = cpu_to_le32(get_seconds());
870 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
871 sb->s_dirt = 0;
872}
873
874static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
875{
876 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
877 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
878 es->s_wtime = cpu_to_le32(get_seconds());
879 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
880 sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
881 sb->s_dirt = 0;
882}
883
884/*
885 * In the second extended file system, it is not necessary to
886 * write the super block since we use a mapping of the
887 * disk super block in a buffer.
888 *
889 * However, this function is still used to set the fs valid
890 * flags to 0. We need to set this flag to 0 since the fs
891 * may have been checked while mounted and e2fsck may have
892 * set s_state to EXT2_VALID_FS after some corrections.
893 */
894
895void ext2_write_super (struct super_block * sb)
896{
897 struct ext2_super_block * es;
898 lock_kernel();
899 if (!(sb->s_flags & MS_RDONLY)) {
900 es = EXT2_SB(sb)->s_es;
901
902 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) {
903 ext2_debug ("setting valid to 0\n");
904 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
905 ~EXT2_VALID_FS);
906 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
907 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
908 es->s_mtime = cpu_to_le32(get_seconds());
909 ext2_sync_super(sb, es);
910 } else
911 ext2_commit_super (sb, es);
912 }
913 sb->s_dirt = 0;
914 unlock_kernel();
915}
916
917static int ext2_remount (struct super_block * sb, int * flags, char * data)
918{
919 struct ext2_sb_info * sbi = EXT2_SB(sb);
920 struct ext2_super_block * es;
921
922 /*
923 * Allow the "check" option to be passed as a remount option.
924 */
925 if (!parse_options (data, sbi))
926 return -EINVAL;
927
928 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
929 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
930
931 es = sbi->s_es;
932 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
933 return 0;
934 if (*flags & MS_RDONLY) {
935 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
936 !(sbi->s_mount_state & EXT2_VALID_FS))
937 return 0;
938 /*
939 * OK, we are remounting a valid rw partition rdonly, so set
940 * the rdonly flag and then mark the partition as valid again.
941 */
942 es->s_state = cpu_to_le16(sbi->s_mount_state);
943 es->s_mtime = cpu_to_le32(get_seconds());
944 } else {
945 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
946 ~EXT2_FEATURE_RO_COMPAT_SUPP);
947 if (ret) {
948 printk("EXT2-fs: %s: couldn't remount RDWR because of "
949 "unsupported optional features (%x).\n",
950 sb->s_id, le32_to_cpu(ret));
951 return -EROFS;
952 }
953 /*
954 * Mounting a RDONLY partition read-write, so reread and
955 * store the current valid flag. (It may have been changed
956 * by e2fsck since we originally mounted the partition.)
957 */
958 sbi->s_mount_state = le16_to_cpu(es->s_state);
959 if (!ext2_setup_super (sb, es, 0))
960 sb->s_flags &= ~MS_RDONLY;
961 }
962 ext2_sync_super(sb, es);
963 return 0;
964}
965
966static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
967{
968 struct ext2_sb_info *sbi = EXT2_SB(sb);
969 unsigned long overhead;
970 int i;
971
972 if (test_opt (sb, MINIX_DF))
973 overhead = 0;
974 else {
975 /*
976 * Compute the overhead (FS structures)
977 */
978
979 /*
980 * All of the blocks before first_data_block are
981 * overhead
982 */
983 overhead = le32_to_cpu(sbi->s_es->s_first_data_block);
984
985 /*
986 * Add the overhead attributed to the superblock and
987 * block group descriptors. If the sparse superblocks
988 * feature is turned on, then not all groups have this.
989 */
990 for (i = 0; i < sbi->s_groups_count; i++)
991 overhead += ext2_bg_has_super(sb, i) +
992 ext2_bg_num_gdb(sb, i);
993
994 /*
995 * Every block group has an inode bitmap, a block
996 * bitmap, and an inode table.
997 */
998 overhead += (sbi->s_groups_count *
999 (2 + sbi->s_itb_per_group));
1000 }
1001
1002 buf->f_type = EXT2_SUPER_MAGIC;
1003 buf->f_bsize = sb->s_blocksize;
1004 buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead;
1005 buf->f_bfree = ext2_count_free_blocks(sb);
1006 buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count);
1007 if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count))
1008 buf->f_bavail = 0;
1009 buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
1010 buf->f_ffree = ext2_count_free_inodes (sb);
1011 buf->f_namelen = EXT2_NAME_LEN;
1012 return 0;
1013}
1014
1015static struct super_block *ext2_get_sb(struct file_system_type *fs_type,
1016 int flags, const char *dev_name, void *data)
1017{
1018 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
1019}
1020
1021#ifdef CONFIG_QUOTA
1022
1023/* Read data from quotafile - avoid pagecache and such because we cannot afford
1024 * acquiring the locks... As quota files are never truncated and quota code
1025 * itself serializes the operations (and no one else should touch the files)
1026 * we don't have to be afraid of races */
1027static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
1028 size_t len, loff_t off)
1029{
1030 struct inode *inode = sb_dqopt(sb)->files[type];
1031 sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
1032 int err = 0;
1033 int offset = off & (sb->s_blocksize - 1);
1034 int tocopy;
1035 size_t toread;
1036 struct buffer_head tmp_bh;
1037 struct buffer_head *bh;
1038 loff_t i_size = i_size_read(inode);
1039
1040 if (off > i_size)
1041 return 0;
1042 if (off+len > i_size)
1043 len = i_size-off;
1044 toread = len;
1045 while (toread > 0) {
1046 tocopy = sb->s_blocksize - offset < toread ?
1047 sb->s_blocksize - offset : toread;
1048
1049 tmp_bh.b_state = 0;
1050 err = ext2_get_block(inode, blk, &tmp_bh, 0);
1051 if (err)
1052 return err;
1053 if (!buffer_mapped(&tmp_bh)) /* A hole? */
1054 memset(data, 0, tocopy);
1055 else {
1056 bh = sb_bread(sb, tmp_bh.b_blocknr);
1057 if (!bh)
1058 return -EIO;
1059 memcpy(data, bh->b_data+offset, tocopy);
1060 brelse(bh);
1061 }
1062 offset = 0;
1063 toread -= tocopy;
1064 data += tocopy;
1065 blk++;
1066 }
1067 return len;
1068}
1069
1070/* Write to quotafile */
1071static ssize_t ext2_quota_write(struct super_block *sb, int type,
1072 const char *data, size_t len, loff_t off)
1073{
1074 struct inode *inode = sb_dqopt(sb)->files[type];
1075 sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
1076 int err = 0;
1077 int offset = off & (sb->s_blocksize - 1);
1078 int tocopy;
1079 size_t towrite = len;
1080 struct buffer_head tmp_bh;
1081 struct buffer_head *bh;
1082
1083 down(&inode->i_sem);
1084 while (towrite > 0) {
1085 tocopy = sb->s_blocksize - offset < towrite ?
1086 sb->s_blocksize - offset : towrite;
1087
1088 tmp_bh.b_state = 0;
1089 err = ext2_get_block(inode, blk, &tmp_bh, 1);
1090 if (err)
1091 goto out;
1092 if (offset || tocopy != EXT2_BLOCK_SIZE(sb))
1093 bh = sb_bread(sb, tmp_bh.b_blocknr);
1094 else
1095 bh = sb_getblk(sb, tmp_bh.b_blocknr);
1096 if (!bh) {
1097 err = -EIO;
1098 goto out;
1099 }
1100 lock_buffer(bh);
1101 memcpy(bh->b_data+offset, data, tocopy);
1102 flush_dcache_page(bh->b_page);
1103 set_buffer_uptodate(bh);
1104 mark_buffer_dirty(bh);
1105 unlock_buffer(bh);
1106 brelse(bh);
1107 offset = 0;
1108 towrite -= tocopy;
1109 data += tocopy;
1110 blk++;
1111 }
1112out:
1113 if (len == towrite)
1114 return err;
1115 if (inode->i_size < off+len-towrite)
1116 i_size_write(inode, off+len-towrite);
1117 inode->i_version++;
1118 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1119 mark_inode_dirty(inode);
1120 up(&inode->i_sem);
1121 return len - towrite;
1122}
1123
1124#endif
1125
1126static struct file_system_type ext2_fs_type = {
1127 .owner = THIS_MODULE,
1128 .name = "ext2",
1129 .get_sb = ext2_get_sb,
1130 .kill_sb = kill_block_super,
1131 .fs_flags = FS_REQUIRES_DEV,
1132};
1133
1134static int __init init_ext2_fs(void)
1135{
1136 int err = init_ext2_xattr();
1137 if (err)
1138 return err;
1139 err = init_inodecache();
1140 if (err)
1141 goto out1;
1142 err = register_filesystem(&ext2_fs_type);
1143 if (err)
1144 goto out;
1145 return 0;
1146out:
1147 destroy_inodecache();
1148out1:
1149 exit_ext2_xattr();
1150 return err;
1151}
1152
1153static void __exit exit_ext2_fs(void)
1154{
1155 unregister_filesystem(&ext2_fs_type);
1156 destroy_inodecache();
1157 exit_ext2_xattr();
1158}
1159
1160module_init(init_ext2_fs)
1161module_exit(exit_ext2_fs)
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
new file mode 100644
index 000000000000..9f7bac01d557
--- /dev/null
+++ b/fs/ext2/symlink.c
@@ -0,0 +1,52 @@
1/*
2 * linux/fs/ext2/symlink.c
3 *
4 * Only fast symlinks left here - the rest is done by generic code. AV, 1999
5 *
6 * Copyright (C) 1992, 1993, 1994, 1995
7 * Remy Card (card@masi.ibp.fr)
8 * Laboratoire MASI - Institut Blaise Pascal
9 * Universite Pierre et Marie Curie (Paris VI)
10 *
11 * from
12 *
13 * linux/fs/minix/symlink.c
14 *
15 * Copyright (C) 1991, 1992 Linus Torvalds
16 *
17 * ext2 symlink handling code
18 */
19
20#include "ext2.h"
21#include "xattr.h"
22#include <linux/namei.h>
23
24static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
25{
26 struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
27 nd_set_link(nd, (char *)ei->i_data);
28 return 0;
29}
30
31struct inode_operations ext2_symlink_inode_operations = {
32 .readlink = generic_readlink,
33 .follow_link = page_follow_link_light,
34 .put_link = page_put_link,
35#ifdef CONFIG_EXT2_FS_XATTR
36 .setxattr = generic_setxattr,
37 .getxattr = generic_getxattr,
38 .listxattr = ext2_listxattr,
39 .removexattr = generic_removexattr,
40#endif
41};
42
43struct inode_operations ext2_fast_symlink_inode_operations = {
44 .readlink = generic_readlink,
45 .follow_link = ext2_follow_link,
46#ifdef CONFIG_EXT2_FS_XATTR
47 .setxattr = generic_setxattr,
48 .getxattr = generic_getxattr,
49 .listxattr = ext2_listxattr,
50 .removexattr = generic_removexattr,
51#endif
52};
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
new file mode 100644
index 000000000000..27982b500e84
--- /dev/null
+++ b/fs/ext2/xattr.c
@@ -0,0 +1,1043 @@
1/*
2 * linux/fs/ext2/xattr.c
3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
5 *
6 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7 * Extended attributes for symlinks and special files added per
8 * suggestion of Luka Renko <luka.renko@hermes.si>.
9 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
10 * Red Hat Inc.
11 *
12 */
13
14/*
15 * Extended attributes are stored on disk blocks allocated outside of
16 * any inode. The i_file_acl field is then made to point to this allocated
17 * block. If all extended attributes of an inode are identical, these
18 * inodes may share the same extended attribute block. Such situations
19 * are automatically detected by keeping a cache of recent attribute block
20 * numbers and hashes over the block's contents in memory.
21 *
22 *
23 * Extended attribute block layout:
24 *
25 * +------------------+
26 * | header |
27 * | entry 1 | |
28 * | entry 2 | | growing downwards
29 * | entry 3 | v
30 * | four null bytes |
31 * | . . . |
32 * | value 1 | ^
33 * | value 3 | | growing upwards
34 * | value 2 | |
35 * +------------------+
36 *
37 * The block header is followed by multiple entry descriptors. These entry
38 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
39 * byte boundaries. The entry descriptors are sorted by attribute name,
40 * so that two extended attribute blocks can be compared efficiently.
41 *
42 * Attribute values are aligned to the end of the block, stored in
43 * no specific order. They are also padded to EXT2_XATTR_PAD byte
44 * boundaries. No additional gaps are left between them.
45 *
46 * Locking strategy
47 * ----------------
48 * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
49 * EA blocks are only changed if they are exclusive to an inode, so
50 * holding xattr_sem also means that nothing but the EA block's reference
51 * count will change. Multiple writers to an EA block are synchronized
52 * by the bh lock. No more than a single bh lock is held at any time
53 * to avoid deadlocks.
54 */
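/*
 * Worked example of the rounding described above, using the
 * EXT2_XATTR_LEN() and EXT2_XATTR_SIZE() macros from xattr.h
 * (EXT2_XATTR_PAD is 4 and sizeof(struct ext2_xattr_entry) is 16):
 * a 7-byte name needs EXT2_XATTR_LEN(7) = (7 + 3 + 16) & ~3 = 24
 * bytes of descriptor space growing downwards from the header, and a
 * 5-byte value needs EXT2_XATTR_SIZE(5) = (5 + 3) & ~3 = 8 bytes
 * packed against the end of the block.
 */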
55
56#include <linux/buffer_head.h>
57#include <linux/module.h>
58#include <linux/init.h>
59#include <linux/slab.h>
60#include <linux/mbcache.h>
61#include <linux/quotaops.h>
62#include <linux/rwsem.h>
63#include "ext2.h"
64#include "xattr.h"
65#include "acl.h"
66
67#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
68#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
69#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
70#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
71
72#ifdef EXT2_XATTR_DEBUG
73# define ea_idebug(inode, f...) do { \
74 printk(KERN_DEBUG "inode %s:%ld: ", \
75 inode->i_sb->s_id, inode->i_ino); \
76 printk(f); \
77 printk("\n"); \
78 } while (0)
79# define ea_bdebug(bh, f...) do { \
80 char b[BDEVNAME_SIZE]; \
81 printk(KERN_DEBUG "block %s:%lu: ", \
82 bdevname(bh->b_bdev, b), \
83 (unsigned long) bh->b_blocknr); \
84 printk(f); \
85 printk("\n"); \
86 } while (0)
87#else
88# define ea_idebug(f...)
89# define ea_bdebug(f...)
90#endif
91
92static int ext2_xattr_set2(struct inode *, struct buffer_head *,
93 struct ext2_xattr_header *);
94
95static int ext2_xattr_cache_insert(struct buffer_head *);
96static struct buffer_head *ext2_xattr_cache_find(struct inode *,
97 struct ext2_xattr_header *);
98static void ext2_xattr_rehash(struct ext2_xattr_header *,
99 struct ext2_xattr_entry *);
100
101static struct mb_cache *ext2_xattr_cache;
102
103static struct xattr_handler *ext2_xattr_handler_map[] = {
104 [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
105#ifdef CONFIG_EXT2_FS_POSIX_ACL
106 [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler,
107 [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler,
108#endif
109 [EXT2_XATTR_INDEX_TRUSTED] = &ext2_xattr_trusted_handler,
110#ifdef CONFIG_EXT2_FS_SECURITY
111 [EXT2_XATTR_INDEX_SECURITY] = &ext2_xattr_security_handler,
112#endif
113};
114
115struct xattr_handler *ext2_xattr_handlers[] = {
116 &ext2_xattr_user_handler,
117 &ext2_xattr_trusted_handler,
118#ifdef CONFIG_EXT2_FS_POSIX_ACL
119 &ext2_xattr_acl_access_handler,
120 &ext2_xattr_acl_default_handler,
121#endif
122#ifdef CONFIG_EXT2_FS_SECURITY
123 &ext2_xattr_security_handler,
124#endif
125 NULL
126};
127
128static inline struct xattr_handler *
129ext2_xattr_handler(int name_index)
130{
131 struct xattr_handler *handler = NULL;
132
133 if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
134 handler = ext2_xattr_handler_map[name_index];
135 return handler;
136}
137
138/*
139 * ext2_xattr_get()
140 *
141 * Copy an extended attribute into the buffer
142 * provided, or compute the buffer size required.
143 * Buffer is NULL to compute the size of the buffer required.
144 *
145 * Returns a negative error number on failure, or the number of bytes
146 * used / required on success.
147 */
148int
149ext2_xattr_get(struct inode *inode, int name_index, const char *name,
150 void *buffer, size_t buffer_size)
151{
152 struct buffer_head *bh = NULL;
153 struct ext2_xattr_entry *entry;
154 size_t name_len, size;
155 char *end;
156 int error;
157
158 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
159 name_index, name, buffer, (long)buffer_size);
160
161 if (name == NULL)
162 return -EINVAL;
163 down_read(&EXT2_I(inode)->xattr_sem);
164 error = -ENODATA;
165 if (!EXT2_I(inode)->i_file_acl)
166 goto cleanup;
167 ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
168 bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
169 error = -EIO;
170 if (!bh)
171 goto cleanup;
172 ea_bdebug(bh, "b_count=%d, refcount=%d",
173 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
174 end = bh->b_data + bh->b_size;
175 if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
176 HDR(bh)->h_blocks != cpu_to_le32(1)) {
177bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
178 "inode %ld: bad block %d", inode->i_ino,
179 EXT2_I(inode)->i_file_acl);
180 error = -EIO;
181 goto cleanup;
182 }
183 /* find named attribute */
184 name_len = strlen(name);
185
186 error = -ERANGE;
187 if (name_len > 255)
188 goto cleanup;
189 entry = FIRST_ENTRY(bh);
190 while (!IS_LAST_ENTRY(entry)) {
191 struct ext2_xattr_entry *next =
192 EXT2_XATTR_NEXT(entry);
193 if ((char *)next >= end)
194 goto bad_block;
195 if (name_index == entry->e_name_index &&
196 name_len == entry->e_name_len &&
197 memcmp(name, entry->e_name, name_len) == 0)
198 goto found;
199 entry = next;
200 }
201 /* Check the remaining name entries */
202 while (!IS_LAST_ENTRY(entry)) {
203 struct ext2_xattr_entry *next =
204 EXT2_XATTR_NEXT(entry);
205 if ((char *)next >= end)
206 goto bad_block;
207 entry = next;
208 }
209 if (ext2_xattr_cache_insert(bh))
210 ea_idebug(inode, "cache insert failed");
211 error = -ENODATA;
212 goto cleanup;
213found:
214 /* check the buffer size */
215 if (entry->e_value_block != 0)
216 goto bad_block;
217 size = le32_to_cpu(entry->e_value_size);
218 if (size > inode->i_sb->s_blocksize ||
219 le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
220 goto bad_block;
221
222 if (ext2_xattr_cache_insert(bh))
223 ea_idebug(inode, "cache insert failed");
224 if (buffer) {
225 error = -ERANGE;
226 if (size > buffer_size)
227 goto cleanup;
228 /* return value of attribute */
229 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
230 size);
231 }
232 error = size;
233
234cleanup:
235 brelse(bh);
236 up_read(&EXT2_I(inode)->xattr_sem);
237
238 return error;
239}
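/*
 * Minimal usage sketch of the size-then-copy calling convention
 * documented above (example_get_user_foo and the "foo" attribute
 * name are hypothetical, not part of ext2 itself): a first call with
 * a NULL buffer returns the value size, a second call copies it.
 */
static int example_get_user_foo(struct inode *inode, char **out)
{
	char *buf;
	int size;

	/* Query the size of the "user.foo" value. */
	size = ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, "foo", NULL, 0);
	if (size < 0)
		return size;
	buf = kmalloc(size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	/* Copy the value; a racing setter can make this return -ERANGE. */
	size = ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, "foo", buf, size);
	if (size < 0) {
		kfree(buf);
		return size;
	}
	*out = buf;
	return size;
}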
240
241/*
242 * ext2_xattr_list()
243 *
244 * Copy a list of attribute names into the buffer
245 * provided, or compute the buffer size required.
246 * Buffer is NULL to compute the size of the buffer required.
247 *
248 * Returns a negative error number on failure, or the number of bytes
249 * used / required on success.
250 */
251static int
252ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
253{
254 struct buffer_head *bh = NULL;
255 struct ext2_xattr_entry *entry;
256 char *end;
257 size_t rest = buffer_size;
258 int error;
259
260 ea_idebug(inode, "buffer=%p, buffer_size=%ld",
261 buffer, (long)buffer_size);
262
263 down_read(&EXT2_I(inode)->xattr_sem);
264 error = 0;
265 if (!EXT2_I(inode)->i_file_acl)
266 goto cleanup;
267 ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
268 bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
269 error = -EIO;
270 if (!bh)
271 goto cleanup;
272 ea_bdebug(bh, "b_count=%d, refcount=%d",
273 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
274 end = bh->b_data + bh->b_size;
275 if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
276 HDR(bh)->h_blocks != cpu_to_le32(1)) {
277bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
278 "inode %ld: bad block %d", inode->i_ino,
279 EXT2_I(inode)->i_file_acl);
280 error = -EIO;
281 goto cleanup;
282 }
283
284 /* check the on-disk data structure */
285 entry = FIRST_ENTRY(bh);
286 while (!IS_LAST_ENTRY(entry)) {
287 struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry);
288
289 if ((char *)next >= end)
290 goto bad_block;
291 entry = next;
292 }
293 if (ext2_xattr_cache_insert(bh))
294 ea_idebug(inode, "cache insert failed");
295
296 /* list the attribute names */
297 for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
298 entry = EXT2_XATTR_NEXT(entry)) {
299 struct xattr_handler *handler =
300 ext2_xattr_handler(entry->e_name_index);
301
302 if (handler) {
303 size_t size = handler->list(inode, buffer, rest,
304 entry->e_name,
305 entry->e_name_len);
306 if (buffer) {
307 if (size > rest) {
308 error = -ERANGE;
309 goto cleanup;
310 }
311 buffer += size;
312 }
313 rest -= size;
314 }
315 }
316 error = buffer_size - rest; /* total size */
317
318cleanup:
319 brelse(bh);
320 up_read(&EXT2_I(inode)->xattr_sem);
321
322 return error;
323}
324
325/*
326 * Inode operation listxattr()
327 *
328 * dentry->d_inode->i_sem: don't care
329 */
330ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
332{
333 return ext2_xattr_list(dentry->d_inode, buffer, size);
334}
335
336/*
337 * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
338 * not set, set it.
339 */
340static void ext2_xattr_update_super_block(struct super_block *sb)
341{
342 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
343 return;
344
345 lock_super(sb);
346 EXT2_SB(sb)->s_es->s_feature_compat |=
347 cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
348 sb->s_dirt = 1;
349 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
350 unlock_super(sb);
351}
352
353/*
354 * ext2_xattr_set()
355 *
356 * Create, replace or remove an extended attribute for this inode. Buffer
357 * is NULL to remove an existing extended attribute, and non-NULL to
358 * either replace an existing extended attribute, or create a new extended
359 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
360 * specify that an extended attribute must exist and must not exist
361 * previous to the call, respectively.
362 *
363 * Returns 0, or a negative error number on failure.
364 */
365int
366ext2_xattr_set(struct inode *inode, int name_index, const char *name,
367 const void *value, size_t value_len, int flags)
368{
369 struct super_block *sb = inode->i_sb;
370 struct buffer_head *bh = NULL;
371 struct ext2_xattr_header *header = NULL;
372 struct ext2_xattr_entry *here, *last;
373 size_t name_len, free, min_offs = sb->s_blocksize;
374 int not_found = 1, error;
375 char *end;
376
377 /*
378 * header -- Points either into bh, or to a temporarily
379 * allocated buffer.
380 * here -- The named entry found, or the place for inserting, within
381 * the block pointed to by header.
382 * last -- Points right after the last named entry within the block
383 * pointed to by header.
384 * min_offs -- The offset of the first value (values are aligned
385 * towards the end of the block).
386 * end -- Points right after the block pointed to by header.
387 */
388
389 ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
390 name_index, name, value, (long)value_len);
391
392 if (IS_RDONLY(inode))
393 return -EROFS;
394 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
395 return -EPERM;
396 if (value == NULL)
397 value_len = 0;
398 if (name == NULL)
399 return -EINVAL;
400 name_len = strlen(name);
401 if (name_len > 255 || value_len > sb->s_blocksize)
402 return -ERANGE;
403 down_write(&EXT2_I(inode)->xattr_sem);
404 if (EXT2_I(inode)->i_file_acl) {
405 /* The inode already has an extended attribute block. */
406 bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
407 error = -EIO;
408 if (!bh)
409 goto cleanup;
410 ea_bdebug(bh, "b_count=%d, refcount=%d",
411 atomic_read(&(bh->b_count)),
412 le32_to_cpu(HDR(bh)->h_refcount));
413 header = HDR(bh);
414 end = bh->b_data + bh->b_size;
415 if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
416 header->h_blocks != cpu_to_le32(1)) {
417bad_block: ext2_error(sb, "ext2_xattr_set",
418 "inode %ld: bad block %d", inode->i_ino,
419 EXT2_I(inode)->i_file_acl);
420 error = -EIO;
421 goto cleanup;
422 }
423 /* Find the named attribute. */
424 here = FIRST_ENTRY(bh);
425 while (!IS_LAST_ENTRY(here)) {
426 struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
427 if ((char *)next >= end)
428 goto bad_block;
429 if (!here->e_value_block && here->e_value_size) {
430 size_t offs = le16_to_cpu(here->e_value_offs);
431 if (offs < min_offs)
432 min_offs = offs;
433 }
434 not_found = name_index - here->e_name_index;
435 if (!not_found)
436 not_found = name_len - here->e_name_len;
437 if (!not_found)
438 not_found = memcmp(name, here->e_name,name_len);
439 if (not_found <= 0)
440 break;
441 here = next;
442 }
443 last = here;
444 /* We still need to compute min_offs and last. */
445 while (!IS_LAST_ENTRY(last)) {
446 struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
447 if ((char *)next >= end)
448 goto bad_block;
449 if (!last->e_value_block && last->e_value_size) {
450 size_t offs = le16_to_cpu(last->e_value_offs);
451 if (offs < min_offs)
452 min_offs = offs;
453 }
454 last = next;
455 }
456
457 /* Check whether we have enough space left. */
458 free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
459 } else {
460 /* We will use a new extended attribute block. */
461 free = sb->s_blocksize -
462 sizeof(struct ext2_xattr_header) - sizeof(__u32);
463 here = last = NULL; /* avoid gcc uninitialized warning. */
464 }
465
466 if (not_found) {
467 /* Request to remove a nonexistent attribute? */
468 error = -ENODATA;
469 if (flags & XATTR_REPLACE)
470 goto cleanup;
471 error = 0;
472 if (value == NULL)
473 goto cleanup;
474 } else {
475 /* Request to create an existing attribute? */
476 error = -EEXIST;
477 if (flags & XATTR_CREATE)
478 goto cleanup;
479 if (!here->e_value_block && here->e_value_size) {
480 size_t size = le32_to_cpu(here->e_value_size);
481
482 if (le16_to_cpu(here->e_value_offs) + size >
483 sb->s_blocksize || size > sb->s_blocksize)
484 goto bad_block;
485 free += EXT2_XATTR_SIZE(size);
486 }
487 free += EXT2_XATTR_LEN(name_len);
488 }
489 error = -ENOSPC;
490 if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
491 goto cleanup;
492
493 /* Here we know that we can set the new attribute. */
494
495 if (header) {
496 struct mb_cache_entry *ce;
497
498 /* assert(header == HDR(bh)); */
499 ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
500 bh->b_blocknr);
501 lock_buffer(bh);
502 if (header->h_refcount == cpu_to_le32(1)) {
503 ea_bdebug(bh, "modifying in-place");
504 if (ce)
505 mb_cache_entry_free(ce);
506 /* keep the buffer locked while modifying it. */
507 } else {
508 int offset;
509
510 if (ce)
511 mb_cache_entry_release(ce);
512 unlock_buffer(bh);
513 ea_bdebug(bh, "cloning");
514 header = kmalloc(bh->b_size, GFP_KERNEL);
515 error = -ENOMEM;
516 if (header == NULL)
517 goto cleanup;
518 memcpy(header, HDR(bh), bh->b_size);
519 header->h_refcount = cpu_to_le32(1);
520
521 offset = (char *)here - bh->b_data;
522 here = ENTRY((char *)header + offset);
523 offset = (char *)last - bh->b_data;
524 last = ENTRY((char *)header + offset);
525 }
526 } else {
527 /* Allocate a buffer where we construct the new block. */
528 header = kmalloc(sb->s_blocksize, GFP_KERNEL);
529 error = -ENOMEM;
530 if (header == NULL)
531 goto cleanup;
532 memset(header, 0, sb->s_blocksize);
533 end = (char *)header + sb->s_blocksize;
534 header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
535 header->h_blocks = header->h_refcount = cpu_to_le32(1);
536 last = here = ENTRY(header+1);
537 }
538
539 /* Iff we are modifying the block in-place, bh is locked here. */
540
541 if (not_found) {
542 /* Insert the new name. */
543 size_t size = EXT2_XATTR_LEN(name_len);
544 size_t rest = (char *)last - (char *)here;
545 memmove((char *)here + size, here, rest);
546 memset(here, 0, size);
547 here->e_name_index = name_index;
548 here->e_name_len = name_len;
549 memcpy(here->e_name, name, name_len);
550 } else {
551 if (!here->e_value_block && here->e_value_size) {
552 char *first_val = (char *)header + min_offs;
553 size_t offs = le16_to_cpu(here->e_value_offs);
554 char *val = (char *)header + offs;
555 size_t size = EXT2_XATTR_SIZE(
556 le32_to_cpu(here->e_value_size));
557
558 if (size == EXT2_XATTR_SIZE(value_len)) {
559 /* The old and the new value have the same
560 size. Just replace. */
561 here->e_value_size = cpu_to_le32(value_len);
562 memset(val + size - EXT2_XATTR_PAD, 0,
563 EXT2_XATTR_PAD); /* Clear pad bytes. */
564 memcpy(val, value, value_len);
565 goto skip_replace;
566 }
567
568 /* Remove the old value. */
569 memmove(first_val + size, first_val, val - first_val);
570 memset(first_val, 0, size);
571 here->e_value_offs = 0;
572 min_offs += size;
573
574 /* Adjust all value offsets. */
575 last = ENTRY(header+1);
576 while (!IS_LAST_ENTRY(last)) {
577 size_t o = le16_to_cpu(last->e_value_offs);
578 if (!last->e_value_block && o < offs)
579 last->e_value_offs =
580 cpu_to_le16(o + size);
581 last = EXT2_XATTR_NEXT(last);
582 }
583 }
584 if (value == NULL) {
585 /* Remove the old name. */
586 size_t size = EXT2_XATTR_LEN(name_len);
587 last = ENTRY((char *)last - size);
588 memmove(here, (char*)here + size,
589 (char*)last - (char*)here);
590 memset(last, 0, size);
591 }
592 }
593
594 if (value != NULL) {
595 /* Insert the new value. */
596 here->e_value_size = cpu_to_le32(value_len);
597 if (value_len) {
598 size_t size = EXT2_XATTR_SIZE(value_len);
599 char *val = (char *)header + min_offs - size;
600 here->e_value_offs =
601 cpu_to_le16((char *)val - (char *)header);
602 memset(val + size - EXT2_XATTR_PAD, 0,
603 EXT2_XATTR_PAD); /* Clear the pad bytes. */
604 memcpy(val, value, value_len);
605 }
606 }
607
608skip_replace:
609 if (IS_LAST_ENTRY(ENTRY(header+1))) {
610 /* This block is now empty. */
611 if (bh && header == HDR(bh))
612 unlock_buffer(bh); /* we were modifying in-place. */
613 error = ext2_xattr_set2(inode, bh, NULL);
614 } else {
615 ext2_xattr_rehash(header, here);
616 if (bh && header == HDR(bh))
617 unlock_buffer(bh); /* we were modifying in-place. */
618 error = ext2_xattr_set2(inode, bh, header);
619 }
620
621cleanup:
622 brelse(bh);
623 if (!(bh && header == HDR(bh)))
624 kfree(header);
625 up_write(&EXT2_I(inode)->xattr_sem);
626
627 return error;
628}
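/*
 * Minimal usage sketch of the XATTR_CREATE/XATTR_REPLACE semantics
 * documented above (example_set_user_foo and the attribute value are
 * hypothetical): XATTR_CREATE fails with -EEXIST if the attribute
 * already exists, XATTR_REPLACE fails with -ENODATA if it does not,
 * and a NULL value removes the attribute.
 */
static int example_set_user_foo(struct inode *inode)
{
	static const char value[] = "bar";
	int error;

	/* Create "user.foo" only if it does not exist yet. */
	error = ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, "foo",
			       value, sizeof(value) - 1, XATTR_CREATE);
	if (error == -EEXIST)
		/* Already present: overwrite it instead. */
		error = ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, "foo",
				       value, sizeof(value) - 1, XATTR_REPLACE);
	return error;
}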
629
630/*
631 * Second half of ext2_xattr_set(): Update the file system.
632 */
633static int
634ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
635 struct ext2_xattr_header *header)
636{
637 struct super_block *sb = inode->i_sb;
638 struct buffer_head *new_bh = NULL;
639 int error;
640
641 if (header) {
642 new_bh = ext2_xattr_cache_find(inode, header);
643 if (new_bh) {
644 /* We found an identical block in the cache. */
645 if (new_bh == old_bh) {
646 ea_bdebug(new_bh, "keeping this block");
647 } else {
648 /* The old block is released after updating
649 the inode. */
650 ea_bdebug(new_bh, "reusing block");
651
652 error = -EDQUOT;
653 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
654 unlock_buffer(new_bh);
655 goto cleanup;
656 }
657 HDR(new_bh)->h_refcount = cpu_to_le32(1 +
658 le32_to_cpu(HDR(new_bh)->h_refcount));
659 ea_bdebug(new_bh, "refcount now=%d",
660 le32_to_cpu(HDR(new_bh)->h_refcount));
661 }
662 unlock_buffer(new_bh);
663 } else if (old_bh && header == HDR(old_bh)) {
664 /* Keep this block. No need to lock the block as we
665 don't need to change the reference count. */
666 new_bh = old_bh;
667 get_bh(new_bh);
668 ext2_xattr_cache_insert(new_bh);
669 } else {
670 /* We need to allocate a new block */
671 int goal = le32_to_cpu(EXT2_SB(sb)->s_es->
672 s_first_data_block) +
673 EXT2_I(inode)->i_block_group *
674 EXT2_BLOCKS_PER_GROUP(sb);
675 int block = ext2_new_block(inode, goal,
676 NULL, NULL, &error);
677 if (error)
678 goto cleanup;
679 ea_idebug(inode, "creating block %d", block);
680
681 new_bh = sb_getblk(sb, block);
682 if (!new_bh) {
683 ext2_free_blocks(inode, block, 1);
684 error = -EIO;
685 goto cleanup;
686 }
687 lock_buffer(new_bh);
688 memcpy(new_bh->b_data, header, new_bh->b_size);
689 set_buffer_uptodate(new_bh);
690 unlock_buffer(new_bh);
691 ext2_xattr_cache_insert(new_bh);
692
693 ext2_xattr_update_super_block(sb);
694 }
695 mark_buffer_dirty(new_bh);
696 if (IS_SYNC(inode)) {
697 sync_dirty_buffer(new_bh);
698 error = -EIO;
699 if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
700 goto cleanup;
701 }
702 }
703
704 /* Update the inode. */
705 EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
706 inode->i_ctime = CURRENT_TIME_SEC;
707 if (IS_SYNC(inode)) {
708 error = ext2_sync_inode (inode);
709 /* In case sync failed due to ENOSPC the inode was actually
710 * written (only some dirty data were not) so we just proceed
711 * as if nothing happened and clean up the unused block */
712 if (error && error != -ENOSPC) {
713 if (new_bh && new_bh != old_bh)
714 DQUOT_FREE_BLOCK(inode, 1);
715 goto cleanup;
716 }
717 } else
718 mark_inode_dirty(inode);
719
720 error = 0;
721 if (old_bh && old_bh != new_bh) {
722 struct mb_cache_entry *ce;
723
724 /*
725 * If there was an old block and we are no longer using it,
726 * release the old block.
727 */
728 ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
729 old_bh->b_blocknr);
730 lock_buffer(old_bh);
731 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
732 /* Free the old block. */
733 if (ce)
734 mb_cache_entry_free(ce);
735 ea_bdebug(old_bh, "freeing");
736 ext2_free_blocks(inode, old_bh->b_blocknr, 1);
737 /* We let our caller release old_bh, so we
738 * need to duplicate the buffer first. */
739 get_bh(old_bh);
740 bforget(old_bh);
741 } else {
742 /* Decrement the refcount only. */
743 HDR(old_bh)->h_refcount = cpu_to_le32(
744 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
745 if (ce)
746 mb_cache_entry_release(ce);
747 DQUOT_FREE_BLOCK(inode, 1);
748 mark_buffer_dirty(old_bh);
749 ea_bdebug(old_bh, "refcount now=%d",
750 le32_to_cpu(HDR(old_bh)->h_refcount));
751 }
752 unlock_buffer(old_bh);
753 }
754
755cleanup:
756 brelse(new_bh);
757
758 return error;
759}
760
761/*
762 * ext2_xattr_delete_inode()
763 *
764 * Free extended attribute resources associated with this inode. This
765 * is called immediately before an inode is freed.
766 */
767void
768ext2_xattr_delete_inode(struct inode *inode)
769{
770 struct buffer_head *bh = NULL;
771 struct mb_cache_entry *ce;
772
773 down_write(&EXT2_I(inode)->xattr_sem);
774 if (!EXT2_I(inode)->i_file_acl)
775 goto cleanup;
776 bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
777 if (!bh) {
778 ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
779 "inode %ld: block %d read error", inode->i_ino,
780 EXT2_I(inode)->i_file_acl);
781 goto cleanup;
782 }
783 ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
784 if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
785 HDR(bh)->h_blocks != cpu_to_le32(1)) {
786 ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
787 "inode %ld: bad block %d", inode->i_ino,
788 EXT2_I(inode)->i_file_acl);
789 goto cleanup;
790 }
791 ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
792 lock_buffer(bh);
793 if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
794 if (ce)
795 mb_cache_entry_free(ce);
796 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
797 get_bh(bh);
798 bforget(bh);
799 } else {
800 HDR(bh)->h_refcount = cpu_to_le32(
801 le32_to_cpu(HDR(bh)->h_refcount) - 1);
802 if (ce)
803 mb_cache_entry_release(ce);
804 mark_buffer_dirty(bh);
805 if (IS_SYNC(inode))
806 sync_dirty_buffer(bh);
807 DQUOT_FREE_BLOCK(inode, 1);
808 }
809 ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
810 unlock_buffer(bh);
811 EXT2_I(inode)->i_file_acl = 0;
812
813cleanup:
814 brelse(bh);
815 up_write(&EXT2_I(inode)->xattr_sem);
816}
817
818/*
819 * ext2_xattr_put_super()
820 *
821 * This is called when a file system is unmounted.
822 */
823void
824ext2_xattr_put_super(struct super_block *sb)
825{
826 mb_cache_shrink(ext2_xattr_cache, sb->s_bdev);
827}
828
829
830/*
831 * ext2_xattr_cache_insert()
832 *
833 * Create a new entry in the extended attribute cache, and insert
834 * it unless such an entry is already in the cache.
835 *
836 * Returns 0, or a negative error number on failure.
837 */
838static int
839ext2_xattr_cache_insert(struct buffer_head *bh)
840{
841 __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
842 struct mb_cache_entry *ce;
843 int error;
844
845 ce = mb_cache_entry_alloc(ext2_xattr_cache);
846 if (!ce)
847 return -ENOMEM;
848 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
849 if (error) {
850 mb_cache_entry_free(ce);
851 if (error == -EBUSY) {
852 ea_bdebug(bh, "already in cache (%d cache entries)",
853 atomic_read(&ext2_xattr_cache->c_entry_count));
854 error = 0;
855 }
856 } else {
857 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
858 atomic_read(&ext2_xattr_cache->c_entry_count));
859 mb_cache_entry_release(ce);
860 }
861 return error;
862}
863
864/*
865 * ext2_xattr_cmp()
866 *
867 * Compare two extended attribute blocks for equality.
868 *
869 * Returns 0 if the blocks are equal, 1 if they differ, and
870 * a negative error number on errors.
871 */
872static int
873ext2_xattr_cmp(struct ext2_xattr_header *header1,
874 struct ext2_xattr_header *header2)
875{
876 struct ext2_xattr_entry *entry1, *entry2;
877
878 entry1 = ENTRY(header1+1);
879 entry2 = ENTRY(header2+1);
880 while (!IS_LAST_ENTRY(entry1)) {
881 if (IS_LAST_ENTRY(entry2))
882 return 1;
883 if (entry1->e_hash != entry2->e_hash ||
884 entry1->e_name_index != entry2->e_name_index ||
885 entry1->e_name_len != entry2->e_name_len ||
886 entry1->e_value_size != entry2->e_value_size ||
887 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
888 return 1;
889 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
890 return -EIO;
891 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
892 (char *)header2 + le16_to_cpu(entry2->e_value_offs),
893 le32_to_cpu(entry1->e_value_size)))
894 return 1;
895
896 entry1 = EXT2_XATTR_NEXT(entry1);
897 entry2 = EXT2_XATTR_NEXT(entry2);
898 }
899 if (!IS_LAST_ENTRY(entry2))
900 return 1;
901 return 0;
902}
903
904/*
905 * ext2_xattr_cache_find()
906 *
907 * Find an identical extended attribute block.
908 *
909 * Returns a locked buffer head to the block found, or NULL if such
910 * a block was not found or an error occurred.
911 */
912static struct buffer_head *
913ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
914{
915 __u32 hash = le32_to_cpu(header->h_hash);
916 struct mb_cache_entry *ce;
917
918 if (!header->h_hash)
919 return NULL; /* never share */
920 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
921again:
922 ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
923 inode->i_sb->s_bdev, hash);
924 while (ce) {
925 struct buffer_head *bh;
926
927 if (IS_ERR(ce)) {
928 if (PTR_ERR(ce) == -EAGAIN)
929 goto again;
930 break;
931 }
932
933 bh = sb_bread(inode->i_sb, ce->e_block);
934 if (!bh) {
935 ext2_error(inode->i_sb, "ext2_xattr_cache_find",
936 "inode %ld: block %ld read error",
937 inode->i_ino, (unsigned long) ce->e_block);
938 } else {
939 lock_buffer(bh);
940 if (le32_to_cpu(HDR(bh)->h_refcount) >
941 EXT2_XATTR_REFCOUNT_MAX) {
942 ea_idebug(inode, "block %ld refcount %d>%d",
943 (unsigned long) ce->e_block,
944 le32_to_cpu(HDR(bh)->h_refcount),
945 EXT2_XATTR_REFCOUNT_MAX);
946 } else if (!ext2_xattr_cmp(header, HDR(bh))) {
947 ea_bdebug(bh, "b_count=%d",
948 atomic_read(&(bh->b_count)));
949 mb_cache_entry_release(ce);
950 return bh;
951 }
952 unlock_buffer(bh);
953 brelse(bh);
954 }
955 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
956 }
957 return NULL;
958}
959
960#define NAME_HASH_SHIFT 5
961#define VALUE_HASH_SHIFT 16
962
963/*
964 * ext2_xattr_hash_entry()
965 *
966 * Compute the hash of an extended attribute.
967 */
968static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
969 struct ext2_xattr_entry *entry)
970{
971 __u32 hash = 0;
972 char *name = entry->e_name;
973 int n;
974
975 for (n=0; n < entry->e_name_len; n++) {
976 hash = (hash << NAME_HASH_SHIFT) ^
977 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
978 *name++;
979 }
980
981 if (entry->e_value_block == 0 && entry->e_value_size != 0) {
982 __le32 *value = (__le32 *)((char *)header +
983 le16_to_cpu(entry->e_value_offs));
984 for (n = (le32_to_cpu(entry->e_value_size) +
985 EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
986 hash = (hash << VALUE_HASH_SHIFT) ^
987 (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
988 le32_to_cpu(*value++);
989 }
990 }
991 entry->e_hash = cpu_to_le32(hash);
992}
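/*
 * For example, a one-byte name "a" with no value hashes to
 * (0 << NAME_HASH_SHIFT) ^ (0 >> 27) ^ 'a' = 0x61; each further name
 * byte rotates the running hash left by NAME_HASH_SHIFT bits and
 * XORs the byte in, and value words are folded the same way using
 * VALUE_HASH_SHIFT.
 */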
993
994#undef NAME_HASH_SHIFT
995#undef VALUE_HASH_SHIFT
996
997#define BLOCK_HASH_SHIFT 16
998
999/*
1000 * ext2_xattr_rehash()
1001 *
1002 * Re-compute the extended attribute hash value after an entry has changed.
1003 */
1004static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1005 struct ext2_xattr_entry *entry)
1006{
1007 struct ext2_xattr_entry *here;
1008 __u32 hash = 0;
1009
1010 ext2_xattr_hash_entry(header, entry);
1011 here = ENTRY(header+1);
1012 while (!IS_LAST_ENTRY(here)) {
1013 if (!here->e_hash) {
1014 /* Block is not shared if an entry's hash value == 0 */
1015 hash = 0;
1016 break;
1017 }
1018 hash = (hash << BLOCK_HASH_SHIFT) ^
1019 (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1020 le32_to_cpu(here->e_hash);
1021 here = EXT2_XATTR_NEXT(here);
1022 }
1023 header->h_hash = cpu_to_le32(hash);
1024}
1025
1026#undef BLOCK_HASH_SHIFT
1027
1028int __init
1029init_ext2_xattr(void)
1030{
1031 ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
1032 sizeof(struct mb_cache_entry) +
1033 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1034 if (!ext2_xattr_cache)
1035 return -ENOMEM;
1036 return 0;
1037}
1038
1039void
1040exit_ext2_xattr(void)
1041{
1042 mb_cache_destroy(ext2_xattr_cache);
1043}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
new file mode 100644
index 000000000000..5f3bfde3b810
--- /dev/null
+++ b/fs/ext2/xattr.h
@@ -0,0 +1,118 @@
1/*
2 File: linux/ext2_xattr.h
3
4 On-disk format of extended attributes for the ext2 filesystem.
5
6 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
7*/
8
9#include <linux/config.h>
10#include <linux/init.h>
11#include <linux/xattr.h>
12
13/* Magic value in attribute blocks */
14#define EXT2_XATTR_MAGIC 0xEA020000
15
16/* Maximum number of references to one attribute block */
17#define EXT2_XATTR_REFCOUNT_MAX 1024
18
19/* Name indexes */
20#define EXT2_XATTR_INDEX_USER 1
21#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2
22#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3
23#define EXT2_XATTR_INDEX_TRUSTED 4
24#define EXT2_XATTR_INDEX_LUSTRE 5
25#define EXT2_XATTR_INDEX_SECURITY 6
26
27struct ext2_xattr_header {
28 __le32 h_magic; /* magic number for identification */
29 __le32 h_refcount; /* reference count */
30 __le32 h_blocks; /* number of disk blocks used */
31 __le32 h_hash; /* hash value of all attributes */
32 __u32 h_reserved[4]; /* zero right now */
33};
34
35struct ext2_xattr_entry {
36 __u8 e_name_len; /* length of name */
37 __u8 e_name_index; /* attribute name index */
38 __le16 e_value_offs; /* offset in disk block of value */
39 __le32 e_value_block; /* disk block attribute is stored on (not implemented) */
40 __le32 e_value_size; /* size of attribute value */
41 __le32 e_hash; /* hash value of name and value */
42 char e_name[0]; /* attribute name */
43};
44
45#define EXT2_XATTR_PAD_BITS 2
46#define EXT2_XATTR_PAD (1<<EXT2_XATTR_PAD_BITS)
47#define EXT2_XATTR_ROUND (EXT2_XATTR_PAD-1)
48#define EXT2_XATTR_LEN(name_len) \
49 (((name_len) + EXT2_XATTR_ROUND + \
50 sizeof(struct ext2_xattr_entry)) & ~EXT2_XATTR_ROUND)
51#define EXT2_XATTR_NEXT(entry) \
52 ( (struct ext2_xattr_entry *)( \
53 (char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)) )
54#define EXT2_XATTR_SIZE(size) \
55 (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
56
57# ifdef CONFIG_EXT2_FS_XATTR
58
59extern struct xattr_handler ext2_xattr_user_handler;
60extern struct xattr_handler ext2_xattr_trusted_handler;
61extern struct xattr_handler ext2_xattr_acl_access_handler;
62extern struct xattr_handler ext2_xattr_acl_default_handler;
63extern struct xattr_handler ext2_xattr_security_handler;
64
65extern ssize_t ext2_listxattr(struct dentry *, char *, size_t);
66
67extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
68extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
69
70extern void ext2_xattr_delete_inode(struct inode *);
71extern void ext2_xattr_put_super(struct super_block *);
72
73extern int init_ext2_xattr(void);
74extern void exit_ext2_xattr(void);
75
76extern struct xattr_handler *ext2_xattr_handlers[];
77
78# else /* CONFIG_EXT2_FS_XATTR */
79
80static inline int
81ext2_xattr_get(struct inode *inode, int name_index,
82 const char *name, void *buffer, size_t size)
83{
84 return -EOPNOTSUPP;
85}
86
87static inline int
88ext2_xattr_set(struct inode *inode, int name_index, const char *name,
89 const void *value, size_t size, int flags)
90{
91 return -EOPNOTSUPP;
92}
93
94static inline void
95ext2_xattr_delete_inode(struct inode *inode)
96{
97}
98
99static inline void
100ext2_xattr_put_super(struct super_block *sb)
101{
102}
103
104static inline int
105init_ext2_xattr(void)
106{
107 return 0;
108}
109
110static inline void
111exit_ext2_xattr(void)
112{
113}
114
115#define ext2_xattr_handlers NULL
116
117# endif /* CONFIG_EXT2_FS_XATTR */
118
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
new file mode 100644
index 000000000000..6a6c59fbe599
--- /dev/null
+++ b/fs/ext2/xattr_security.c
@@ -0,0 +1,53 @@
1/*
2 * linux/fs/ext2/xattr_security.c
3 * Handler for storing security labels as extended attributes.
4 */
5
6#include <linux/module.h>
7#include <linux/string.h>
8#include <linux/fs.h>
9#include <linux/smp_lock.h>
10#include <linux/ext2_fs.h>
11#include "xattr.h"
12
13static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
15 const char *name, size_t name_len)
16{
17 const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
18 const size_t total_len = prefix_len + name_len + 1;
19
20 if (list && total_len <= list_size) {
21 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
22 memcpy(list+prefix_len, name, name_len);
23 list[prefix_len + name_len] = '\0';
24 }
25 return total_len;
26}
27
28static int
29ext2_xattr_security_get(struct inode *inode, const char *name,
30 void *buffer, size_t size)
31{
32 if (strcmp(name, "") == 0)
33 return -EINVAL;
34 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name,
35 buffer, size);
36}
37
38static int
39ext2_xattr_security_set(struct inode *inode, const char *name,
40 const void *value, size_t size, int flags)
41{
42 if (strcmp(name, "") == 0)
43 return -EINVAL;
44 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name,
45 value, size, flags);
46}
47
48struct xattr_handler ext2_xattr_security_handler = {
49 .prefix = XATTR_SECURITY_PREFIX,
50 .list = ext2_xattr_security_list,
51 .get = ext2_xattr_security_get,
52 .set = ext2_xattr_security_set,
53};
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
new file mode 100644
index 000000000000..52b30ee6a25f
--- /dev/null
+++ b/fs/ext2/xattr_trusted.c
@@ -0,0 +1,64 @@
1/*
2 * linux/fs/ext2/xattr_trusted.c
3 * Handler for trusted extended attributes.
4 *
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */
7
8#include <linux/module.h>
9#include <linux/string.h>
10#include <linux/fs.h>
11#include <linux/smp_lock.h>
12#include <linux/ext2_fs.h>
13#include "xattr.h"
14
15#define XATTR_TRUSTED_PREFIX "trusted."
16
17static size_t
18ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len)
20{
21 const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
22 const size_t total_len = prefix_len + name_len + 1;
23
24 if (!capable(CAP_SYS_ADMIN))
25 return 0;
26
27 if (list && total_len <= list_size) {
28 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
29 memcpy(list+prefix_len, name, name_len);
30 list[prefix_len + name_len] = '\0';
31 }
32 return total_len;
33}
34
35static int
36ext2_xattr_trusted_get(struct inode *inode, const char *name,
37 void *buffer, size_t size)
38{
39 if (strcmp(name, "") == 0)
40 return -EINVAL;
41 if (!capable(CAP_SYS_ADMIN))
42 return -EPERM;
43 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
44 buffer, size);
45}
46
47static int
48ext2_xattr_trusted_set(struct inode *inode, const char *name,
49 const void *value, size_t size, int flags)
50{
51 if (strcmp(name, "") == 0)
52 return -EINVAL;
53 if (!capable(CAP_SYS_ADMIN))
54 return -EPERM;
55 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
56 value, size, flags);
57}
58
59struct xattr_handler ext2_xattr_trusted_handler = {
60 .prefix = XATTR_TRUSTED_PREFIX,
61 .list = ext2_xattr_trusted_list,
62 .get = ext2_xattr_trusted_get,
63 .set = ext2_xattr_trusted_set,
64};
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
new file mode 100644
index 000000000000..0c03ea131a94
--- /dev/null
+++ b/fs/ext2/xattr_user.c
@@ -0,0 +1,77 @@
1/*
2 * linux/fs/ext2/xattr_user.c
3 * Handler for extended user attributes.
4 *
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */
7
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/string.h>
11#include "ext2.h"
12#include "xattr.h"
13
14#define XATTR_USER_PREFIX "user."
15
16static size_t
17ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
18 const char *name, size_t name_len)
19{
20 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
21 const size_t total_len = prefix_len + name_len + 1;
22
23 if (!test_opt(inode->i_sb, XATTR_USER))
24 return 0;
25
26 if (list && total_len <= list_size) {
27 memcpy(list, XATTR_USER_PREFIX, prefix_len);
28 memcpy(list+prefix_len, name, name_len);
29 list[prefix_len + name_len] = '\0';
30 }
31 return total_len;
32}
33
34static int
35ext2_xattr_user_get(struct inode *inode, const char *name,
36 void *buffer, size_t size)
37{
38 int error;
39
40 if (strcmp(name, "") == 0)
41 return -EINVAL;
42 if (!test_opt(inode->i_sb, XATTR_USER))
43 return -EOPNOTSUPP;
44 error = permission(inode, MAY_READ, NULL);
45 if (error)
46 return error;
47
48 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
49}
50
51static int
52ext2_xattr_user_set(struct inode *inode, const char *name,
53 const void *value, size_t size, int flags)
54{
55 int error;
56
57 if (strcmp(name, "") == 0)
58 return -EINVAL;
59 if (!test_opt(inode->i_sb, XATTR_USER))
60 return -EOPNOTSUPP;
61 if ( !S_ISREG(inode->i_mode) &&
62 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
63 return -EPERM;
64 error = permission(inode, MAY_WRITE, NULL);
65 if (error)
66 return error;
67
68 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
69 value, size, flags);
70}
71
72struct xattr_handler ext2_xattr_user_handler = {
73 .prefix = XATTR_USER_PREFIX,
74 .list = ext2_xattr_user_list,
75 .get = ext2_xattr_user_get,
76 .set = ext2_xattr_user_set,
77};