diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/ext2 |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/ext2')
-rw-r--r-- | fs/ext2/CHANGES | 157 | ||||
-rw-r--r-- | fs/ext2/Makefile | 12 | ||||
-rw-r--r-- | fs/ext2/acl.c | 518 | ||||
-rw-r--r-- | fs/ext2/acl.h | 82 | ||||
-rw-r--r-- | fs/ext2/balloc.c | 699 | ||||
-rw-r--r-- | fs/ext2/bitmap.c | 25 | ||||
-rw-r--r-- | fs/ext2/dir.c | 673 | ||||
-rw-r--r-- | fs/ext2/ext2.h | 160 | ||||
-rw-r--r-- | fs/ext2/file.c | 68 | ||||
-rw-r--r-- | fs/ext2/fsync.c | 51 | ||||
-rw-r--r-- | fs/ext2/ialloc.c | 735 | ||||
-rw-r--r-- | fs/ext2/inode.c | 1276 | ||||
-rw-r--r-- | fs/ext2/ioctl.c | 81 | ||||
-rw-r--r-- | fs/ext2/namei.c | 418 | ||||
-rw-r--r-- | fs/ext2/super.c | 1161 | ||||
-rw-r--r-- | fs/ext2/symlink.c | 52 | ||||
-rw-r--r-- | fs/ext2/xattr.c | 1043 | ||||
-rw-r--r-- | fs/ext2/xattr.h | 118 | ||||
-rw-r--r-- | fs/ext2/xattr_security.c | 53 | ||||
-rw-r--r-- | fs/ext2/xattr_trusted.c | 64 | ||||
-rw-r--r-- | fs/ext2/xattr_user.c | 77 |
21 files changed, 7523 insertions, 0 deletions
diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES new file mode 100644 index 000000000000..aa5aaf0e5911 --- /dev/null +++ b/fs/ext2/CHANGES | |||
@@ -0,0 +1,157 @@ | |||
1 | Changes from version 0.5a to version 0.5b | ||
2 | ========================================= | ||
3 | - Now that we have sysctl(), the immutable flag cannot be changed when | ||
4 | the system is running at security level > 0. | ||
5 | - Some cleanups in the code. | ||
6 | - More consistency checks on directories. | ||
7 | - The ext2.diff patch from Tom May <ftom@netcom.com> has been | ||
8 | integrated. This patch replaces expensive "/" and "%" with | ||
9 | cheap ">>" and "&" where possible. | ||
10 | |||
11 | Changes from version 0.5 to version 0.5a | ||
12 | ======================================== | ||
13 | - Zero the partial block following the end of the file when a file | ||
14 | is truncated. | ||
15 | - Dates updated in the copyright. | ||
16 | - More checks when the filesystem is mounted: the count of blocks, | ||
17 | fragments, and inodes per group is checked against the block size. | ||
18 | - The buffers used by the error routines are now static variables, to | ||
19 | avoid using space on the kernel stack, as requested by Linus. | ||
20 | - Some cleanups in the error messages (some versions of syslog contain | ||
21 | a bug which truncates an error message if it contains '\n'). | ||
22 | - Check that no data can be written to a file past the 2GB limit. | ||
23 | - The famous readdir() bug has been fixed by Stephen Tweedie. | ||
24 | - Added a revision level in the superblock. | ||
25 | - Full support for O_SYNC flag of the open system call. | ||
26 | - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes | ||
27 | ext2fs to consider user #uid like root for the reserved blocks. | ||
28 | `resgid' acts the same way with group #gid. New fields in the | ||
29 | superblock contain default values for resuid and resgid and can | ||
30 | be modified by tune2fs. | ||
31 | Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>. | ||
32 | - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs | ||
33 | to remove the blocks used for FS structures from the total block | ||
34 | count in statfs. With `minixdf', ext2fs mimics Minix behavior | ||
35 | in statfs (i.e. it returns the total number of blocks on the | ||
36 | partition). This is intended to make bde happy :-) | ||
37 | - New file attributes: | ||
38 | - Immutable files cannot be modified. Data cannot be written to | ||
39 | these files. They cannot be removed, renamed and new links cannot | ||
40 | be created. Even root cannot modify the files. He has to remove | ||
41 | the immutable attribute first. | ||
42 | - Append-only files: can only be written in append-mode when writing. | ||
43 | They cannot be removed, renamed and new links cannot be created. | ||
44 | Note: files may only be added to an append-only directory. | ||
45 | - No-dump files: the attribute is not used by the kernel. My port | ||
46 | of dump uses it to avoid backing up files which are not important. | ||
47 | - New check in ext2_check_dir_entry: the inode number is checked. | ||
48 | - Support for big file systems: the copy of the FS descriptor is now | ||
49 | dynamically allocated (previous versions used a fixed size array). | ||
50 | This allows mounting 2GB+ filesystems. | ||
51 | - Reorganization of the ext2_inode structure to allow other operating | ||
52 | systems to create specific fields if they use ext2fs as their native | ||
53 | file system. Currently, ext2fs is only implemented in Linux but | ||
54 | will soon be part of Gnu Hurd and of Masix. | ||
55 | |||
56 | Changes from version 0.4b to version 0.5 | ||
57 | ======================================== | ||
58 | - New superblock fields: s_lastcheck and s_checkinterval added | ||
59 | by Uwe Ohse <uwe@tirka.gun.de> to implement time-dependent checks | ||
60 | of the file system | ||
61 | - Real random numbers for secure rm added by Pierre del Perugia | ||
62 | <delperug@gla.ecoledoc.ibp.fr> | ||
63 | - The mount warnings related to the state of a fs are not printed | ||
64 | if the fs is mounted read-only, idea by Nick Holloway | ||
65 | <alfie@dcs.warwick.ac.uk> | ||
66 | |||
67 | Changes from version 0.4a to version 0.4b | ||
68 | ========================================= | ||
69 | - Copyrights changed to include the name of my laboratory. | ||
70 | - Clean up of balloc.c and ialloc.c. | ||
71 | - More consistency checks. | ||
72 | - Block preallocation added by Stephen Tweedie. | ||
73 | - Direct reads of directories disallowed. | ||
74 | - Readahead implemented in readdir by Stephen Tweedie. | ||
75 | - Bugs in block and inodes allocation fixed. | ||
76 | - Readahead implemented in ext2_find_entry by Chip Salzenberg. | ||
77 | - New mount options: | ||
78 | `check=none|normal|strict' | ||
79 | `debug' | ||
80 | `errors=continue|remount-ro|panic' | ||
81 | `grpid', `bsdgroups' | ||
82 | `nocheck' | ||
83 | `nogrpid', `sysvgroups' | ||
84 | - truncate() now tries to deallocate contiguous blocks in a single call | ||
85 | to ext2_free_blocks(). | ||
86 | - lots of cosmetic changes. | ||
87 | |||
88 | Changes from version 0.4 to version 0.4a | ||
89 | ======================================== | ||
90 | - the `sync' option support is now complete. Version 0.4 was not | ||
91 | supporting it when truncating a file. I have tested the synchronous | ||
92 | writes and they work but they make the system very slow :-( I have | ||
93 | to work again on this to make it faster. | ||
94 | - when detecting an error on a mounted filesystem, version 0.4 used | ||
95 | to try to write a flag in the super block even if the filesystem had | ||
96 | been mounted read-only. This is fixed. | ||
97 | - the `sb=#' option now causes the kernel code to use the filesystem | ||
98 | descriptors located at block #+1. Version 0.4 used the superblock | ||
99 | backup located at block # but used the main copy of the descriptors. | ||
100 | - a new file attribute `S' is supported. This attribute causes | ||
101 | synchronous writes but is applied to a file not to the entire file | ||
102 | system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for | ||
103 | suggesting it). | ||
104 | - the directory cache is inhibited by default. The cache management | ||
105 | code seems to be buggy and I have to look at it carefully before | ||
106 | using it again. | ||
107 | - deleting a file with the `s' attribute (secure deletion) causes its | ||
108 | blocks to be overwritten with random values not with zeros (thanks to | ||
109 | Michael A. Griffith <grif@cs.ucr.edu> for suggesting it). | ||
110 | - lots of cosmetic changes have been made. | ||
111 | |||
112 | Changes from version 0.3 to version 0.4 | ||
113 | ======================================= | ||
114 | - Three new mount options are supported: `check', `sync' and `sb=#'. | ||
115 | `check' tells the kernel code to make more consistency checks | ||
116 | when the file system is mounted. Currently, the kernel code checks | ||
117 | that the blocks and inodes bitmaps are consistent with the free | ||
118 | blocks and inodes counts. More checks will be added in future | ||
119 | releases. | ||
120 | `sync' tells the kernel code to use synchronous writes when updating | ||
121 | an inode, a bitmap, a directory entry or an indirect block. This | ||
122 | can make the file system much slower but can be a big win for file | ||
123 | recovery in case of a crash (and we can now say to the BSD folks | ||
124 | that Linux also supports synchronous updates :-). | ||
125 | `sb=#' tells the kernel code to use an alternate super block instead | ||
126 | of its master copy. `#' is the number of the block (counted in | ||
127 | 1024 bytes blocks) which contains the alternate super block. | ||
128 | An ext2 file system typically contains backups of the super block | ||
129 | at blocks 8193, 16385, and so on. | ||
130 | - I have changed the meaning of the valid flag used by e2fsck. It | ||
131 | now contains the state of the file system. If the kernel code | ||
132 | detects an inconsistency while the file system is mounted, it flags | ||
133 | it as erroneous and e2fsck will detect that on next run. | ||
134 | - The super block now contains a mount counter. This counter is | ||
135 | incremented each time the file system is mounted read/write. When | ||
136 | this counter becomes bigger than a maximal mount count (also stored | ||
137 | in the super block), e2fsck checks the file system, even if it had | ||
138 | been unmounted cleanly, and resets this counter to 0. | ||
139 | - File attributes are now supported. One can associate a set of | ||
140 | attributes to a file. Three attributes are defined: | ||
141 | `c': the file is marked for automatic compression, | ||
142 | `s': the file is marked for secure deletion: when the file is | ||
143 | deleted, its blocks are zeroed and written back to the disk, | ||
144 | `u': the file is marked for undeletion: when the file is deleted, | ||
145 | its contents are saved to allow a future undeletion. | ||
146 | Currently, only the `s' attribute is implemented in the kernel | ||
147 | code. Support for the other attributes will be added in a future | ||
148 | release. | ||
149 | - a few bugs related to time updates have been fixed by Bruce | ||
150 | Evans and me. | ||
151 | - a bug related to the links count of deleted inodes has been fixed. | ||
152 | Previous versions used to keep the links count set to 1 when a file | ||
153 | was deleted. The new version now sets links_count to 0 when deleting | ||
154 | the last link. | ||
155 | - a race condition when deallocating an inode has been fixed by | ||
156 | Stephen Tweedie. | ||
157 | |||
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile new file mode 100644 index 000000000000..ee240a14e70f --- /dev/null +++ b/fs/ext2/Makefile | |||
@@ -0,0 +1,12 @@ | |||
1 | # | ||
2 | # Makefile for the linux ext2-filesystem routines. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_EXT2_FS) += ext2.o | ||
6 | |||
7 | ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | ||
8 | ioctl.o namei.o super.o symlink.o | ||
9 | |||
10 | ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | ||
11 | ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o | ||
12 | ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o | ||
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c new file mode 100644 index 000000000000..8369ee8d28c4 --- /dev/null +++ b/fs/ext2/acl.c | |||
@@ -0,0 +1,518 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/acl.c | ||
3 | * | ||
4 | * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> | ||
5 | */ | ||
6 | |||
7 | #include <linux/init.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include "ext2.h" | ||
12 | #include "xattr.h" | ||
13 | #include "acl.h" | ||
14 | |||
15 | /* | ||
16 | * Convert from filesystem to in-memory representation. | ||
17 | */ | ||
18 | static struct posix_acl * | ||
19 | ext2_acl_from_disk(const void *value, size_t size) | ||
20 | { | ||
21 | const char *end = (char *)value + size; | ||
22 | int n, count; | ||
23 | struct posix_acl *acl; | ||
24 | |||
25 | if (!value) | ||
26 | return NULL; | ||
27 | if (size < sizeof(ext2_acl_header)) | ||
28 | return ERR_PTR(-EINVAL); | ||
29 | if (((ext2_acl_header *)value)->a_version != | ||
30 | cpu_to_le32(EXT2_ACL_VERSION)) | ||
31 | return ERR_PTR(-EINVAL); | ||
32 | value = (char *)value + sizeof(ext2_acl_header); | ||
33 | count = ext2_acl_count(size); | ||
34 | if (count < 0) | ||
35 | return ERR_PTR(-EINVAL); | ||
36 | if (count == 0) | ||
37 | return NULL; | ||
38 | acl = posix_acl_alloc(count, GFP_KERNEL); | ||
39 | if (!acl) | ||
40 | return ERR_PTR(-ENOMEM); | ||
41 | for (n=0; n < count; n++) { | ||
42 | ext2_acl_entry *entry = | ||
43 | (ext2_acl_entry *)value; | ||
44 | if ((char *)value + sizeof(ext2_acl_entry_short) > end) | ||
45 | goto fail; | ||
46 | acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); | ||
47 | acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); | ||
48 | switch(acl->a_entries[n].e_tag) { | ||
49 | case ACL_USER_OBJ: | ||
50 | case ACL_GROUP_OBJ: | ||
51 | case ACL_MASK: | ||
52 | case ACL_OTHER: | ||
53 | value = (char *)value + | ||
54 | sizeof(ext2_acl_entry_short); | ||
55 | acl->a_entries[n].e_id = ACL_UNDEFINED_ID; | ||
56 | break; | ||
57 | |||
58 | case ACL_USER: | ||
59 | case ACL_GROUP: | ||
60 | value = (char *)value + sizeof(ext2_acl_entry); | ||
61 | if ((char *)value > end) | ||
62 | goto fail; | ||
63 | acl->a_entries[n].e_id = | ||
64 | le32_to_cpu(entry->e_id); | ||
65 | break; | ||
66 | |||
67 | default: | ||
68 | goto fail; | ||
69 | } | ||
70 | } | ||
71 | if (value != end) | ||
72 | goto fail; | ||
73 | return acl; | ||
74 | |||
75 | fail: | ||
76 | posix_acl_release(acl); | ||
77 | return ERR_PTR(-EINVAL); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Convert from in-memory to filesystem representation. | ||
82 | */ | ||
83 | static void * | ||
84 | ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) | ||
85 | { | ||
86 | ext2_acl_header *ext_acl; | ||
87 | char *e; | ||
88 | size_t n; | ||
89 | |||
90 | *size = ext2_acl_size(acl->a_count); | ||
91 | ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) + | ||
92 | acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL); | ||
93 | if (!ext_acl) | ||
94 | return ERR_PTR(-ENOMEM); | ||
95 | ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); | ||
96 | e = (char *)ext_acl + sizeof(ext2_acl_header); | ||
97 | for (n=0; n < acl->a_count; n++) { | ||
98 | ext2_acl_entry *entry = (ext2_acl_entry *)e; | ||
99 | entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); | ||
100 | entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); | ||
101 | switch(acl->a_entries[n].e_tag) { | ||
102 | case ACL_USER: | ||
103 | case ACL_GROUP: | ||
104 | entry->e_id = | ||
105 | cpu_to_le32(acl->a_entries[n].e_id); | ||
106 | e += sizeof(ext2_acl_entry); | ||
107 | break; | ||
108 | |||
109 | case ACL_USER_OBJ: | ||
110 | case ACL_GROUP_OBJ: | ||
111 | case ACL_MASK: | ||
112 | case ACL_OTHER: | ||
113 | e += sizeof(ext2_acl_entry_short); | ||
114 | break; | ||
115 | |||
116 | default: | ||
117 | goto fail; | ||
118 | } | ||
119 | } | ||
120 | return (char *)ext_acl; | ||
121 | |||
122 | fail: | ||
123 | kfree(ext_acl); | ||
124 | return ERR_PTR(-EINVAL); | ||
125 | } | ||
126 | |||
127 | static inline struct posix_acl * | ||
128 | ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) | ||
129 | { | ||
130 | struct posix_acl *acl = EXT2_ACL_NOT_CACHED; | ||
131 | |||
132 | spin_lock(&inode->i_lock); | ||
133 | if (*i_acl != EXT2_ACL_NOT_CACHED) | ||
134 | acl = posix_acl_dup(*i_acl); | ||
135 | spin_unlock(&inode->i_lock); | ||
136 | |||
137 | return acl; | ||
138 | } | ||
139 | |||
140 | static inline void | ||
141 | ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, | ||
142 | struct posix_acl *acl) | ||
143 | { | ||
144 | spin_lock(&inode->i_lock); | ||
145 | if (*i_acl != EXT2_ACL_NOT_CACHED) | ||
146 | posix_acl_release(*i_acl); | ||
147 | *i_acl = posix_acl_dup(acl); | ||
148 | spin_unlock(&inode->i_lock); | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * inode->i_sem: don't care | ||
153 | */ | ||
154 | static struct posix_acl * | ||
155 | ext2_get_acl(struct inode *inode, int type) | ||
156 | { | ||
157 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
158 | int name_index; | ||
159 | char *value = NULL; | ||
160 | struct posix_acl *acl; | ||
161 | int retval; | ||
162 | |||
163 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
164 | return NULL; | ||
165 | |||
166 | switch(type) { | ||
167 | case ACL_TYPE_ACCESS: | ||
168 | acl = ext2_iget_acl(inode, &ei->i_acl); | ||
169 | if (acl != EXT2_ACL_NOT_CACHED) | ||
170 | return acl; | ||
171 | name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; | ||
172 | break; | ||
173 | |||
174 | case ACL_TYPE_DEFAULT: | ||
175 | acl = ext2_iget_acl(inode, &ei->i_default_acl); | ||
176 | if (acl != EXT2_ACL_NOT_CACHED) | ||
177 | return acl; | ||
178 | name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; | ||
179 | break; | ||
180 | |||
181 | default: | ||
182 | return ERR_PTR(-EINVAL); | ||
183 | } | ||
184 | retval = ext2_xattr_get(inode, name_index, "", NULL, 0); | ||
185 | if (retval > 0) { | ||
186 | value = kmalloc(retval, GFP_KERNEL); | ||
187 | if (!value) | ||
188 | return ERR_PTR(-ENOMEM); | ||
189 | retval = ext2_xattr_get(inode, name_index, "", value, retval); | ||
190 | } | ||
191 | if (retval > 0) | ||
192 | acl = ext2_acl_from_disk(value, retval); | ||
193 | else if (retval == -ENODATA || retval == -ENOSYS) | ||
194 | acl = NULL; | ||
195 | else | ||
196 | acl = ERR_PTR(retval); | ||
197 | if (value) | ||
198 | kfree(value); | ||
199 | |||
200 | if (!IS_ERR(acl)) { | ||
201 | switch(type) { | ||
202 | case ACL_TYPE_ACCESS: | ||
203 | ext2_iset_acl(inode, &ei->i_acl, acl); | ||
204 | break; | ||
205 | |||
206 | case ACL_TYPE_DEFAULT: | ||
207 | ext2_iset_acl(inode, &ei->i_default_acl, acl); | ||
208 | break; | ||
209 | } | ||
210 | } | ||
211 | return acl; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * inode->i_sem: down | ||
216 | */ | ||
217 | static int | ||
218 | ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | ||
219 | { | ||
220 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
221 | int name_index; | ||
222 | void *value = NULL; | ||
223 | size_t size; | ||
224 | int error; | ||
225 | |||
226 | if (S_ISLNK(inode->i_mode)) | ||
227 | return -EOPNOTSUPP; | ||
228 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
229 | return 0; | ||
230 | |||
231 | switch(type) { | ||
232 | case ACL_TYPE_ACCESS: | ||
233 | name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; | ||
234 | if (acl) { | ||
235 | mode_t mode = inode->i_mode; | ||
236 | error = posix_acl_equiv_mode(acl, &mode); | ||
237 | if (error < 0) | ||
238 | return error; | ||
239 | else { | ||
240 | inode->i_mode = mode; | ||
241 | mark_inode_dirty(inode); | ||
242 | if (error == 0) | ||
243 | acl = NULL; | ||
244 | } | ||
245 | } | ||
246 | break; | ||
247 | |||
248 | case ACL_TYPE_DEFAULT: | ||
249 | name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; | ||
250 | if (!S_ISDIR(inode->i_mode)) | ||
251 | return acl ? -EACCES : 0; | ||
252 | break; | ||
253 | |||
254 | default: | ||
255 | return -EINVAL; | ||
256 | } | ||
257 | if (acl) { | ||
258 | value = ext2_acl_to_disk(acl, &size); | ||
259 | if (IS_ERR(value)) | ||
260 | return (int)PTR_ERR(value); | ||
261 | } | ||
262 | |||
263 | error = ext2_xattr_set(inode, name_index, "", value, size, 0); | ||
264 | |||
265 | if (value) | ||
266 | kfree(value); | ||
267 | if (!error) { | ||
268 | switch(type) { | ||
269 | case ACL_TYPE_ACCESS: | ||
270 | ext2_iset_acl(inode, &ei->i_acl, acl); | ||
271 | break; | ||
272 | |||
273 | case ACL_TYPE_DEFAULT: | ||
274 | ext2_iset_acl(inode, &ei->i_default_acl, acl); | ||
275 | break; | ||
276 | } | ||
277 | } | ||
278 | return error; | ||
279 | } | ||
280 | |||
281 | static int | ||
282 | ext2_check_acl(struct inode *inode, int mask) | ||
283 | { | ||
284 | struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); | ||
285 | |||
286 | if (acl) { | ||
287 | int error = posix_acl_permission(inode, acl, mask); | ||
288 | posix_acl_release(acl); | ||
289 | return error; | ||
290 | } | ||
291 | |||
292 | return -EAGAIN; | ||
293 | } | ||
294 | |||
295 | int | ||
296 | ext2_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
297 | { | ||
298 | return generic_permission(inode, mask, ext2_check_acl); | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * Initialize the ACLs of a new inode. Called from ext2_new_inode. | ||
303 | * | ||
304 | * dir->i_sem: down | ||
305 | * inode->i_sem: up (access to inode is still exclusive) | ||
306 | */ | ||
307 | int | ||
308 | ext2_init_acl(struct inode *inode, struct inode *dir) | ||
309 | { | ||
310 | struct posix_acl *acl = NULL; | ||
311 | int error = 0; | ||
312 | |||
313 | if (!S_ISLNK(inode->i_mode)) { | ||
314 | if (test_opt(dir->i_sb, POSIX_ACL)) { | ||
315 | acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT); | ||
316 | if (IS_ERR(acl)) | ||
317 | return PTR_ERR(acl); | ||
318 | } | ||
319 | if (!acl) | ||
320 | inode->i_mode &= ~current->fs->umask; | ||
321 | } | ||
322 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | ||
323 | struct posix_acl *clone; | ||
324 | mode_t mode; | ||
325 | |||
326 | if (S_ISDIR(inode->i_mode)) { | ||
327 | error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); | ||
328 | if (error) | ||
329 | goto cleanup; | ||
330 | } | ||
331 | clone = posix_acl_clone(acl, GFP_KERNEL); | ||
332 | error = -ENOMEM; | ||
333 | if (!clone) | ||
334 | goto cleanup; | ||
335 | mode = inode->i_mode; | ||
336 | error = posix_acl_create_masq(clone, &mode); | ||
337 | if (error >= 0) { | ||
338 | inode->i_mode = mode; | ||
339 | if (error > 0) { | ||
340 | /* This is an extended ACL */ | ||
341 | error = ext2_set_acl(inode, | ||
342 | ACL_TYPE_ACCESS, clone); | ||
343 | } | ||
344 | } | ||
345 | posix_acl_release(clone); | ||
346 | } | ||
347 | cleanup: | ||
348 | posix_acl_release(acl); | ||
349 | return error; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Does chmod for an inode that may have an Access Control List. The | ||
354 | * inode->i_mode field must be updated to the desired value by the caller | ||
355 | * before calling this function. | ||
356 | * Returns 0 on success, or a negative error number. | ||
357 | * | ||
358 | * We change the ACL rather than storing some ACL entries in the file | ||
359 | * mode permission bits (which would be more efficient), because that | ||
360 | * would break once additional permissions (like ACL_APPEND, ACL_DELETE | ||
361 | * for directories) are added. There are no more bits available in the | ||
362 | * file mode. | ||
363 | * | ||
364 | * inode->i_sem: down | ||
365 | */ | ||
366 | int | ||
367 | ext2_acl_chmod(struct inode *inode) | ||
368 | { | ||
369 | struct posix_acl *acl, *clone; | ||
370 | int error; | ||
371 | |||
372 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
373 | return 0; | ||
374 | if (S_ISLNK(inode->i_mode)) | ||
375 | return -EOPNOTSUPP; | ||
376 | acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); | ||
377 | if (IS_ERR(acl) || !acl) | ||
378 | return PTR_ERR(acl); | ||
379 | clone = posix_acl_clone(acl, GFP_KERNEL); | ||
380 | posix_acl_release(acl); | ||
381 | if (!clone) | ||
382 | return -ENOMEM; | ||
383 | error = posix_acl_chmod_masq(clone, inode->i_mode); | ||
384 | if (!error) | ||
385 | error = ext2_set_acl(inode, ACL_TYPE_ACCESS, clone); | ||
386 | posix_acl_release(clone); | ||
387 | return error; | ||
388 | } | ||
389 | |||
390 | /* | ||
391 | * Extended attribute handlers | ||
392 | */ | ||
393 | static size_t | ||
394 | ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, | ||
395 | const char *name, size_t name_len) | ||
396 | { | ||
397 | const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); | ||
398 | |||
399 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
400 | return 0; | ||
401 | if (list && size <= list_size) | ||
402 | memcpy(list, XATTR_NAME_ACL_ACCESS, size); | ||
403 | return size; | ||
404 | } | ||
405 | |||
406 | static size_t | ||
407 | ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, | ||
408 | const char *name, size_t name_len) | ||
409 | { | ||
410 | const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); | ||
411 | |||
412 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
413 | return 0; | ||
414 | if (list && size <= list_size) | ||
415 | memcpy(list, XATTR_NAME_ACL_DEFAULT, size); | ||
416 | return size; | ||
417 | } | ||
418 | |||
419 | static int | ||
420 | ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) | ||
421 | { | ||
422 | struct posix_acl *acl; | ||
423 | int error; | ||
424 | |||
425 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
426 | return -EOPNOTSUPP; | ||
427 | |||
428 | acl = ext2_get_acl(inode, type); | ||
429 | if (IS_ERR(acl)) | ||
430 | return PTR_ERR(acl); | ||
431 | if (acl == NULL) | ||
432 | return -ENODATA; | ||
433 | error = posix_acl_to_xattr(acl, buffer, size); | ||
434 | posix_acl_release(acl); | ||
435 | |||
436 | return error; | ||
437 | } | ||
438 | |||
439 | static int | ||
440 | ext2_xattr_get_acl_access(struct inode *inode, const char *name, | ||
441 | void *buffer, size_t size) | ||
442 | { | ||
443 | if (strcmp(name, "") != 0) | ||
444 | return -EINVAL; | ||
445 | return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); | ||
446 | } | ||
447 | |||
448 | static int | ||
449 | ext2_xattr_get_acl_default(struct inode *inode, const char *name, | ||
450 | void *buffer, size_t size) | ||
451 | { | ||
452 | if (strcmp(name, "") != 0) | ||
453 | return -EINVAL; | ||
454 | return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); | ||
455 | } | ||
456 | |||
457 | static int | ||
458 | ext2_xattr_set_acl(struct inode *inode, int type, const void *value, | ||
459 | size_t size) | ||
460 | { | ||
461 | struct posix_acl *acl; | ||
462 | int error; | ||
463 | |||
464 | if (!test_opt(inode->i_sb, POSIX_ACL)) | ||
465 | return -EOPNOTSUPP; | ||
466 | if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) | ||
467 | return -EPERM; | ||
468 | |||
469 | if (value) { | ||
470 | acl = posix_acl_from_xattr(value, size); | ||
471 | if (IS_ERR(acl)) | ||
472 | return PTR_ERR(acl); | ||
473 | else if (acl) { | ||
474 | error = posix_acl_valid(acl); | ||
475 | if (error) | ||
476 | goto release_and_out; | ||
477 | } | ||
478 | } else | ||
479 | acl = NULL; | ||
480 | |||
481 | error = ext2_set_acl(inode, type, acl); | ||
482 | |||
483 | release_and_out: | ||
484 | posix_acl_release(acl); | ||
485 | return error; | ||
486 | } | ||
487 | |||
488 | static int | ||
489 | ext2_xattr_set_acl_access(struct inode *inode, const char *name, | ||
490 | const void *value, size_t size, int flags) | ||
491 | { | ||
492 | if (strcmp(name, "") != 0) | ||
493 | return -EINVAL; | ||
494 | return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); | ||
495 | } | ||
496 | |||
497 | static int | ||
498 | ext2_xattr_set_acl_default(struct inode *inode, const char *name, | ||
499 | const void *value, size_t size, int flags) | ||
500 | { | ||
501 | if (strcmp(name, "") != 0) | ||
502 | return -EINVAL; | ||
503 | return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); | ||
504 | } | ||
505 | |||
506 | struct xattr_handler ext2_xattr_acl_access_handler = { /* xattr handler table for the access ACL attribute */ | ||
507 | .prefix = XATTR_NAME_ACL_ACCESS, | ||
508 | .list = ext2_xattr_list_acl_access, | ||
509 | .get = ext2_xattr_get_acl_access, /* rejects any non-empty name suffix with -EINVAL */ | ||
510 | .set = ext2_xattr_set_acl_access, /* rejects any non-empty name suffix with -EINVAL */ | ||
511 | }; | ||
512 | |||
513 | struct xattr_handler ext2_xattr_acl_default_handler = { /* xattr handler table for the default ACL attribute */ | ||
514 | .prefix = XATTR_NAME_ACL_DEFAULT, | ||
515 | .list = ext2_xattr_list_acl_default, | ||
516 | .get = ext2_xattr_get_acl_default, /* rejects any non-empty name suffix with -EINVAL */ | ||
517 | .set = ext2_xattr_set_acl_default, /* rejects any non-empty name suffix with -EINVAL */ | ||
518 | }; | ||
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h new file mode 100644 index 000000000000..fed96ae81a7d --- /dev/null +++ b/fs/ext2/acl.h | |||
@@ -0,0 +1,82 @@ | |||
1 | /* | ||
2 | File: fs/ext2/acl.h | ||
3 | |||
4 | (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> | ||
5 | */ | ||
6 | |||
7 | #include <linux/xattr_acl.h> | ||
8 | |||
9 | #define EXT2_ACL_VERSION 0x0001 | ||
10 | |||
11 | typedef struct { /* long on-disk ACL entry: tag, permissions and id (little-endian) */ | ||
12 | __le16 e_tag; | ||
13 | __le16 e_perm; | ||
14 | __le32 e_id; /* acl.c reads/writes this only for ACL_USER and ACL_GROUP entries */ | ||
15 | } ext2_acl_entry; | ||
16 | |||
17 | typedef struct { /* short on-disk ACL entry: same leading fields as ext2_acl_entry, no id */ | ||
18 | __le16 e_tag; | ||
19 | __le16 e_perm; | ||
20 | } ext2_acl_entry_short; | ||
21 | |||
22 | typedef struct { /* header preceding the entries in the stored ACL value */ | ||
23 | __le32 a_version; /* acl.c writes and expects cpu_to_le32(EXT2_ACL_VERSION) */ | ||
24 | } ext2_acl_header; | ||
25 | |||
26 | static inline size_t ext2_acl_size(int count) | ||
27 | { | ||
28 | if (count <= 4) { | ||
29 | return sizeof(ext2_acl_header) + | ||
30 | count * sizeof(ext2_acl_entry_short); | ||
31 | } else { | ||
32 | return sizeof(ext2_acl_header) + | ||
33 | 4 * sizeof(ext2_acl_entry_short) + | ||
34 | (count - 4) * sizeof(ext2_acl_entry); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | static inline int ext2_acl_count(size_t size) | ||
39 | { | ||
40 | ssize_t s; | ||
41 | size -= sizeof(ext2_acl_header); | ||
42 | s = size - 4 * sizeof(ext2_acl_entry_short); | ||
43 | if (s < 0) { | ||
44 | if (size % sizeof(ext2_acl_entry_short)) | ||
45 | return -1; | ||
46 | return size / sizeof(ext2_acl_entry_short); | ||
47 | } else { | ||
48 | if (s % sizeof(ext2_acl_entry)) | ||
49 | return -1; | ||
50 | return s / sizeof(ext2_acl_entry) + 4; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
55 | |||
56 | /* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl | ||
57 | if the ACL has not been cached */ | ||
58 | #define EXT2_ACL_NOT_CACHED ((void *)-1) | ||
59 | |||
60 | /* acl.c */ | ||
61 | extern int ext2_permission (struct inode *, int, struct nameidata *); | ||
62 | extern int ext2_acl_chmod (struct inode *); | ||
63 | extern int ext2_init_acl (struct inode *, struct inode *); | ||
64 | |||
65 | #else /* !CONFIG_EXT2_FS_POSIX_ACL: ACL entry points become NULL or no-op stubs */ | ||
66 | #include <linux/sched.h> | ||
67 | #define ext2_permission NULL | ||
68 | #define ext2_get_acl NULL | ||
69 | #define ext2_set_acl NULL | ||
70 | |||
71 | static inline int /* stub: nothing to update, always reports success */ | ||
72 | ext2_acl_chmod (struct inode *inode) | ||
73 | { | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | static inline int ext2_init_acl (struct inode *inode, struct inode *dir) /* stub: always reports success */ | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | #endif /* CONFIG_EXT2_FS_POSIX_ACL */ | ||
82 | |||
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c new file mode 100644 index 000000000000..6591abef64d0 --- /dev/null +++ b/fs/ext2/balloc.c | |||
@@ -0,0 +1,699 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/balloc.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 | ||
10 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
11 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include "ext2.h" | ||
16 | #include <linux/quotaops.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/buffer_head.h> | ||
19 | |||
20 | /* | ||
21 | * balloc.c contains the blocks allocation and deallocation routines | ||
22 | */ | ||
23 | |||
/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_read_super).
 */
34 | |||
35 | |||
/*
 * True when b lies in the inclusive range [first, first + len - 1].
 * b and first are each evaluated twice, so only pass expressions without
 * side effects.
 */
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
37 | |||
38 | struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, | ||
39 | unsigned int block_group, | ||
40 | struct buffer_head ** bh) | ||
41 | { | ||
42 | unsigned long group_desc; | ||
43 | unsigned long offset; | ||
44 | struct ext2_group_desc * desc; | ||
45 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
46 | |||
47 | if (block_group >= sbi->s_groups_count) { | ||
48 | ext2_error (sb, "ext2_get_group_desc", | ||
49 | "block_group >= groups_count - " | ||
50 | "block_group = %d, groups_count = %lu", | ||
51 | block_group, sbi->s_groups_count); | ||
52 | |||
53 | return NULL; | ||
54 | } | ||
55 | |||
56 | group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); | ||
57 | offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); | ||
58 | if (!sbi->s_group_desc[group_desc]) { | ||
59 | ext2_error (sb, "ext2_get_group_desc", | ||
60 | "Group descriptor not loaded - " | ||
61 | "block_group = %d, group_desc = %lu, desc = %lu", | ||
62 | block_group, group_desc, offset); | ||
63 | return NULL; | ||
64 | } | ||
65 | |||
66 | desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data; | ||
67 | if (bh) | ||
68 | *bh = sbi->s_group_desc[group_desc]; | ||
69 | return desc + offset; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Read the bitmap for a given block_group, reading into the specified | ||
74 | * slot in the superblock's bitmap cache. | ||
75 | * | ||
76 | * Return buffer_head on success or NULL in case of failure. | ||
77 | */ | ||
78 | static struct buffer_head * | ||
79 | read_block_bitmap(struct super_block *sb, unsigned int block_group) | ||
80 | { | ||
81 | struct ext2_group_desc * desc; | ||
82 | struct buffer_head * bh = NULL; | ||
83 | |||
84 | desc = ext2_get_group_desc (sb, block_group, NULL); | ||
85 | if (!desc) | ||
86 | goto error_out; | ||
87 | bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); | ||
88 | if (!bh) | ||
89 | ext2_error (sb, "read_block_bitmap", | ||
90 | "Cannot read block bitmap - " | ||
91 | "block_group = %d, block_bitmap = %u", | ||
92 | block_group, le32_to_cpu(desc->bg_block_bitmap)); | ||
93 | error_out: | ||
94 | return bh; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Set sb->s_dirt here because the superblock was "logically" altered. We | ||
99 | * need to recalculate its free blocks count and flush it out. | ||
100 | */ | ||
101 | static int reserve_blocks(struct super_block *sb, int count) | ||
102 | { | ||
103 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
104 | struct ext2_super_block *es = sbi->s_es; | ||
105 | unsigned free_blocks; | ||
106 | unsigned root_blocks; | ||
107 | |||
108 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
109 | root_blocks = le32_to_cpu(es->s_r_blocks_count); | ||
110 | |||
111 | if (free_blocks < count) | ||
112 | count = free_blocks; | ||
113 | |||
114 | if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) && | ||
115 | sbi->s_resuid != current->fsuid && | ||
116 | (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { | ||
117 | /* | ||
118 | * We are too close to reserve and we are not privileged. | ||
119 | * Can we allocate anything at all? | ||
120 | */ | ||
121 | if (free_blocks > root_blocks) | ||
122 | count = free_blocks - root_blocks; | ||
123 | else | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | percpu_counter_mod(&sbi->s_freeblocks_counter, -count); | ||
128 | sb->s_dirt = 1; | ||
129 | return count; | ||
130 | } | ||
131 | |||
132 | static void release_blocks(struct super_block *sb, int count) | ||
133 | { | ||
134 | if (count) { | ||
135 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
136 | |||
137 | percpu_counter_mod(&sbi->s_freeblocks_counter, count); | ||
138 | sb->s_dirt = 1; | ||
139 | } | ||
140 | } | ||
141 | |||
142 | static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, | ||
143 | struct ext2_group_desc *desc, struct buffer_head *bh, int count) | ||
144 | { | ||
145 | unsigned free_blocks; | ||
146 | |||
147 | if (!desc->bg_free_blocks_count) | ||
148 | return 0; | ||
149 | |||
150 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
151 | free_blocks = le16_to_cpu(desc->bg_free_blocks_count); | ||
152 | if (free_blocks < count) | ||
153 | count = free_blocks; | ||
154 | desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); | ||
155 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
156 | mark_buffer_dirty(bh); | ||
157 | return count; | ||
158 | } | ||
159 | |||
160 | static void group_release_blocks(struct super_block *sb, int group_no, | ||
161 | struct ext2_group_desc *desc, struct buffer_head *bh, int count) | ||
162 | { | ||
163 | if (count) { | ||
164 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
165 | unsigned free_blocks; | ||
166 | |||
167 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
168 | free_blocks = le16_to_cpu(desc->bg_free_blocks_count); | ||
169 | desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); | ||
170 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
171 | sb->s_dirt = 1; | ||
172 | mark_buffer_dirty(bh); | ||
173 | } | ||
174 | } | ||
175 | |||
/*
 * Free given blocks, update quota and i_blocks field
 *
 * @inode: inode the blocks belonged to (supplies the superblock and is
 *         passed to DQUOT_FREE_BLOCK)
 * @block: first block number to free
 * @count: number of consecutive blocks to free
 *
 * The range may span several block groups; it is processed one group at a
 * time.  Only bits that were actually set in the bitmap are counted as
 * freed, and that total is what is returned to the free-block counters and
 * to the quota code -- also when the function bails out part-way.
 */
void ext2_free_blocks (struct inode * inode, unsigned long block,
		       unsigned long count)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head * bh2;
	unsigned long block_group;
	unsigned long bit;
	unsigned long i;
	unsigned long overflow;
	struct super_block * sb = inode->i_sb;
	struct ext2_sb_info * sbi = EXT2_SB(sb);
	struct ext2_group_desc * desc;
	struct ext2_super_block * es = sbi->s_es;
	unsigned freed = 0, group_freed;

	/*
	 * Reject ranges that start before the first data block, wrap around
	 * (block + count < block) or run past the end of the filesystem.
	 */
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > le32_to_cpu(es->s_blocks_count)) {
		ext2_error (sb, "ext2_free_blocks",
			    "Freeing blocks not in datazone - "
			    "block = %lu, count = %lu", block, count);
		goto error_return;
	}

	ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);

	/* One pass per block group touched by the range. */
do_more:
	overflow = 0;
	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
		      EXT2_BLOCKS_PER_GROUP(sb);
	bit = (block - le32_to_cpu(es->s_first_data_block)) %
		      EXT2_BLOCKS_PER_GROUP(sb);
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
	if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
		/* Handle this group now, the remainder on the next pass. */
		overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
		count -= overflow;
	}
	/* Drop the bitmap of the previous pass (NULL on the first one). */
	brelse(bitmap_bh);
	bitmap_bh = read_block_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;

	desc = ext2_get_group_desc (sb, block_group, &bh2);
	if (!desc)
		goto error_return;

	/*
	 * Freeing the group's bitmaps or inode table indicates corruption.
	 * This is only reported; the bits are still cleared below.
	 */
	if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
	    in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
	    in_range (block, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group) ||
	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group))
		ext2_error (sb, "ext2_free_blocks",
			    "Freeing blocks in system zones - "
			    "Block = %lu, count = %lu",
			    block, count);

	/* Clear each bit; a bit that was already clear is not counted. */
	for (i = 0, group_freed = 0; i < count; i++) {
		if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
						bit + i, bitmap_bh->b_data)) {
			ext2_error(sb, __FUNCTION__,
				"bit already cleared for block %lu", block + i);
		} else {
			group_freed++;
		}
	}

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	group_release_blocks(sb, block_group, desc, bh2, group_freed);
	freed += group_freed;

	if (overflow) {
		/* Continue with the part that lies in the next group. */
		block += count;
		count = overflow;
		goto do_more;
	}
error_return:
	/* Reached on success too: account for whatever was freed so far. */
	brelse(bitmap_bh);
	release_blocks(sb, freed);
	DQUOT_FREE_BLOCK(inode, freed);
}
264 | |||
/*
 * Find a clear bit in the bitmap @map (@size bits) and set it atomically.
 *
 * @lock: lock handed to ext2_set_bit_atomic()
 * @map:  bitmap data
 * @size: number of bits in the bitmap
 * @goal: preferred bit number
 *
 * Search order: the goal bit itself; then (for a non-zero goal) the bits up
 * to the next 64-bit boundary; then an entirely free byte from the goal
 * onwards; finally any clear bit from the goal onwards.
 *
 * Returns the bit number that was set, or -1 if the search found nothing.
 */
static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
{
	int k;
	char *p, *r;

	if (!ext2_test_bit(goal, map))
		goto got_it;

	/*
	 * Also re-entered from got_it when the chosen bit turned out to be
	 * set already by the time we tried to set it.
	 */
repeat:
	if (goal) {
		/*
		 * The goal was occupied; search forward for a free
		 * block within the next XX blocks.
		 *
		 * end_goal is more or less random, but it has to be
		 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
		 * next 64-bit boundary is simple..
		 */
		k = (goal + 63) & ~63;
		goal = ext2_find_next_zero_bit(map, k, goal);
		if (goal < k)
			goto got_it;
		/*
		 * Search in the remainder of the current group.
		 */
	}

	/* Byte-wise scan for a zero byte, starting at the byte holding goal. */
	p = map + (goal >> 3);
	r = memscan(p, 0, (size - goal + 7) >> 3);
	k = (r - map) << 3;
	if (k < size) {
		/*
		 * We have succeeded in finding a free byte in the block
		 * bitmap.  Now search backwards to find the start of this
		 * group of free blocks - won't take more than 7 iterations.
		 */
		for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
			;
		goto got_it;
	}

	/* No free byte: settle for any single clear bit from goal onwards. */
	k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
	if (k < size) {
		goal = k;
		goto got_it;
	}
	return -1;
got_it:
	/* A non-zero return means the bit was already set: search again. */
	if (ext2_set_bit_atomic(lock, goal, (void *) map))
		goto repeat;
	return goal;
}
317 | |||
/*
 * ext2_new_block uses a goal block to assist allocation.  If the goal is
 * free, or there is a free block close behind the goal (grab_block()
 * searches up to the next 64-bit boundary of the bitmap), that block
 * is allocated.  Otherwise a forward search is made for a free block; within
 * each block group the search first looks for an entire free byte in the block
 * bitmap, and then for any free bit if that fails.
 * This function also updates quota and i_blocks field.
 *
 * @inode:          inode the block is allocated for
 * @goal:           preferred block number; replaced by the first data block
 *                  when it lies outside the filesystem
 * @prealloc_count: in/out preallocation counter; preallocation is skipped
 *                  when this is NULL or already non-zero
 * @prealloc_block: receives the first preallocated block number
 * @err:            receives 0 on success, otherwise -EDQUOT, -ENOSPC or -EIO
 *
 * Returns the allocated block number, or 0 on failure.
 */
int ext2_new_block(struct inode *inode, unsigned long goal,
			u32 *prealloc_count, u32 *prealloc_block, int *err)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;	/* bh2 */
	struct ext2_group_desc *desc;
	int group_no;			/* i */
	int ret_block;			/* j */
	int group_idx;			/* k */
	int target_block;		/* tmp */
	int block = 0;
	struct super_block *sb = inode->i_sb;
	struct ext2_sb_info *sbi = EXT2_SB(sb);
	struct ext2_super_block *es = sbi->s_es;
	unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
	unsigned prealloc_goal = es->s_prealloc_blocks;
	unsigned group_alloc = 0, es_alloc, dq_alloc;
	int nr_scanned_groups;

	/*
	 * prealloc_goal counts the blocks wanted in addition to the one being
	 * allocated.  A superblock value of 0 selects the default.
	 */
	if (!prealloc_goal--)
		prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
	if (!prealloc_count || *prealloc_count)
		prealloc_goal = 0;

	/* Quota for the block itself is mandatory ... */
	if (DQUOT_ALLOC_BLOCK(inode, 1)) {
		*err = -EDQUOT;
		goto out;
	}

	/* ... quota for preallocation is best effort: shrink until it fits. */
	while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
		prealloc_goal--;

	/*
	 * dq_alloc, es_alloc and group_alloc track what has been reserved at
	 * the quota, filesystem and group level.  Whatever is still left in
	 * them at the end is handed back at out_release/out_dquot.
	 */
	dq_alloc = prealloc_goal + 1;
	es_alloc = reserve_blocks(sb, dq_alloc);
	if (!es_alloc) {
		*err = -ENOSPC;
		goto out_dquot;
	}

	ext2_debug ("goal=%lu.\n", goal);

	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= le32_to_cpu(es->s_blocks_count))
		goal = le32_to_cpu(es->s_first_data_block);
	group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
	desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
	if (!desc) {
		/*
		 * gdp_bh may still be uninitialised.  But group_release_blocks
		 * will not touch it because group_alloc is zero.
		 */
		goto io_error;
	}

	/* First try the group that contains the goal. */
	group_alloc = group_reserve_blocks(sbi, group_no, desc,
					gdp_bh, es_alloc);
	if (group_alloc) {
		ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
					group_size);
		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;

		ext2_debug("goal is at %d:%d.\n", group_no, ret_block);

		ret_block = grab_block(sb_bgl_lock(sbi, group_no),
				bitmap_bh->b_data, group_size, ret_block);
		if (ret_block >= 0)
			goto got_block;
		/* Nothing found here: undo the group-level reservation. */
		group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
		group_alloc = 0;
	}

	ext2_debug ("Bit not found in block group %d.\n", group_no);

	/*
	 * Now search the rest of the groups.  We assume that
	 * group_no and desc correctly point to the last group visited.
	 */
	nr_scanned_groups = 0;
retry:
	/* Walk the groups round-robin until one grants a reservation. */
	for (group_idx = 0; !group_alloc &&
			group_idx < sbi->s_groups_count; group_idx++) {
		group_no++;
		if (group_no >= sbi->s_groups_count)
			group_no = 0;
		desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
		if (!desc)
			goto io_error;
		group_alloc = group_reserve_blocks(sbi, group_no, desc,
						gdp_bh, es_alloc);
	}
	if (!group_alloc) {
		*err = -ENOSPC;
		goto out_release;
	}
	brelse(bitmap_bh);
	bitmap_bh = read_block_bitmap(sb, group_no);
	if (!bitmap_bh)
		goto io_error;

	ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
				group_size, 0);
	if (ret_block < 0) {
		/*
		 * If a free block counter is corrupted we can loop infinitely.
		 * Detect that here.
		 */
		nr_scanned_groups++;
		if (nr_scanned_groups > 2 * sbi->s_groups_count) {
			ext2_error(sb, "ext2_new_block",
				"corrupted free blocks counters");
			goto io_error;
		}
		/*
		 * Someone else grabbed the last free block in this blockgroup
		 * before us.  Retry the scan.
		 */
		group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
		group_alloc = 0;
		goto retry;
	}

got_block:
	ext2_debug("using block group %d(%d)\n",
		group_no, desc->bg_free_blocks_count);

	/* Translate the bit within the group into a filesystem block number. */
	target_block = ret_block + group_no * group_size +
			le32_to_cpu(es->s_first_data_block);

	/* Landing on a bitmap or the inode table is reported, not aborted. */
	if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
	    target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
	    in_range(target_block, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group))
		ext2_error (sb, "ext2_new_block",
			    "Allocating block in system zone - "
			    "block = %u", target_block);

	if (target_block >= le32_to_cpu(es->s_blocks_count)) {
		ext2_error (sb, "ext2_new_block",
			    "block(%d) >= blocks count(%d) - "
			    "block_group = %d, es == %p ", ret_block,
			le32_to_cpu(es->s_blocks_count), group_no, es);
		goto io_error;
	}
	block = target_block;

	/* OK, we _had_ allocated something */
	ext2_debug("found bit %d\n", ret_block);

	/* One unit of every reservation is consumed by the block itself. */
	dq_alloc--;
	es_alloc--;
	group_alloc--;

	/*
	 * Do block preallocation now if required.
	 *
	 * prealloc_count is only dereferenced when group_alloc is non-zero.
	 * With a NULL prealloc_count, prealloc_goal was forced to 0 above, so
	 * at most one block was reserved and group_alloc is 0 by now.
	 */
	write_lock(&EXT2_I(inode)->i_meta_lock);
	if (group_alloc && !*prealloc_count) {
		unsigned n;

		/*
		 * Claim the bits directly following the allocated one; stop
		 * at the first bit that is already taken or at the end of
		 * the group.
		 */
		for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
			if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
						ret_block,
						(void*) bitmap_bh->b_data))
 				break;
		}
		*prealloc_block = block + 1;
		*prealloc_count = n;
		es_alloc -= n;
		dq_alloc -= n;
		group_alloc -= n;
	}
	write_unlock(&EXT2_I(inode)->i_meta_lock);

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	ext2_debug ("allocating block %d. ", block);

	*err = 0;
	/* Success falls through: return every reservation that went unused. */
out_release:
	group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
	release_blocks(sb, es_alloc);
out_dquot:
	DQUOT_FREE_BLOCK(inode, dq_alloc);
out:
	brelse(bitmap_bh);
	return block;

io_error:
	*err = -EIO;
	goto out_release;
}
522 | |||
523 | unsigned long ext2_count_free_blocks (struct super_block * sb) | ||
524 | { | ||
525 | struct ext2_group_desc * desc; | ||
526 | unsigned long desc_count = 0; | ||
527 | int i; | ||
528 | #ifdef EXT2FS_DEBUG | ||
529 | unsigned long bitmap_count, x; | ||
530 | struct ext2_super_block *es; | ||
531 | |||
532 | lock_super (sb); | ||
533 | es = EXT2_SB(sb)->s_es; | ||
534 | desc_count = 0; | ||
535 | bitmap_count = 0; | ||
536 | desc = NULL; | ||
537 | for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { | ||
538 | struct buffer_head *bitmap_bh; | ||
539 | desc = ext2_get_group_desc (sb, i, NULL); | ||
540 | if (!desc) | ||
541 | continue; | ||
542 | desc_count += le16_to_cpu(desc->bg_free_blocks_count); | ||
543 | bitmap_bh = read_block_bitmap(sb, i); | ||
544 | if (!bitmap_bh) | ||
545 | continue; | ||
546 | |||
547 | x = ext2_count_free(bitmap_bh, sb->s_blocksize); | ||
548 | printk ("group %d: stored = %d, counted = %lu\n", | ||
549 | i, le16_to_cpu(desc->bg_free_blocks_count), x); | ||
550 | bitmap_count += x; | ||
551 | brelse(bitmap_bh); | ||
552 | } | ||
553 | printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", | ||
554 | (long)le32_to_cpu(es->s_free_blocks_count), | ||
555 | desc_count, bitmap_count); | ||
556 | unlock_super (sb); | ||
557 | return bitmap_count; | ||
558 | #else | ||
559 | for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { | ||
560 | desc = ext2_get_group_desc (sb, i, NULL); | ||
561 | if (!desc) | ||
562 | continue; | ||
563 | desc_count += le16_to_cpu(desc->bg_free_blocks_count); | ||
564 | } | ||
565 | return desc_count; | ||
566 | #endif | ||
567 | } | ||
568 | |||
569 | static inline int | ||
570 | block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) | ||
571 | { | ||
572 | return ext2_test_bit ((block - | ||
573 | le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) % | ||
574 | EXT2_BLOCKS_PER_GROUP(sb), map); | ||
575 | } | ||
576 | |||
/*
 * Return 1 if @a is a positive integral power of @b (b, b*b, ...), else 0.
 *
 * The test repeatedly divides @a by @b instead of multiplying up a power
 * of @b: the multiplication overflows a signed int (undefined behaviour,
 * in practice an endless loop) once @a exceeds the largest power of @b
 * that fits in an int.
 *
 * @b is expected to be at least 2 (the callers in this file pass 3, 5 and
 * 7); for smaller bases the result is simply a == b.
 */
static inline int test_root(int a, int b)
{
	if (b < 2)
		return a == b;

	while (a > b) {
		if (a % b)
			return 0;
		a /= b;
	}
	return a == b;
}
585 | |||
/*
 * Return 1 for groups 0 and 1 (and any group number <= 1) and for groups
 * whose number is a power of 3, 5 or 7; return 0 for every other group.
 */
static int ext2_group_sparse(int group)
{
	static const int bases[] = { 3, 5, 7 };
	unsigned int n;

	if (group <= 1)
		return 1;

	for (n = 0; n < sizeof(bases) / sizeof(bases[0]); n++) {
		if (test_root(group, bases[n]))
			return 1;
	}
	return 0;
}
593 | |||
594 | /** | ||
595 | * ext2_bg_has_super - number of blocks used by the superblock in group | ||
596 | * @sb: superblock for filesystem | ||
597 | * @group: group number to check | ||
598 | * | ||
599 | * Return the number of blocks used by the superblock (primary or backup) | ||
600 | * in this group. Currently this will be only 0 or 1. | ||
601 | */ | ||
602 | int ext2_bg_has_super(struct super_block *sb, int group) | ||
603 | { | ||
604 | if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& | ||
605 | !ext2_group_sparse(group)) | ||
606 | return 0; | ||
607 | return 1; | ||
608 | } | ||
609 | |||
610 | /** | ||
611 | * ext2_bg_num_gdb - number of blocks used by the group table in group | ||
612 | * @sb: superblock for filesystem | ||
613 | * @group: group number to check | ||
614 | * | ||
615 | * Return the number of blocks used by the group descriptor table | ||
616 | * (primary or backup) in this group. In the future there may be a | ||
617 | * different number of descriptor blocks in each group. | ||
618 | */ | ||
619 | unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) | ||
620 | { | ||
621 | if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& | ||
622 | !ext2_group_sparse(group)) | ||
623 | return 0; | ||
624 | return EXT2_SB(sb)->s_gdb_count; | ||
625 | } | ||
626 | |||
#ifdef CONFIG_EXT2_CHECK
/*
 * Consistency check of the block bitmaps.
 * Called at mount-time, super-block is locked.
 *
 * For every group whose descriptor and bitmap can be read, verify that the
 * superblock copy, the descriptor blocks, both bitmaps and the inode table
 * are marked in use, and that the stored free count matches the bitmap.
 * Finally compare the superblock's free count with the bitmap total.
 * Every mismatch is reported through ext2_error().
 */
void ext2_check_blocks_bitmap (struct super_block * sb)
{
	struct ext2_super_block * es = EXT2_SB(sb)->s_es;
	struct ext2_group_desc * gdp = NULL;
	struct buffer_head *bitmap_bh = NULL;
	unsigned long free_in_descs = 0;
	unsigned long free_in_bitmaps = 0;
	unsigned long gdb_blocks, counted, n;
	int group;

	for (group = 0; group < EXT2_SB(sb)->s_groups_count; group++) {
		gdp = ext2_get_group_desc (sb, group, NULL);
		if (!gdp)
			continue;
		free_in_descs += le16_to_cpu(gdp->bg_free_blocks_count);

		/* Swap the previous group's bitmap for this group's. */
		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group);
		if (!bitmap_bh)
			continue;

		if (ext2_bg_has_super(sb, group) &&
		    !ext2_test_bit(0, bitmap_bh->b_data))
			ext2_error(sb, __FUNCTION__,
				   "Superblock in group %d is marked free", group);

		gdb_blocks = ext2_bg_num_gdb(sb, group);
		for (n = 0; n < gdb_blocks; n++) {
			if (ext2_test_bit(n + 1, bitmap_bh->b_data))
				continue;
			ext2_error(sb, __FUNCTION__,
				   "Descriptor block #%ld in group "
				   "%d is marked free", n, group);
		}

		if (!block_in_use(le32_to_cpu(gdp->bg_block_bitmap),
					sb, bitmap_bh->b_data))
			ext2_error(sb, "ext2_check_blocks_bitmap",
				    "Block bitmap for group %d is marked free",
				    group);

		if (!block_in_use(le32_to_cpu(gdp->bg_inode_bitmap),
					sb, bitmap_bh->b_data))
			ext2_error(sb, "ext2_check_blocks_bitmap",
				    "Inode bitmap for group %d is marked free",
				    group);

		for (n = 0; n < EXT2_SB(sb)->s_itb_per_group; n++) {
			if (block_in_use(le32_to_cpu(gdp->bg_inode_table) + n,
						sb, bitmap_bh->b_data))
				continue;
			ext2_error (sb, "ext2_check_blocks_bitmap",
				    "Block #%ld of the inode table in "
				    "group %d is marked free", n, group);
		}

		counted = ext2_count_free(bitmap_bh, sb->s_blocksize);
		if (le16_to_cpu(gdp->bg_free_blocks_count) != counted)
			ext2_error (sb, "ext2_check_blocks_bitmap",
				    "Wrong free blocks count for group %d, "
				    "stored = %d, counted = %lu", group,
				    le16_to_cpu(gdp->bg_free_blocks_count), counted);
		free_in_bitmaps += counted;
	}

	if (le32_to_cpu(es->s_free_blocks_count) != free_in_bitmaps)
		ext2_error (sb, "ext2_check_blocks_bitmap",
			"Wrong free blocks count in super block, "
			"stored = %lu, counted = %lu",
			(unsigned long)le32_to_cpu(es->s_free_blocks_count),
			free_in_bitmaps);
	brelse(bitmap_bh);
}
#endif
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c new file mode 100644 index 000000000000..20145b74623f --- /dev/null +++ b/fs/ext2/bitmap.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/bitmap.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | */ | ||
9 | |||
10 | #include <linux/buffer_head.h> | ||
11 | |||
12 | static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; | ||
13 | |||
14 | unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) | ||
15 | { | ||
16 | unsigned int i; | ||
17 | unsigned long sum = 0; | ||
18 | |||
19 | if (!map) | ||
20 | return (0); | ||
21 | for (i = 0; i < numchars; i++) | ||
22 | sum += nibblemap[map->b_data[i] & 0xf] + | ||
23 | nibblemap[(map->b_data[i] >> 4) & 0xf]; | ||
24 | return (sum); | ||
25 | } | ||
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c new file mode 100644 index 000000000000..5b5f52876b42 --- /dev/null +++ b/fs/ext2/dir.c | |||
@@ -0,0 +1,673 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/dir.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/fs/minix/dir.c | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | * | ||
15 | * ext2 directory handling functions | ||
16 | * | ||
17 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
19 | * | ||
20 | * All code that works with directory layout had been switched to pagecache | ||
21 | * and moved here. AV | ||
22 | */ | ||
23 | |||
24 | #include "ext2.h" | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/smp_lock.h> | ||
27 | |||
28 | typedef struct ext2_dir_entry_2 ext2_dirent; | ||
29 | |||
30 | /* | ||
31 | * ext2 uses block-sized chunks. Arguably, sector-sized ones would be | ||
32 | * more robust, but we have what we have | ||
33 | */ | ||
34 | static inline unsigned ext2_chunk_size(struct inode *inode) | ||
35 | { | ||
36 | return inode->i_sb->s_blocksize; | ||
37 | } | ||
38 | |||
/*
 * Release a page obtained from ext2_get_page(): undo the kmap() and
 * drop the page cache reference.
 */
static inline void ext2_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
44 | |||
45 | static inline unsigned long dir_pages(struct inode *inode) | ||
46 | { | ||
47 | return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; | ||
48 | } | ||
49 | |||
50 | /* | ||
51 | * Return the offset into page `page_nr' of the last valid | ||
52 | * byte in that page, plus one. | ||
53 | */ | ||
54 | static unsigned | ||
55 | ext2_last_byte(struct inode *inode, unsigned long page_nr) | ||
56 | { | ||
57 | unsigned last_byte = inode->i_size; | ||
58 | |||
59 | last_byte -= page_nr << PAGE_CACHE_SHIFT; | ||
60 | if (last_byte > PAGE_CACHE_SIZE) | ||
61 | last_byte = PAGE_CACHE_SIZE; | ||
62 | return last_byte; | ||
63 | } | ||
64 | |||
65 | static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) | ||
66 | { | ||
67 | struct inode *dir = page->mapping->host; | ||
68 | int err = 0; | ||
69 | dir->i_version++; | ||
70 | page->mapping->a_ops->commit_write(NULL, page, from, to); | ||
71 | if (IS_DIRSYNC(dir)) | ||
72 | err = write_one_page(page, 1); | ||
73 | else | ||
74 | unlock_page(page); | ||
75 | return err; | ||
76 | } | ||
77 | |||
78 | static void ext2_check_page(struct page *page) | ||
79 | { | ||
80 | struct inode *dir = page->mapping->host; | ||
81 | struct super_block *sb = dir->i_sb; | ||
82 | unsigned chunk_size = ext2_chunk_size(dir); | ||
83 | char *kaddr = page_address(page); | ||
84 | u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); | ||
85 | unsigned offs, rec_len; | ||
86 | unsigned limit = PAGE_CACHE_SIZE; | ||
87 | ext2_dirent *p; | ||
88 | char *error; | ||
89 | |||
90 | if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { | ||
91 | limit = dir->i_size & ~PAGE_CACHE_MASK; | ||
92 | if (limit & (chunk_size - 1)) | ||
93 | goto Ebadsize; | ||
94 | if (!limit) | ||
95 | goto out; | ||
96 | } | ||
97 | for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { | ||
98 | p = (ext2_dirent *)(kaddr + offs); | ||
99 | rec_len = le16_to_cpu(p->rec_len); | ||
100 | |||
101 | if (rec_len < EXT2_DIR_REC_LEN(1)) | ||
102 | goto Eshort; | ||
103 | if (rec_len & 3) | ||
104 | goto Ealign; | ||
105 | if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) | ||
106 | goto Enamelen; | ||
107 | if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) | ||
108 | goto Espan; | ||
109 | if (le32_to_cpu(p->inode) > max_inumber) | ||
110 | goto Einumber; | ||
111 | } | ||
112 | if (offs != limit) | ||
113 | goto Eend; | ||
114 | out: | ||
115 | SetPageChecked(page); | ||
116 | return; | ||
117 | |||
118 | /* Too bad, we had an error */ | ||
119 | |||
120 | Ebadsize: | ||
121 | ext2_error(sb, "ext2_check_page", | ||
122 | "size of directory #%lu is not a multiple of chunk size", | ||
123 | dir->i_ino | ||
124 | ); | ||
125 | goto fail; | ||
126 | Eshort: | ||
127 | error = "rec_len is smaller than minimal"; | ||
128 | goto bad_entry; | ||
129 | Ealign: | ||
130 | error = "unaligned directory entry"; | ||
131 | goto bad_entry; | ||
132 | Enamelen: | ||
133 | error = "rec_len is too small for name_len"; | ||
134 | goto bad_entry; | ||
135 | Espan: | ||
136 | error = "directory entry across blocks"; | ||
137 | goto bad_entry; | ||
138 | Einumber: | ||
139 | error = "inode out of bounds"; | ||
140 | bad_entry: | ||
141 | ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " | ||
142 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | ||
143 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
144 | (unsigned long) le32_to_cpu(p->inode), | ||
145 | rec_len, p->name_len); | ||
146 | goto fail; | ||
147 | Eend: | ||
148 | p = (ext2_dirent *)(kaddr + offs); | ||
149 | ext2_error (sb, "ext2_check_page", | ||
150 | "entry in directory #%lu spans the page boundary" | ||
151 | "offset=%lu, inode=%lu", | ||
152 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
153 | (unsigned long) le32_to_cpu(p->inode)); | ||
154 | fail: | ||
155 | SetPageChecked(page); | ||
156 | SetPageError(page); | ||
157 | } | ||
158 | |||
159 | static struct page * ext2_get_page(struct inode *dir, unsigned long n) | ||
160 | { | ||
161 | struct address_space *mapping = dir->i_mapping; | ||
162 | struct page *page = read_cache_page(mapping, n, | ||
163 | (filler_t*)mapping->a_ops->readpage, NULL); | ||
164 | if (!IS_ERR(page)) { | ||
165 | wait_on_page_locked(page); | ||
166 | kmap(page); | ||
167 | if (!PageUptodate(page)) | ||
168 | goto fail; | ||
169 | if (!PageChecked(page)) | ||
170 | ext2_check_page(page); | ||
171 | if (PageError(page)) | ||
172 | goto fail; | ||
173 | } | ||
174 | return page; | ||
175 | |||
176 | fail: | ||
177 | ext2_put_page(page); | ||
178 | return ERR_PTR(-EIO); | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. | ||
183 | * | ||
184 | * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller. | ||
185 | */ | ||
186 | static inline int ext2_match (int len, const char * const name, | ||
187 | struct ext2_dir_entry_2 * de) | ||
188 | { | ||
189 | if (len != de->name_len) | ||
190 | return 0; | ||
191 | if (!de->inode) | ||
192 | return 0; | ||
193 | return !memcmp(name, de->name, len); | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * p is at least 6 bytes before the end of page | ||
198 | */ | ||
199 | static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) | ||
200 | { | ||
201 | return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); | ||
202 | } | ||
203 | |||
204 | static inline unsigned | ||
205 | ext2_validate_entry(char *base, unsigned offset, unsigned mask) | ||
206 | { | ||
207 | ext2_dirent *de = (ext2_dirent*)(base + offset); | ||
208 | ext2_dirent *p = (ext2_dirent*)(base + (offset&mask)); | ||
209 | while ((char*)p < (char*)de) { | ||
210 | if (p->rec_len == 0) | ||
211 | break; | ||
212 | p = ext2_next_entry(p); | ||
213 | } | ||
214 | return (char *)p - base; | ||
215 | } | ||
216 | |||
217 | static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { | ||
218 | [EXT2_FT_UNKNOWN] = DT_UNKNOWN, | ||
219 | [EXT2_FT_REG_FILE] = DT_REG, | ||
220 | [EXT2_FT_DIR] = DT_DIR, | ||
221 | [EXT2_FT_CHRDEV] = DT_CHR, | ||
222 | [EXT2_FT_BLKDEV] = DT_BLK, | ||
223 | [EXT2_FT_FIFO] = DT_FIFO, | ||
224 | [EXT2_FT_SOCK] = DT_SOCK, | ||
225 | [EXT2_FT_SYMLINK] = DT_LNK, | ||
226 | }; | ||
227 | |||
228 | #define S_SHIFT 12 | ||
229 | static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { | ||
230 | [S_IFREG >> S_SHIFT] = EXT2_FT_REG_FILE, | ||
231 | [S_IFDIR >> S_SHIFT] = EXT2_FT_DIR, | ||
232 | [S_IFCHR >> S_SHIFT] = EXT2_FT_CHRDEV, | ||
233 | [S_IFBLK >> S_SHIFT] = EXT2_FT_BLKDEV, | ||
234 | [S_IFIFO >> S_SHIFT] = EXT2_FT_FIFO, | ||
235 | [S_IFSOCK >> S_SHIFT] = EXT2_FT_SOCK, | ||
236 | [S_IFLNK >> S_SHIFT] = EXT2_FT_SYMLINK, | ||
237 | }; | ||
238 | |||
239 | static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) | ||
240 | { | ||
241 | mode_t mode = inode->i_mode; | ||
242 | if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) | ||
243 | de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | ||
244 | else | ||
245 | de->file_type = 0; | ||
246 | } | ||
247 | |||
248 | static int | ||
249 | ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | ||
250 | { | ||
251 | loff_t pos = filp->f_pos; | ||
252 | struct inode *inode = filp->f_dentry->d_inode; | ||
253 | struct super_block *sb = inode->i_sb; | ||
254 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | ||
255 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | ||
256 | unsigned long npages = dir_pages(inode); | ||
257 | unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); | ||
258 | unsigned char *types = NULL; | ||
259 | int need_revalidate = (filp->f_version != inode->i_version); | ||
260 | int ret; | ||
261 | |||
262 | if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) | ||
263 | goto success; | ||
264 | |||
265 | if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) | ||
266 | types = ext2_filetype_table; | ||
267 | |||
268 | for ( ; n < npages; n++, offset = 0) { | ||
269 | char *kaddr, *limit; | ||
270 | ext2_dirent *de; | ||
271 | struct page *page = ext2_get_page(inode, n); | ||
272 | |||
273 | if (IS_ERR(page)) { | ||
274 | ext2_error(sb, __FUNCTION__, | ||
275 | "bad page in #%lu", | ||
276 | inode->i_ino); | ||
277 | filp->f_pos += PAGE_CACHE_SIZE - offset; | ||
278 | ret = -EIO; | ||
279 | goto done; | ||
280 | } | ||
281 | kaddr = page_address(page); | ||
282 | if (need_revalidate) { | ||
283 | offset = ext2_validate_entry(kaddr, offset, chunk_mask); | ||
284 | need_revalidate = 0; | ||
285 | } | ||
286 | de = (ext2_dirent *)(kaddr+offset); | ||
287 | limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); | ||
288 | for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { | ||
289 | if (de->rec_len == 0) { | ||
290 | ext2_error(sb, __FUNCTION__, | ||
291 | "zero-length directory entry"); | ||
292 | ret = -EIO; | ||
293 | ext2_put_page(page); | ||
294 | goto done; | ||
295 | } | ||
296 | if (de->inode) { | ||
297 | int over; | ||
298 | unsigned char d_type = DT_UNKNOWN; | ||
299 | |||
300 | if (types && de->file_type < EXT2_FT_MAX) | ||
301 | d_type = types[de->file_type]; | ||
302 | |||
303 | offset = (char *)de - kaddr; | ||
304 | over = filldir(dirent, de->name, de->name_len, | ||
305 | (n<<PAGE_CACHE_SHIFT) | offset, | ||
306 | le32_to_cpu(de->inode), d_type); | ||
307 | if (over) { | ||
308 | ext2_put_page(page); | ||
309 | goto success; | ||
310 | } | ||
311 | } | ||
312 | filp->f_pos += le16_to_cpu(de->rec_len); | ||
313 | } | ||
314 | ext2_put_page(page); | ||
315 | } | ||
316 | |||
317 | success: | ||
318 | ret = 0; | ||
319 | done: | ||
320 | filp->f_version = inode->i_version; | ||
321 | return ret; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * ext2_find_entry() | ||
326 | * | ||
327 | * finds an entry in the specified directory with the wanted name. It | ||
328 | * returns the page in which the entry was found, and the entry itself | ||
329 | * (as a parameter - res_dir). Page is returned mapped and unlocked. | ||
330 | * Entry is guaranteed to be valid. | ||
331 | */ | ||
332 | struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | ||
333 | struct dentry *dentry, struct page ** res_page) | ||
334 | { | ||
335 | const char *name = dentry->d_name.name; | ||
336 | int namelen = dentry->d_name.len; | ||
337 | unsigned reclen = EXT2_DIR_REC_LEN(namelen); | ||
338 | unsigned long start, n; | ||
339 | unsigned long npages = dir_pages(dir); | ||
340 | struct page *page = NULL; | ||
341 | struct ext2_inode_info *ei = EXT2_I(dir); | ||
342 | ext2_dirent * de; | ||
343 | |||
344 | if (npages == 0) | ||
345 | goto out; | ||
346 | |||
347 | /* OFFSET_CACHE */ | ||
348 | *res_page = NULL; | ||
349 | |||
350 | start = ei->i_dir_start_lookup; | ||
351 | if (start >= npages) | ||
352 | start = 0; | ||
353 | n = start; | ||
354 | do { | ||
355 | char *kaddr; | ||
356 | page = ext2_get_page(dir, n); | ||
357 | if (!IS_ERR(page)) { | ||
358 | kaddr = page_address(page); | ||
359 | de = (ext2_dirent *) kaddr; | ||
360 | kaddr += ext2_last_byte(dir, n) - reclen; | ||
361 | while ((char *) de <= kaddr) { | ||
362 | if (de->rec_len == 0) { | ||
363 | ext2_error(dir->i_sb, __FUNCTION__, | ||
364 | "zero-length directory entry"); | ||
365 | ext2_put_page(page); | ||
366 | goto out; | ||
367 | } | ||
368 | if (ext2_match (namelen, name, de)) | ||
369 | goto found; | ||
370 | de = ext2_next_entry(de); | ||
371 | } | ||
372 | ext2_put_page(page); | ||
373 | } | ||
374 | if (++n >= npages) | ||
375 | n = 0; | ||
376 | } while (n != start); | ||
377 | out: | ||
378 | return NULL; | ||
379 | |||
380 | found: | ||
381 | *res_page = page; | ||
382 | ei->i_dir_start_lookup = n; | ||
383 | return de; | ||
384 | } | ||
385 | |||
386 | struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) | ||
387 | { | ||
388 | struct page *page = ext2_get_page(dir, 0); | ||
389 | ext2_dirent *de = NULL; | ||
390 | |||
391 | if (!IS_ERR(page)) { | ||
392 | de = ext2_next_entry((ext2_dirent *) page_address(page)); | ||
393 | *p = page; | ||
394 | } | ||
395 | return de; | ||
396 | } | ||
397 | |||
398 | ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) | ||
399 | { | ||
400 | ino_t res = 0; | ||
401 | struct ext2_dir_entry_2 * de; | ||
402 | struct page *page; | ||
403 | |||
404 | de = ext2_find_entry (dir, dentry, &page); | ||
405 | if (de) { | ||
406 | res = le32_to_cpu(de->inode); | ||
407 | kunmap(page); | ||
408 | page_cache_release(page); | ||
409 | } | ||
410 | return res; | ||
411 | } | ||
412 | |||
413 | /* Releases the page */ | ||
414 | void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, | ||
415 | struct page *page, struct inode *inode) | ||
416 | { | ||
417 | unsigned from = (char *) de - (char *) page_address(page); | ||
418 | unsigned to = from + le16_to_cpu(de->rec_len); | ||
419 | int err; | ||
420 | |||
421 | lock_page(page); | ||
422 | err = page->mapping->a_ops->prepare_write(NULL, page, from, to); | ||
423 | if (err) | ||
424 | BUG(); | ||
425 | de->inode = cpu_to_le32(inode->i_ino); | ||
426 | ext2_set_de_type (de, inode); | ||
427 | err = ext2_commit_chunk(page, from, to); | ||
428 | ext2_put_page(page); | ||
429 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; | ||
430 | EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; | ||
431 | mark_inode_dirty(dir); | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * Parent is locked. | ||
436 | */ | ||
437 | int ext2_add_link (struct dentry *dentry, struct inode *inode) | ||
438 | { | ||
439 | struct inode *dir = dentry->d_parent->d_inode; | ||
440 | const char *name = dentry->d_name.name; | ||
441 | int namelen = dentry->d_name.len; | ||
442 | unsigned chunk_size = ext2_chunk_size(dir); | ||
443 | unsigned reclen = EXT2_DIR_REC_LEN(namelen); | ||
444 | unsigned short rec_len, name_len; | ||
445 | struct page *page = NULL; | ||
446 | ext2_dirent * de; | ||
447 | unsigned long npages = dir_pages(dir); | ||
448 | unsigned long n; | ||
449 | char *kaddr; | ||
450 | unsigned from, to; | ||
451 | int err; | ||
452 | |||
453 | /* | ||
454 | * We take care of directory expansion in the same loop. | ||
455 | * This code plays outside i_size, so it locks the page | ||
456 | * to protect that region. | ||
457 | */ | ||
458 | for (n = 0; n <= npages; n++) { | ||
459 | char *dir_end; | ||
460 | |||
461 | page = ext2_get_page(dir, n); | ||
462 | err = PTR_ERR(page); | ||
463 | if (IS_ERR(page)) | ||
464 | goto out; | ||
465 | lock_page(page); | ||
466 | kaddr = page_address(page); | ||
467 | dir_end = kaddr + ext2_last_byte(dir, n); | ||
468 | de = (ext2_dirent *)kaddr; | ||
469 | kaddr += PAGE_CACHE_SIZE - reclen; | ||
470 | while ((char *)de <= kaddr) { | ||
471 | if ((char *)de == dir_end) { | ||
472 | /* We hit i_size */ | ||
473 | name_len = 0; | ||
474 | rec_len = chunk_size; | ||
475 | de->rec_len = cpu_to_le16(chunk_size); | ||
476 | de->inode = 0; | ||
477 | goto got_it; | ||
478 | } | ||
479 | if (de->rec_len == 0) { | ||
480 | ext2_error(dir->i_sb, __FUNCTION__, | ||
481 | "zero-length directory entry"); | ||
482 | err = -EIO; | ||
483 | goto out_unlock; | ||
484 | } | ||
485 | err = -EEXIST; | ||
486 | if (ext2_match (namelen, name, de)) | ||
487 | goto out_unlock; | ||
488 | name_len = EXT2_DIR_REC_LEN(de->name_len); | ||
489 | rec_len = le16_to_cpu(de->rec_len); | ||
490 | if (!de->inode && rec_len >= reclen) | ||
491 | goto got_it; | ||
492 | if (rec_len >= name_len + reclen) | ||
493 | goto got_it; | ||
494 | de = (ext2_dirent *) ((char *) de + rec_len); | ||
495 | } | ||
496 | unlock_page(page); | ||
497 | ext2_put_page(page); | ||
498 | } | ||
499 | BUG(); | ||
500 | return -EINVAL; | ||
501 | |||
502 | got_it: | ||
503 | from = (char*)de - (char*)page_address(page); | ||
504 | to = from + rec_len; | ||
505 | err = page->mapping->a_ops->prepare_write(NULL, page, from, to); | ||
506 | if (err) | ||
507 | goto out_unlock; | ||
508 | if (de->inode) { | ||
509 | ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); | ||
510 | de1->rec_len = cpu_to_le16(rec_len - name_len); | ||
511 | de->rec_len = cpu_to_le16(name_len); | ||
512 | de = de1; | ||
513 | } | ||
514 | de->name_len = namelen; | ||
515 | memcpy (de->name, name, namelen); | ||
516 | de->inode = cpu_to_le32(inode->i_ino); | ||
517 | ext2_set_de_type (de, inode); | ||
518 | err = ext2_commit_chunk(page, from, to); | ||
519 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; | ||
520 | EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; | ||
521 | mark_inode_dirty(dir); | ||
522 | /* OFFSET_CACHE */ | ||
523 | out_put: | ||
524 | ext2_put_page(page); | ||
525 | out: | ||
526 | return err; | ||
527 | out_unlock: | ||
528 | unlock_page(page); | ||
529 | goto out_put; | ||
530 | } | ||
531 | |||
532 | /* | ||
533 | * ext2_delete_entry deletes a directory entry by merging it with the | ||
534 | * previous entry. Page is up-to-date. Releases the page. | ||
535 | */ | ||
536 | int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | ||
537 | { | ||
538 | struct address_space *mapping = page->mapping; | ||
539 | struct inode *inode = mapping->host; | ||
540 | char *kaddr = page_address(page); | ||
541 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); | ||
542 | unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); | ||
543 | ext2_dirent * pde = NULL; | ||
544 | ext2_dirent * de = (ext2_dirent *) (kaddr + from); | ||
545 | int err; | ||
546 | |||
547 | while ((char*)de < (char*)dir) { | ||
548 | if (de->rec_len == 0) { | ||
549 | ext2_error(inode->i_sb, __FUNCTION__, | ||
550 | "zero-length directory entry"); | ||
551 | err = -EIO; | ||
552 | goto out; | ||
553 | } | ||
554 | pde = de; | ||
555 | de = ext2_next_entry(de); | ||
556 | } | ||
557 | if (pde) | ||
558 | from = (char*)pde - (char*)page_address(page); | ||
559 | lock_page(page); | ||
560 | err = mapping->a_ops->prepare_write(NULL, page, from, to); | ||
561 | if (err) | ||
562 | BUG(); | ||
563 | if (pde) | ||
564 | pde->rec_len = cpu_to_le16(to-from); | ||
565 | dir->inode = 0; | ||
566 | err = ext2_commit_chunk(page, from, to); | ||
567 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | ||
568 | EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; | ||
569 | mark_inode_dirty(inode); | ||
570 | out: | ||
571 | ext2_put_page(page); | ||
572 | return err; | ||
573 | } | ||
574 | |||
575 | /* | ||
576 | * Set the first fragment of directory. | ||
577 | */ | ||
578 | int ext2_make_empty(struct inode *inode, struct inode *parent) | ||
579 | { | ||
580 | struct address_space *mapping = inode->i_mapping; | ||
581 | struct page *page = grab_cache_page(mapping, 0); | ||
582 | unsigned chunk_size = ext2_chunk_size(inode); | ||
583 | struct ext2_dir_entry_2 * de; | ||
584 | int err; | ||
585 | void *kaddr; | ||
586 | |||
587 | if (!page) | ||
588 | return -ENOMEM; | ||
589 | err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); | ||
590 | if (err) { | ||
591 | unlock_page(page); | ||
592 | goto fail; | ||
593 | } | ||
594 | kaddr = kmap_atomic(page, KM_USER0); | ||
595 | memset(kaddr, 0, chunk_size); | ||
596 | de = (struct ext2_dir_entry_2 *)kaddr; | ||
597 | de->name_len = 1; | ||
598 | de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); | ||
599 | memcpy (de->name, ".\0\0", 4); | ||
600 | de->inode = cpu_to_le32(inode->i_ino); | ||
601 | ext2_set_de_type (de, inode); | ||
602 | |||
603 | de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); | ||
604 | de->name_len = 2; | ||
605 | de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); | ||
606 | de->inode = cpu_to_le32(parent->i_ino); | ||
607 | memcpy (de->name, "..\0", 4); | ||
608 | ext2_set_de_type (de, inode); | ||
609 | kunmap_atomic(kaddr, KM_USER0); | ||
610 | err = ext2_commit_chunk(page, 0, chunk_size); | ||
611 | fail: | ||
612 | page_cache_release(page); | ||
613 | return err; | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * routine to check that the specified directory is empty (for rmdir) | ||
618 | */ | ||
619 | int ext2_empty_dir (struct inode * inode) | ||
620 | { | ||
621 | struct page *page = NULL; | ||
622 | unsigned long i, npages = dir_pages(inode); | ||
623 | |||
624 | for (i = 0; i < npages; i++) { | ||
625 | char *kaddr; | ||
626 | ext2_dirent * de; | ||
627 | page = ext2_get_page(inode, i); | ||
628 | |||
629 | if (IS_ERR(page)) | ||
630 | continue; | ||
631 | |||
632 | kaddr = page_address(page); | ||
633 | de = (ext2_dirent *)kaddr; | ||
634 | kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1); | ||
635 | |||
636 | while ((char *)de <= kaddr) { | ||
637 | if (de->rec_len == 0) { | ||
638 | ext2_error(inode->i_sb, __FUNCTION__, | ||
639 | "zero-length directory entry"); | ||
640 | printk("kaddr=%p, de=%p\n", kaddr, de); | ||
641 | goto not_empty; | ||
642 | } | ||
643 | if (de->inode != 0) { | ||
644 | /* check for . and .. */ | ||
645 | if (de->name[0] != '.') | ||
646 | goto not_empty; | ||
647 | if (de->name_len > 2) | ||
648 | goto not_empty; | ||
649 | if (de->name_len < 2) { | ||
650 | if (de->inode != | ||
651 | cpu_to_le32(inode->i_ino)) | ||
652 | goto not_empty; | ||
653 | } else if (de->name[1] != '.') | ||
654 | goto not_empty; | ||
655 | } | ||
656 | de = ext2_next_entry(de); | ||
657 | } | ||
658 | ext2_put_page(page); | ||
659 | } | ||
660 | return 1; | ||
661 | |||
662 | not_empty: | ||
663 | ext2_put_page(page); | ||
664 | return 0; | ||
665 | } | ||
666 | |||
667 | struct file_operations ext2_dir_operations = { | ||
668 | .llseek = generic_file_llseek, | ||
669 | .read = generic_read_dir, | ||
670 | .readdir = ext2_readdir, | ||
671 | .ioctl = ext2_ioctl, | ||
672 | .fsync = ext2_sync_file, | ||
673 | }; | ||
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h new file mode 100644 index 000000000000..9f1a40e7945c --- /dev/null +++ b/fs/ext2/ext2.h | |||
@@ -0,0 +1,160 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/ext2_fs.h> | ||
3 | |||
4 | /* | ||
5 | * second extended file system inode data in memory | ||
6 | */ | ||
7 | struct ext2_inode_info { | ||
8 | __le32 i_data[15]; | ||
9 | __u32 i_flags; | ||
10 | __u32 i_faddr; | ||
11 | __u8 i_frag_no; | ||
12 | __u8 i_frag_size; | ||
13 | __u16 i_state; | ||
14 | __u32 i_file_acl; | ||
15 | __u32 i_dir_acl; | ||
16 | __u32 i_dtime; | ||
17 | |||
18 | /* | ||
19 | * i_block_group is the number of the block group which contains | ||
20 | * this file's inode. Constant across the lifetime of the inode, | ||
21 | * it is used for making block allocation decisions - we try to | ||
22 | * place a file's data blocks near its inode block, and new inodes | ||
23 | * near to their parent directory's inode. | ||
24 | */ | ||
25 | __u32 i_block_group; | ||
26 | |||
27 | /* | ||
28 | * i_next_alloc_block is the logical (file-relative) number of the | ||
29 | * most-recently-allocated block in this file. Yes, it is misnamed. | ||
30 | * We use this for detecting linearly ascending allocation requests. | ||
31 | */ | ||
32 | __u32 i_next_alloc_block; | ||
33 | |||
34 | /* | ||
35 | * i_next_alloc_goal is the *physical* companion to i_next_alloc_block. | ||
36 | * it is the physical block number of the block which was most-recently | ||
37 | * allocated to this file. This gives us the goal (target) for the next | ||
38 | * allocation when we detect linearly ascending requests. | ||
39 | */ | ||
40 | __u32 i_next_alloc_goal; | ||
41 | __u32 i_prealloc_block; | ||
42 | __u32 i_prealloc_count; | ||
43 | __u32 i_dir_start_lookup; | ||
44 | #ifdef CONFIG_EXT2_FS_XATTR | ||
45 | /* | ||
46 | * Extended attributes can be read independently of the main file | ||
47 | * data. Taking i_sem even when reading would cause contention | ||
48 | * between readers of EAs and writers of regular file data, so | ||
49 | * instead we synchronize on xattr_sem when reading or changing | ||
50 | * EAs. | ||
51 | */ | ||
52 | struct rw_semaphore xattr_sem; | ||
53 | #endif | ||
54 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
55 | struct posix_acl *i_acl; | ||
56 | struct posix_acl *i_default_acl; | ||
57 | #endif | ||
58 | rwlock_t i_meta_lock; | ||
59 | struct inode vfs_inode; | ||
60 | }; | ||
61 | |||
62 | /* | ||
63 | * Inode dynamic state flags | ||
64 | */ | ||
65 | #define EXT2_STATE_NEW 0x00000001 /* inode is newly created */ | ||
66 | |||
67 | |||
68 | /* | ||
69 | * Function prototypes | ||
70 | */ | ||
71 | |||
72 | /* | ||
73 | * Ok, these declarations are also in <linux/kernel.h> but none of the | ||
74 | * ext2 source programs needs to include it so they are duplicated here. | ||
75 | */ | ||
76 | |||
77 | static inline struct ext2_inode_info *EXT2_I(struct inode *inode) | ||
78 | { | ||
79 | return container_of(inode, struct ext2_inode_info, vfs_inode); | ||
80 | } | ||
81 | |||
82 | /* balloc.c */ | ||
83 | extern int ext2_bg_has_super(struct super_block *sb, int group); | ||
84 | extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); | ||
85 | extern int ext2_new_block (struct inode *, unsigned long, | ||
86 | __u32 *, __u32 *, int *); | ||
87 | extern void ext2_free_blocks (struct inode *, unsigned long, | ||
88 | unsigned long); | ||
89 | extern unsigned long ext2_count_free_blocks (struct super_block *); | ||
90 | extern unsigned long ext2_count_dirs (struct super_block *); | ||
91 | extern void ext2_check_blocks_bitmap (struct super_block *); | ||
92 | extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, | ||
93 | unsigned int block_group, | ||
94 | struct buffer_head ** bh); | ||
95 | |||
96 | /* dir.c */ | ||
97 | extern int ext2_add_link (struct dentry *, struct inode *); | ||
98 | extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); | ||
99 | extern int ext2_make_empty(struct inode *, struct inode *); | ||
100 | extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); | ||
101 | extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); | ||
102 | extern int ext2_empty_dir (struct inode *); | ||
103 | extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); | ||
104 | extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); | ||
105 | |||
106 | /* fsync.c */ | ||
107 | extern int ext2_sync_file (struct file *, struct dentry *, int); | ||
108 | |||
109 | /* ialloc.c */ | ||
110 | extern struct inode * ext2_new_inode (struct inode *, int); | ||
111 | extern void ext2_free_inode (struct inode *); | ||
112 | extern unsigned long ext2_count_free_inodes (struct super_block *); | ||
113 | extern void ext2_check_inodes_bitmap (struct super_block *); | ||
114 | extern unsigned long ext2_count_free (struct buffer_head *, unsigned); | ||
115 | |||
116 | /* inode.c */ | ||
117 | extern void ext2_read_inode (struct inode *); | ||
118 | extern int ext2_write_inode (struct inode *, int); | ||
119 | extern void ext2_delete_inode (struct inode *); | ||
120 | extern int ext2_sync_inode (struct inode *); | ||
121 | extern void ext2_discard_prealloc (struct inode *); | ||
122 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
123 | extern void ext2_truncate (struct inode *); | ||
124 | extern int ext2_setattr (struct dentry *, struct iattr *); | ||
125 | extern void ext2_set_inode_flags(struct inode *inode); | ||
126 | |||
127 | /* ioctl.c */ | ||
128 | extern int ext2_ioctl (struct inode *, struct file *, unsigned int, | ||
129 | unsigned long); | ||
130 | |||
131 | /* super.c */ | ||
132 | extern void ext2_error (struct super_block *, const char *, const char *, ...) | ||
133 | __attribute__ ((format (printf, 3, 4))); | ||
134 | extern void ext2_warning (struct super_block *, const char *, const char *, ...) | ||
135 | __attribute__ ((format (printf, 3, 4))); | ||
136 | extern void ext2_update_dynamic_rev (struct super_block *sb); | ||
137 | extern void ext2_write_super (struct super_block *); | ||
138 | |||
139 | /* | ||
140 | * Inodes and files operations | ||
141 | */ | ||
142 | |||
143 | /* dir.c */ | ||
144 | extern struct file_operations ext2_dir_operations; | ||
145 | |||
146 | /* file.c */ | ||
147 | extern struct inode_operations ext2_file_inode_operations; | ||
148 | extern struct file_operations ext2_file_operations; | ||
149 | |||
150 | /* inode.c */ | ||
151 | extern struct address_space_operations ext2_aops; | ||
152 | extern struct address_space_operations ext2_nobh_aops; | ||
153 | |||
154 | /* namei.c */ | ||
155 | extern struct inode_operations ext2_dir_inode_operations; | ||
156 | extern struct inode_operations ext2_special_inode_operations; | ||
157 | |||
158 | /* symlink.c */ | ||
159 | extern struct inode_operations ext2_fast_symlink_inode_operations; | ||
160 | extern struct inode_operations ext2_symlink_inode_operations; | ||
diff --git a/fs/ext2/file.c b/fs/ext2/file.c new file mode 100644 index 000000000000..f5e86141ec54 --- /dev/null +++ b/fs/ext2/file.c | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/file.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/fs/minix/file.c | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | * | ||
15 | * ext2 fs regular file handling primitives | ||
16 | * | ||
17 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | ||
18 | * (jj@sunsite.ms.mff.cuni.cz) | ||
19 | */ | ||
20 | |||
21 | #include <linux/time.h> | ||
22 | #include "ext2.h" | ||
23 | #include "xattr.h" | ||
24 | #include "acl.h" | ||
25 | |||
26 | /* | ||
27 | * Called when an inode is released. Note that this is different | ||
28 | * from ext2_open_file: open gets called at every open, but release | ||
29 | * gets called only when /all/ the files are closed. | ||
30 | */ | ||
31 | static int ext2_release_file (struct inode * inode, struct file * filp) | ||
32 | { | ||
33 | if (filp->f_mode & FMODE_WRITE) | ||
34 | ext2_discard_prealloc (inode); | ||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * We have mostly NULL's here: the current defaults are ok for | ||
40 | * the ext2 filesystem. | ||
41 | */ | ||
42 | struct file_operations ext2_file_operations = { | ||
43 | .llseek = generic_file_llseek, | ||
44 | .read = generic_file_read, | ||
45 | .write = generic_file_write, | ||
46 | .aio_read = generic_file_aio_read, | ||
47 | .aio_write = generic_file_aio_write, | ||
48 | .ioctl = ext2_ioctl, | ||
49 | .mmap = generic_file_mmap, | ||
50 | .open = generic_file_open, | ||
51 | .release = ext2_release_file, | ||
52 | .fsync = ext2_sync_file, | ||
53 | .readv = generic_file_readv, | ||
54 | .writev = generic_file_writev, | ||
55 | .sendfile = generic_file_sendfile, | ||
56 | }; | ||
57 | |||
58 | struct inode_operations ext2_file_inode_operations = { | ||
59 | .truncate = ext2_truncate, | ||
60 | #ifdef CONFIG_EXT2_FS_XATTR | ||
61 | .setxattr = generic_setxattr, | ||
62 | .getxattr = generic_getxattr, | ||
63 | .listxattr = ext2_listxattr, | ||
64 | .removexattr = generic_removexattr, | ||
65 | #endif | ||
66 | .setattr = ext2_setattr, | ||
67 | .permission = ext2_permission, | ||
68 | }; | ||
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c new file mode 100644 index 000000000000..c9c2e5ffa48e --- /dev/null +++ b/fs/ext2/fsync.c | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/fsync.c | ||
3 | * | ||
4 | * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) | ||
5 | * from | ||
6 | * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) | ||
7 | * Laboratoire MASI - Institut Blaise Pascal | ||
8 | * Universite Pierre et Marie Curie (Paris VI) | ||
9 | * from | ||
10 | * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds | ||
11 | * | ||
12 | * ext2fs fsync primitive | ||
13 | * | ||
14 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
15 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
16 | * | ||
17 | * Removed unnecessary code duplication for little endian machines | ||
18 | * and excessive __inline__s. | ||
19 | * Andi Kleen, 1997 | ||
20 | * | ||
21 | * Major simplifications and cleanup - we only need to do the metadata, because | ||
22 | * we can depend on generic_block_fdatasync() to sync the data blocks. | ||
23 | */ | ||
24 | |||
25 | #include "ext2.h" | ||
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/buffer_head.h> /* for fsync_inode_buffers() */ | ||
28 | |||
29 | |||
30 | /* | ||
31 | * File may be NULL when we are called. Perhaps we shouldn't | ||
32 | * even pass file to fsync ? | ||
33 | */ | ||
34 | |||
35 | int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync) | ||
36 | { | ||
37 | struct inode *inode = dentry->d_inode; | ||
38 | int err; | ||
39 | int ret; | ||
40 | |||
41 | ret = sync_mapping_buffers(inode->i_mapping); | ||
42 | if (!(inode->i_state & I_DIRTY)) | ||
43 | return ret; | ||
44 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
45 | return ret; | ||
46 | |||
47 | err = ext2_sync_inode(inode); | ||
48 | if (ret == 0) | ||
49 | ret = err; | ||
50 | return ret; | ||
51 | } | ||
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c new file mode 100644 index 000000000000..77e059149212 --- /dev/null +++ b/fs/ext2/ialloc.c | |||
@@ -0,0 +1,735 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/ialloc.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * BSD ufs-inspired inode and directory allocation by | ||
10 | * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 | ||
11 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
12 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/quotaops.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/backing-dev.h> | ||
19 | #include <linux/buffer_head.h> | ||
20 | #include <linux/random.h> | ||
21 | #include "ext2.h" | ||
22 | #include "xattr.h" | ||
23 | #include "acl.h" | ||
24 | |||
25 | /* | ||
26 | * ialloc.c contains the inodes allocation and deallocation routines | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * The free inodes are managed by bitmaps. A file system contains several | ||
31 | * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap | ||
32 | * block for inodes, N blocks for the inode table and data blocks. | ||
33 | * | ||
34 | * The file system contains group descriptors which are located after the | ||
35 | * super block. Each descriptor contains the number of the bitmap block and | ||
36 | * the free blocks count in the group. | ||
37 | */ | ||
38 | |||
39 | |||
40 | /* | ||
41 | * Read the inode allocation bitmap for a given block_group, reading | ||
42 | * into the specified slot in the superblock's bitmap cache. | ||
43 | * | ||
44 | * Return buffer_head of bitmap on success or NULL. | ||
45 | */ | ||
46 | static struct buffer_head * | ||
47 | read_inode_bitmap(struct super_block * sb, unsigned long block_group) | ||
48 | { | ||
49 | struct ext2_group_desc *desc; | ||
50 | struct buffer_head *bh = NULL; | ||
51 | |||
52 | desc = ext2_get_group_desc(sb, block_group, NULL); | ||
53 | if (!desc) | ||
54 | goto error_out; | ||
55 | |||
56 | bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); | ||
57 | if (!bh) | ||
58 | ext2_error(sb, "read_inode_bitmap", | ||
59 | "Cannot read inode bitmap - " | ||
60 | "block_group = %lu, inode_bitmap = %u", | ||
61 | block_group, le32_to_cpu(desc->bg_inode_bitmap)); | ||
62 | error_out: | ||
63 | return bh; | ||
64 | } | ||
65 | |||
66 | static void ext2_release_inode(struct super_block *sb, int group, int dir) | ||
67 | { | ||
68 | struct ext2_group_desc * desc; | ||
69 | struct buffer_head *bh; | ||
70 | |||
71 | desc = ext2_get_group_desc(sb, group, &bh); | ||
72 | if (!desc) { | ||
73 | ext2_error(sb, "ext2_release_inode", | ||
74 | "can't get descriptor for group %d", group); | ||
75 | return; | ||
76 | } | ||
77 | |||
78 | spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); | ||
79 | desc->bg_free_inodes_count = | ||
80 | cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); | ||
81 | if (dir) | ||
82 | desc->bg_used_dirs_count = | ||
83 | cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); | ||
84 | spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); | ||
85 | if (dir) | ||
86 | percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); | ||
87 | sb->s_dirt = 1; | ||
88 | mark_buffer_dirty(bh); | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * NOTE! When we get the inode, we're the only people | ||
93 | * that have access to it, and as such there are no | ||
94 | * race conditions we have to worry about. The inode | ||
95 | * is not on the hash-lists, and it cannot be reached | ||
96 | * through the filesystem because the directory entry | ||
97 | * has been deleted earlier. | ||
98 | * | ||
99 | * HOWEVER: we must make sure that we get no aliases, | ||
100 | * which means that we have to call "clear_inode()" | ||
101 | * _before_ we mark the inode not in use in the inode | ||
102 | * bitmaps. Otherwise a newly created file might use | ||
103 | * the same inode number (not actually the same pointer | ||
104 | * though), and then we'd have two inodes sharing the | ||
105 | * same inode number and space on the harddisk. | ||
106 | */ | ||
107 | void ext2_free_inode (struct inode * inode) | ||
108 | { | ||
109 | struct super_block * sb = inode->i_sb; | ||
110 | int is_directory; | ||
111 | unsigned long ino; | ||
112 | struct buffer_head *bitmap_bh = NULL; | ||
113 | unsigned long block_group; | ||
114 | unsigned long bit; | ||
115 | struct ext2_super_block * es; | ||
116 | |||
117 | ino = inode->i_ino; | ||
118 | ext2_debug ("freeing inode %lu\n", ino); | ||
119 | |||
120 | /* | ||
121 | * Note: we must free any quota before locking the superblock, | ||
122 | * as writing the quota to disk may need the lock as well. | ||
123 | */ | ||
124 | if (!is_bad_inode(inode)) { | ||
125 | /* Quota is already initialized in iput() */ | ||
126 | ext2_xattr_delete_inode(inode); | ||
127 | DQUOT_FREE_INODE(inode); | ||
128 | DQUOT_DROP(inode); | ||
129 | } | ||
130 | |||
131 | es = EXT2_SB(sb)->s_es; | ||
132 | is_directory = S_ISDIR(inode->i_mode); | ||
133 | |||
134 | /* Do this BEFORE marking the inode not in use or returning an error */ | ||
135 | clear_inode (inode); | ||
136 | |||
137 | if (ino < EXT2_FIRST_INO(sb) || | ||
138 | ino > le32_to_cpu(es->s_inodes_count)) { | ||
139 | ext2_error (sb, "ext2_free_inode", | ||
140 | "reserved or nonexistent inode %lu", ino); | ||
141 | goto error_return; | ||
142 | } | ||
143 | block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); | ||
144 | bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); | ||
145 | brelse(bitmap_bh); | ||
146 | bitmap_bh = read_inode_bitmap(sb, block_group); | ||
147 | if (!bitmap_bh) | ||
148 | goto error_return; | ||
149 | |||
150 | /* Ok, now we can actually update the inode bitmaps.. */ | ||
151 | if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), | ||
152 | bit, (void *) bitmap_bh->b_data)) | ||
153 | ext2_error (sb, "ext2_free_inode", | ||
154 | "bit already cleared for inode %lu", ino); | ||
155 | else | ||
156 | ext2_release_inode(sb, block_group, is_directory); | ||
157 | mark_buffer_dirty(bitmap_bh); | ||
158 | if (sb->s_flags & MS_SYNCHRONOUS) | ||
159 | sync_dirty_buffer(bitmap_bh); | ||
160 | error_return: | ||
161 | brelse(bitmap_bh); | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * We perform asynchronous prereading of the new inode's inode block when | ||
166 | * we create the inode, in the expectation that the inode will be written | ||
167 | * back soon. There are two reasons: | ||
168 | * | ||
169 | * - When creating a large number of files, the async prereads will be | ||
170 | * nicely merged into large reads | ||
171 | * - When writing out a large number of inodes, we don't need to keep on | ||
172 | * stalling the writes while we read the inode block. | ||
173 | * | ||
174 | * FIXME: ext2_get_group_desc() needs to be simplified. | ||
175 | */ | ||
176 | static void ext2_preread_inode(struct inode *inode) | ||
177 | { | ||
178 | unsigned long block_group; | ||
179 | unsigned long offset; | ||
180 | unsigned long block; | ||
181 | struct buffer_head *bh; | ||
182 | struct ext2_group_desc * gdp; | ||
183 | struct backing_dev_info *bdi; | ||
184 | |||
185 | bdi = inode->i_mapping->backing_dev_info; | ||
186 | if (bdi_read_congested(bdi)) | ||
187 | return; | ||
188 | if (bdi_write_congested(bdi)) | ||
189 | return; | ||
190 | |||
191 | block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); | ||
192 | gdp = ext2_get_group_desc(inode->i_sb, block_group, &bh); | ||
193 | if (gdp == NULL) | ||
194 | return; | ||
195 | |||
196 | /* | ||
197 | * Figure out the offset within the block group inode table | ||
198 | */ | ||
199 | offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) * | ||
200 | EXT2_INODE_SIZE(inode->i_sb); | ||
201 | block = le32_to_cpu(gdp->bg_inode_table) + | ||
202 | (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)); | ||
203 | sb_breadahead(inode->i_sb, block); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * There are two policies for allocating an inode. If the new inode is | ||
208 | * a directory, then a forward search is made for a block group with both | ||
209 | * free space and a low directory-to-inode ratio; if that fails, then of | ||
210 | * the groups with above-average free space, that group with the fewest | ||
211 | * directories already is chosen. | ||
212 | * | ||
213 | * For other inodes, search forward from the parent directory\'s block | ||
214 | * group to find a free inode. | ||
215 | */ | ||
216 | static int find_group_dir(struct super_block *sb, struct inode *parent) | ||
217 | { | ||
218 | int ngroups = EXT2_SB(sb)->s_groups_count; | ||
219 | int avefreei = ext2_count_free_inodes(sb) / ngroups; | ||
220 | struct ext2_group_desc *desc, *best_desc = NULL; | ||
221 | struct buffer_head *bh, *best_bh = NULL; | ||
222 | int group, best_group = -1; | ||
223 | |||
224 | for (group = 0; group < ngroups; group++) { | ||
225 | desc = ext2_get_group_desc (sb, group, &bh); | ||
226 | if (!desc || !desc->bg_free_inodes_count) | ||
227 | continue; | ||
228 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | ||
229 | continue; | ||
230 | if (!best_desc || | ||
231 | (le16_to_cpu(desc->bg_free_blocks_count) > | ||
232 | le16_to_cpu(best_desc->bg_free_blocks_count))) { | ||
233 | best_group = group; | ||
234 | best_desc = desc; | ||
235 | best_bh = bh; | ||
236 | } | ||
237 | } | ||
238 | if (!best_desc) | ||
239 | return -1; | ||
240 | |||
241 | return best_group; | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Orlov's allocator for directories. | ||
246 | * | ||
247 | * We always try to spread first-level directories. | ||
248 | * | ||
249 | * If there are blockgroups with both free inodes and free blocks counts | ||
250 | * not worse than average we return one with smallest directory count. | ||
251 | * Otherwise we simply return a random group. | ||
252 | * | ||
253 | * For the rest rules look so: | ||
254 | * | ||
255 | * It's OK to put directory into a group unless | ||
256 | * it has too many directories already (max_dirs) or | ||
257 | * it has too few free inodes left (min_inodes) or | ||
258 | * it has too few free blocks left (min_blocks) or | ||
259 | * it's already running too large debt (max_debt). | ||
260 | * Parent's group is preferred, if it doesn't satisfy these | ||
261 | * conditions we search cyclically through the rest. If none | ||
262 | * of the groups look good we just look for a group with more | ||
263 | * free inodes than average (starting at parent's group). | ||
264 | * | ||
265 | * Debt is incremented each time we allocate a directory and decremented | ||
266 | * when we allocate an inode, within 0--255. | ||
267 | */ | ||
268 | |||
269 | #define INODE_COST 64 | ||
270 | #define BLOCK_COST 256 | ||
271 | |||
272 | static int find_group_orlov(struct super_block *sb, struct inode *parent) | ||
273 | { | ||
274 | int parent_group = EXT2_I(parent)->i_block_group; | ||
275 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
276 | struct ext2_super_block *es = sbi->s_es; | ||
277 | int ngroups = sbi->s_groups_count; | ||
278 | int inodes_per_group = EXT2_INODES_PER_GROUP(sb); | ||
279 | int freei; | ||
280 | int avefreei; | ||
281 | int free_blocks; | ||
282 | int avefreeb; | ||
283 | int blocks_per_dir; | ||
284 | int ndirs; | ||
285 | int max_debt, max_dirs, min_blocks, min_inodes; | ||
286 | int group = -1, i; | ||
287 | struct ext2_group_desc *desc; | ||
288 | struct buffer_head *bh; | ||
289 | |||
290 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | ||
291 | avefreei = freei / ngroups; | ||
292 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
293 | avefreeb = free_blocks / ngroups; | ||
294 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); | ||
295 | |||
296 | if ((parent == sb->s_root->d_inode) || | ||
297 | (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) { | ||
298 | struct ext2_group_desc *best_desc = NULL; | ||
299 | struct buffer_head *best_bh = NULL; | ||
300 | int best_ndir = inodes_per_group; | ||
301 | int best_group = -1; | ||
302 | |||
303 | get_random_bytes(&group, sizeof(group)); | ||
304 | parent_group = (unsigned)group % ngroups; | ||
305 | for (i = 0; i < ngroups; i++) { | ||
306 | group = (parent_group + i) % ngroups; | ||
307 | desc = ext2_get_group_desc (sb, group, &bh); | ||
308 | if (!desc || !desc->bg_free_inodes_count) | ||
309 | continue; | ||
310 | if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) | ||
311 | continue; | ||
312 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | ||
313 | continue; | ||
314 | if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) | ||
315 | continue; | ||
316 | best_group = group; | ||
317 | best_ndir = le16_to_cpu(desc->bg_used_dirs_count); | ||
318 | best_desc = desc; | ||
319 | best_bh = bh; | ||
320 | } | ||
321 | if (best_group >= 0) { | ||
322 | desc = best_desc; | ||
323 | bh = best_bh; | ||
324 | group = best_group; | ||
325 | goto found; | ||
326 | } | ||
327 | goto fallback; | ||
328 | } | ||
329 | |||
330 | if (ndirs == 0) | ||
331 | ndirs = 1; /* percpu_counters are approximate... */ | ||
332 | |||
333 | blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs; | ||
334 | |||
335 | max_dirs = ndirs / ngroups + inodes_per_group / 16; | ||
336 | min_inodes = avefreei - inodes_per_group / 4; | ||
337 | min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4; | ||
338 | |||
339 | max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST); | ||
340 | if (max_debt * INODE_COST > inodes_per_group) | ||
341 | max_debt = inodes_per_group / INODE_COST; | ||
342 | if (max_debt > 255) | ||
343 | max_debt = 255; | ||
344 | if (max_debt == 0) | ||
345 | max_debt = 1; | ||
346 | |||
347 | for (i = 0; i < ngroups; i++) { | ||
348 | group = (parent_group + i) % ngroups; | ||
349 | desc = ext2_get_group_desc (sb, group, &bh); | ||
350 | if (!desc || !desc->bg_free_inodes_count) | ||
351 | continue; | ||
352 | if (sbi->s_debts[group] >= max_debt) | ||
353 | continue; | ||
354 | if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) | ||
355 | continue; | ||
356 | if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) | ||
357 | continue; | ||
358 | if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) | ||
359 | continue; | ||
360 | goto found; | ||
361 | } | ||
362 | |||
363 | fallback: | ||
364 | for (i = 0; i < ngroups; i++) { | ||
365 | group = (parent_group + i) % ngroups; | ||
366 | desc = ext2_get_group_desc (sb, group, &bh); | ||
367 | if (!desc || !desc->bg_free_inodes_count) | ||
368 | continue; | ||
369 | if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) | ||
370 | goto found; | ||
371 | } | ||
372 | |||
373 | if (avefreei) { | ||
374 | /* | ||
375 | * The free-inodes counter is approximate, and for really small | ||
376 | * filesystems the above test can fail to find any blockgroups | ||
377 | */ | ||
378 | avefreei = 0; | ||
379 | goto fallback; | ||
380 | } | ||
381 | |||
382 | return -1; | ||
383 | |||
384 | found: | ||
385 | return group; | ||
386 | } | ||
387 | |||
388 | static int find_group_other(struct super_block *sb, struct inode *parent) | ||
389 | { | ||
390 | int parent_group = EXT2_I(parent)->i_block_group; | ||
391 | int ngroups = EXT2_SB(sb)->s_groups_count; | ||
392 | struct ext2_group_desc *desc; | ||
393 | struct buffer_head *bh; | ||
394 | int group, i; | ||
395 | |||
396 | /* | ||
397 | * Try to place the inode in its parent directory | ||
398 | */ | ||
399 | group = parent_group; | ||
400 | desc = ext2_get_group_desc (sb, group, &bh); | ||
401 | if (desc && le16_to_cpu(desc->bg_free_inodes_count) && | ||
402 | le16_to_cpu(desc->bg_free_blocks_count)) | ||
403 | goto found; | ||
404 | |||
405 | /* | ||
406 | * We're going to place this inode in a different blockgroup from its | ||
407 | * parent. We want to cause files in a common directory to all land in | ||
408 | * the same blockgroup. But we want files which are in a different | ||
409 | * directory which shares a blockgroup with our parent to land in a | ||
410 | * different blockgroup. | ||
411 | * | ||
412 | * So add our directory's i_ino into the starting point for the hash. | ||
413 | */ | ||
414 | group = (group + parent->i_ino) % ngroups; | ||
415 | |||
416 | /* | ||
417 | * Use a quadratic hash to find a group with a free inode and some | ||
418 | * free blocks. | ||
419 | */ | ||
420 | for (i = 1; i < ngroups; i <<= 1) { | ||
421 | group += i; | ||
422 | if (group >= ngroups) | ||
423 | group -= ngroups; | ||
424 | desc = ext2_get_group_desc (sb, group, &bh); | ||
425 | if (desc && le16_to_cpu(desc->bg_free_inodes_count) && | ||
426 | le16_to_cpu(desc->bg_free_blocks_count)) | ||
427 | goto found; | ||
428 | } | ||
429 | |||
430 | /* | ||
431 | * That failed: try linear search for a free inode, even if that group | ||
432 | * has no free blocks. | ||
433 | */ | ||
434 | group = parent_group; | ||
435 | for (i = 0; i < ngroups; i++) { | ||
436 | if (++group >= ngroups) | ||
437 | group = 0; | ||
438 | desc = ext2_get_group_desc (sb, group, &bh); | ||
439 | if (desc && le16_to_cpu(desc->bg_free_inodes_count)) | ||
440 | goto found; | ||
441 | } | ||
442 | |||
443 | return -1; | ||
444 | |||
445 | found: | ||
446 | return group; | ||
447 | } | ||
448 | |||
449 | struct inode *ext2_new_inode(struct inode *dir, int mode) | ||
450 | { | ||
451 | struct super_block *sb; | ||
452 | struct buffer_head *bitmap_bh = NULL; | ||
453 | struct buffer_head *bh2; | ||
454 | int group, i; | ||
455 | ino_t ino = 0; | ||
456 | struct inode * inode; | ||
457 | struct ext2_group_desc *gdp; | ||
458 | struct ext2_super_block *es; | ||
459 | struct ext2_inode_info *ei; | ||
460 | struct ext2_sb_info *sbi; | ||
461 | int err; | ||
462 | |||
463 | sb = dir->i_sb; | ||
464 | inode = new_inode(sb); | ||
465 | if (!inode) | ||
466 | return ERR_PTR(-ENOMEM); | ||
467 | |||
468 | ei = EXT2_I(inode); | ||
469 | sbi = EXT2_SB(sb); | ||
470 | es = sbi->s_es; | ||
471 | if (S_ISDIR(mode)) { | ||
472 | if (test_opt(sb, OLDALLOC)) | ||
473 | group = find_group_dir(sb, dir); | ||
474 | else | ||
475 | group = find_group_orlov(sb, dir); | ||
476 | } else | ||
477 | group = find_group_other(sb, dir); | ||
478 | |||
479 | if (group == -1) { | ||
480 | err = -ENOSPC; | ||
481 | goto fail; | ||
482 | } | ||
483 | |||
484 | for (i = 0; i < sbi->s_groups_count; i++) { | ||
485 | gdp = ext2_get_group_desc(sb, group, &bh2); | ||
486 | brelse(bitmap_bh); | ||
487 | bitmap_bh = read_inode_bitmap(sb, group); | ||
488 | if (!bitmap_bh) { | ||
489 | err = -EIO; | ||
490 | goto fail; | ||
491 | } | ||
492 | ino = 0; | ||
493 | |||
494 | repeat_in_this_group: | ||
495 | ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data, | ||
496 | EXT2_INODES_PER_GROUP(sb), ino); | ||
497 | if (ino >= EXT2_INODES_PER_GROUP(sb)) { | ||
498 | /* | ||
499 | * Rare race: find_group_xx() decided that there were | ||
500 | * free inodes in this group, but by the time we tried | ||
501 | * to allocate one, they're all gone. This can also | ||
502 | * occur because the counters which find_group_orlov() | ||
503 | * uses are approximate. So just go and search the | ||
504 | * next block group. | ||
505 | */ | ||
506 | if (++group == sbi->s_groups_count) | ||
507 | group = 0; | ||
508 | continue; | ||
509 | } | ||
510 | if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group), | ||
511 | ino, bitmap_bh->b_data)) { | ||
512 | /* we lost this inode */ | ||
513 | if (++ino >= EXT2_INODES_PER_GROUP(sb)) { | ||
514 | /* this group is exhausted, try next group */ | ||
515 | if (++group == sbi->s_groups_count) | ||
516 | group = 0; | ||
517 | continue; | ||
518 | } | ||
519 | /* try to find free inode in the same group */ | ||
520 | goto repeat_in_this_group; | ||
521 | } | ||
522 | goto got; | ||
523 | } | ||
524 | |||
525 | /* | ||
526 | * Scanned all blockgroups. | ||
527 | */ | ||
528 | err = -ENOSPC; | ||
529 | goto fail; | ||
530 | got: | ||
531 | mark_buffer_dirty(bitmap_bh); | ||
532 | if (sb->s_flags & MS_SYNCHRONOUS) | ||
533 | sync_dirty_buffer(bitmap_bh); | ||
534 | brelse(bitmap_bh); | ||
535 | |||
536 | ino += group * EXT2_INODES_PER_GROUP(sb) + 1; | ||
537 | if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | ||
538 | ext2_error (sb, "ext2_new_inode", | ||
539 | "reserved inode or inode > inodes count - " | ||
540 | "block_group = %d,inode=%lu", group, | ||
541 | (unsigned long) ino); | ||
542 | err = -EIO; | ||
543 | goto fail; | ||
544 | } | ||
545 | |||
546 | percpu_counter_mod(&sbi->s_freeinodes_counter, -1); | ||
547 | if (S_ISDIR(mode)) | ||
548 | percpu_counter_inc(&sbi->s_dirs_counter); | ||
549 | |||
550 | spin_lock(sb_bgl_lock(sbi, group)); | ||
551 | gdp->bg_free_inodes_count = | ||
552 | cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); | ||
553 | if (S_ISDIR(mode)) { | ||
554 | if (sbi->s_debts[group] < 255) | ||
555 | sbi->s_debts[group]++; | ||
556 | gdp->bg_used_dirs_count = | ||
557 | cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); | ||
558 | } else { | ||
559 | if (sbi->s_debts[group]) | ||
560 | sbi->s_debts[group]--; | ||
561 | } | ||
562 | spin_unlock(sb_bgl_lock(sbi, group)); | ||
563 | |||
564 | sb->s_dirt = 1; | ||
565 | mark_buffer_dirty(bh2); | ||
566 | inode->i_uid = current->fsuid; | ||
567 | if (test_opt (sb, GRPID)) | ||
568 | inode->i_gid = dir->i_gid; | ||
569 | else if (dir->i_mode & S_ISGID) { | ||
570 | inode->i_gid = dir->i_gid; | ||
571 | if (S_ISDIR(mode)) | ||
572 | mode |= S_ISGID; | ||
573 | } else | ||
574 | inode->i_gid = current->fsgid; | ||
575 | inode->i_mode = mode; | ||
576 | |||
577 | inode->i_ino = ino; | ||
578 | inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ | ||
579 | inode->i_blocks = 0; | ||
580 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | ||
581 | memset(ei->i_data, 0, sizeof(ei->i_data)); | ||
582 | ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; | ||
583 | if (S_ISLNK(mode)) | ||
584 | ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); | ||
585 | /* dirsync is only applied to directories */ | ||
586 | if (!S_ISDIR(mode)) | ||
587 | ei->i_flags &= ~EXT2_DIRSYNC_FL; | ||
588 | ei->i_faddr = 0; | ||
589 | ei->i_frag_no = 0; | ||
590 | ei->i_frag_size = 0; | ||
591 | ei->i_file_acl = 0; | ||
592 | ei->i_dir_acl = 0; | ||
593 | ei->i_dtime = 0; | ||
594 | ei->i_block_group = group; | ||
595 | ei->i_next_alloc_block = 0; | ||
596 | ei->i_next_alloc_goal = 0; | ||
597 | ei->i_prealloc_block = 0; | ||
598 | ei->i_prealloc_count = 0; | ||
599 | ei->i_dir_start_lookup = 0; | ||
600 | ei->i_state = EXT2_STATE_NEW; | ||
601 | ext2_set_inode_flags(inode); | ||
602 | spin_lock(&sbi->s_next_gen_lock); | ||
603 | inode->i_generation = sbi->s_next_generation++; | ||
604 | spin_unlock(&sbi->s_next_gen_lock); | ||
605 | insert_inode_hash(inode); | ||
606 | |||
607 | if (DQUOT_ALLOC_INODE(inode)) { | ||
608 | DQUOT_DROP(inode); | ||
609 | err = -ENOSPC; | ||
610 | goto fail2; | ||
611 | } | ||
612 | err = ext2_init_acl(inode, dir); | ||
613 | if (err) { | ||
614 | DQUOT_FREE_INODE(inode); | ||
615 | goto fail2; | ||
616 | } | ||
617 | mark_inode_dirty(inode); | ||
618 | ext2_debug("allocating inode %lu\n", inode->i_ino); | ||
619 | ext2_preread_inode(inode); | ||
620 | return inode; | ||
621 | |||
622 | fail2: | ||
623 | inode->i_flags |= S_NOQUOTA; | ||
624 | inode->i_nlink = 0; | ||
625 | iput(inode); | ||
626 | return ERR_PTR(err); | ||
627 | |||
628 | fail: | ||
629 | make_bad_inode(inode); | ||
630 | iput(inode); | ||
631 | return ERR_PTR(err); | ||
632 | } | ||
633 | |||
634 | unsigned long ext2_count_free_inodes (struct super_block * sb) | ||
635 | { | ||
636 | struct ext2_group_desc *desc; | ||
637 | unsigned long desc_count = 0; | ||
638 | int i; | ||
639 | |||
640 | #ifdef EXT2FS_DEBUG | ||
641 | struct ext2_super_block *es; | ||
642 | unsigned long bitmap_count = 0; | ||
643 | struct buffer_head *bitmap_bh = NULL; | ||
644 | |||
645 | lock_super (sb); | ||
646 | es = EXT2_SB(sb)->s_es; | ||
647 | for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { | ||
648 | unsigned x; | ||
649 | |||
650 | desc = ext2_get_group_desc (sb, i, NULL); | ||
651 | if (!desc) | ||
652 | continue; | ||
653 | desc_count += le16_to_cpu(desc->bg_free_inodes_count); | ||
654 | brelse(bitmap_bh); | ||
655 | bitmap_bh = read_inode_bitmap(sb, i); | ||
656 | if (!bitmap_bh) | ||
657 | continue; | ||
658 | |||
659 | x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8); | ||
660 | printk("group %d: stored = %d, counted = %u\n", | ||
661 | i, le16_to_cpu(desc->bg_free_inodes_count), x); | ||
662 | bitmap_count += x; | ||
663 | } | ||
664 | brelse(bitmap_bh); | ||
665 | printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", | ||
666 | percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), | ||
667 | desc_count, bitmap_count); | ||
668 | unlock_super(sb); | ||
669 | return desc_count; | ||
670 | #else | ||
671 | for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { | ||
672 | desc = ext2_get_group_desc (sb, i, NULL); | ||
673 | if (!desc) | ||
674 | continue; | ||
675 | desc_count += le16_to_cpu(desc->bg_free_inodes_count); | ||
676 | } | ||
677 | return desc_count; | ||
678 | #endif | ||
679 | } | ||
680 | |||
681 | /* Called at mount-time, super-block is locked */ | ||
682 | unsigned long ext2_count_dirs (struct super_block * sb) | ||
683 | { | ||
684 | unsigned long count = 0; | ||
685 | int i; | ||
686 | |||
687 | for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { | ||
688 | struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL); | ||
689 | if (!gdp) | ||
690 | continue; | ||
691 | count += le16_to_cpu(gdp->bg_used_dirs_count); | ||
692 | } | ||
693 | return count; | ||
694 | } | ||
695 | |||
#ifdef CONFIG_EXT2_CHECK
/*
 * Consistency check of the free-inode accounting (only built with
 * CONFIG_EXT2_CHECK).  Original note: called at mount-time, super-block
 * is locked.
 *
 * For every group whose descriptor and bitmap can be read, the free
 * inodes counted in the bitmap are compared with the descriptor's
 * bg_free_inodes_count.  Afterwards the sum of the bitmap counts is
 * compared with the per-cpu s_freeinodes_counter.  Mismatches are
 * reported through ext2_error(); note that the superblock message
 * prints the on-disk s_free_inodes_count as the "stored" value.
 */
void ext2_check_inodes_bitmap (struct super_block * sb)
{
	struct ext2_super_block * es = EXT2_SB(sb)->s_es;
	unsigned long desc_count = 0, bitmap_count = 0;
	struct buffer_head *bitmap_bh = NULL;
	int i;

	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
		struct ext2_group_desc *desc;
		unsigned x;

		desc = ext2_get_group_desc(sb, i, NULL);
		if (!desc)
			continue;
		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
		brelse(bitmap_bh);
		bitmap_bh = read_inode_bitmap(sb, i);
		if (!bitmap_bh)
			continue;

		x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
		if (le16_to_cpu(desc->bg_free_inodes_count) != x)
			/* x is unsigned int; widen it to match the %lu. */
			ext2_error (sb, "ext2_check_inodes_bitmap",
				    "Wrong free inodes count in group %d, "
				    "stored = %d, counted = %lu", i,
				    le16_to_cpu(desc->bg_free_inodes_count),
				    (unsigned long) x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	if (percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter) !=
				bitmap_count)
		ext2_error(sb, "ext2_check_inodes_bitmap",
			    "Wrong free inodes count in super block, "
			    "stored = %lu, counted = %lu",
			    (unsigned long)le32_to_cpu(es->s_free_inodes_count),
			    bitmap_count);
}
#endif
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c new file mode 100644 index 000000000000..b890be022496 --- /dev/null +++ b/fs/ext2/inode.c | |||
@@ -0,0 +1,1276 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/inode.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/fs/minix/inode.c | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | * | ||
15 | * Goal-directed block allocation by Stephen Tweedie | ||
16 | * (sct@dcs.ed.ac.uk), 1993, 1998 | ||
17 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | ||
20 | * (jj@sunsite.ms.mff.cuni.cz) | ||
21 | * | ||
22 | * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 | ||
23 | */ | ||
24 | |||
25 | #include <linux/smp_lock.h> | ||
26 | #include <linux/time.h> | ||
27 | #include <linux/highuid.h> | ||
28 | #include <linux/pagemap.h> | ||
29 | #include <linux/quotaops.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/writeback.h> | ||
32 | #include <linux/buffer_head.h> | ||
33 | #include <linux/mpage.h> | ||
34 | #include "ext2.h" | ||
35 | #include "acl.h" | ||
36 | |||
37 | MODULE_AUTHOR("Remy Card and others"); | ||
38 | MODULE_DESCRIPTION("Second Extended Filesystem"); | ||
39 | MODULE_LICENSE("GPL"); | ||
40 | |||
41 | static int ext2_update_inode(struct inode * inode, int do_sync); | ||
42 | |||
43 | /* | ||
44 | * Test whether an inode is a fast symlink. | ||
45 | */ | ||
46 | static inline int ext2_inode_is_fast_symlink(struct inode *inode) | ||
47 | { | ||
48 | int ea_blocks = EXT2_I(inode)->i_file_acl ? | ||
49 | (inode->i_sb->s_blocksize >> 9) : 0; | ||
50 | |||
51 | return (S_ISLNK(inode->i_mode) && | ||
52 | inode->i_blocks - ea_blocks == 0); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Called at the last iput() if i_nlink is zero. | ||
57 | */ | ||
58 | void ext2_delete_inode (struct inode * inode) | ||
59 | { | ||
60 | if (is_bad_inode(inode)) | ||
61 | goto no_delete; | ||
62 | EXT2_I(inode)->i_dtime = get_seconds(); | ||
63 | mark_inode_dirty(inode); | ||
64 | ext2_update_inode(inode, inode_needs_sync(inode)); | ||
65 | |||
66 | inode->i_size = 0; | ||
67 | if (inode->i_blocks) | ||
68 | ext2_truncate (inode); | ||
69 | ext2_free_inode (inode); | ||
70 | |||
71 | return; | ||
72 | no_delete: | ||
73 | clear_inode(inode); /* We must guarantee clearing of inode... */ | ||
74 | } | ||
75 | |||
/*
 * Give back any blocks still held in the inode's preallocation window.
 * The window is emptied under i_meta_lock; the blocks themselves are
 * freed after the lock has been dropped.  Compiles to an empty function
 * when EXT2_PREALLOCATE is not defined.
 */
void ext2_discard_prealloc (struct inode * inode)
{
#ifdef EXT2_PREALLOCATE
	struct ext2_inode_info *ei = EXT2_I(inode);
	unsigned short total;
	unsigned long block;

	write_lock(&ei->i_meta_lock);
	if (!ei->i_prealloc_count) {
		write_unlock(&ei->i_meta_lock);
		return;
	}
	total = ei->i_prealloc_count;
	block = ei->i_prealloc_block;
	ei->i_prealloc_count = 0;
	ei->i_prealloc_block = 0;
	write_unlock(&ei->i_meta_lock);

	ext2_free_blocks (inode, block, total);
#endif
}
93 | |||
94 | static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) | ||
95 | { | ||
96 | #ifdef EXT2FS_DEBUG | ||
97 | static unsigned long alloc_hits, alloc_attempts; | ||
98 | #endif | ||
99 | unsigned long result; | ||
100 | |||
101 | |||
102 | #ifdef EXT2_PREALLOCATE | ||
103 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
104 | write_lock(&ei->i_meta_lock); | ||
105 | if (ei->i_prealloc_count && | ||
106 | (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block)) | ||
107 | { | ||
108 | result = ei->i_prealloc_block++; | ||
109 | ei->i_prealloc_count--; | ||
110 | write_unlock(&ei->i_meta_lock); | ||
111 | ext2_debug ("preallocation hit (%lu/%lu).\n", | ||
112 | ++alloc_hits, ++alloc_attempts); | ||
113 | } else { | ||
114 | write_unlock(&ei->i_meta_lock); | ||
115 | ext2_discard_prealloc (inode); | ||
116 | ext2_debug ("preallocation miss (%lu/%lu).\n", | ||
117 | alloc_hits, ++alloc_attempts); | ||
118 | if (S_ISREG(inode->i_mode)) | ||
119 | result = ext2_new_block (inode, goal, | ||
120 | &ei->i_prealloc_count, | ||
121 | &ei->i_prealloc_block, err); | ||
122 | else | ||
123 | result = ext2_new_block(inode, goal, NULL, NULL, err); | ||
124 | } | ||
125 | #else | ||
126 | result = ext2_new_block (inode, goal, 0, 0, err); | ||
127 | #endif | ||
128 | return result; | ||
129 | } | ||
130 | |||
131 | typedef struct { /* one <key, p, bh> triple of a block-pointer chain, filled in by add_chain() */ | ||
132 | __le32 *p; /* address the block number was read from */ | ||
133 | __le32 key; /* copy of *p taken when the triple was filled (little-endian) */ | ||
134 | struct buffer_head *bh; /* buffer that p points into; NULL when p points into the inode's i_data */ | ||
135 | } Indirect; | ||
136 | |||
137 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | ||
138 | { | ||
139 | p->key = *(p->p = v); | ||
140 | p->bh = bh; | ||
141 | } | ||
142 | |||
143 | static inline int verify_chain(Indirect *from, Indirect *to) | ||
144 | { | ||
145 | while (from <= to && from->key == *from->p) | ||
146 | from++; | ||
147 | return (from > to); | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * ext2_block_to_path - parse the block number into array of offsets | ||
152 | * @inode: inode in question (we are only interested in its superblock) | ||
153 | * @i_block: block number to be parsed | ||
154 | * @offsets: array to store the offsets in | ||
155 | * @boundary: set this non-zero if the referred-to block is likely to be | ||
156 | * followed (on disk) by an indirect block. | ||
157 | * To store the locations of file's data ext2 uses a data structure common | ||
158 | * for UNIX filesystems - tree of pointers anchored in the inode, with | ||
159 | * data blocks at leaves and indirect blocks in intermediate nodes. | ||
160 | * This function translates the block number into path in that tree - | ||
161 | * return value is the path length and @offsets[n] is the offset of | ||
162 | * pointer to (n+1)th node in the nth one. If @block is out of range | ||
163 | * (negative or too large) warning is printed and zero returned. | ||
164 | * | ||
165 | * Note: function doesn't find node addresses, so no IO is needed. All | ||
166 | * we need to know is the capacity of indirect blocks (taken from the | ||
167 | * inode->i_sb). | ||
168 | */ | ||
169 | |||
170 | /* | ||
171 | * Portability note: the last comparison (check that we fit into triple | ||
172 | * indirect block) is spelled differently, because otherwise on an | ||
173 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | ||
174 | * if our filesystem had 8Kb blocks. We might use long long, but that would | ||
175 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | ||
176 | * i_block would have to be negative in the very beginning, so we would not | ||
177 | * get there at all. | ||
178 | */ | ||
179 | |||
180 | static int ext2_block_to_path(struct inode *inode, | ||
181 | long i_block, int offsets[4], int *boundary) | ||
182 | { | ||
183 | int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb); | ||
184 | int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); | ||
185 | const long direct_blocks = EXT2_NDIR_BLOCKS, | ||
186 | indirect_blocks = ptrs, | ||
187 | double_blocks = (1 << (ptrs_bits * 2)); | ||
188 | int n = 0; | ||
189 | int final = 0; | ||
190 | |||
191 | if (i_block < 0) { | ||
192 | ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0"); | ||
193 | } else if (i_block < direct_blocks) { | ||
194 | offsets[n++] = i_block; | ||
195 | final = direct_blocks; | ||
196 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | ||
197 | offsets[n++] = EXT2_IND_BLOCK; | ||
198 | offsets[n++] = i_block; | ||
199 | final = ptrs; | ||
200 | } else if ((i_block -= indirect_blocks) < double_blocks) { | ||
201 | offsets[n++] = EXT2_DIND_BLOCK; | ||
202 | offsets[n++] = i_block >> ptrs_bits; | ||
203 | offsets[n++] = i_block & (ptrs - 1); | ||
204 | final = ptrs; | ||
205 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | ||
206 | offsets[n++] = EXT2_TIND_BLOCK; | ||
207 | offsets[n++] = i_block >> (ptrs_bits * 2); | ||
208 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | ||
209 | offsets[n++] = i_block & (ptrs - 1); | ||
210 | final = ptrs; | ||
211 | } else { | ||
212 | ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); | ||
213 | } | ||
214 | if (boundary) | ||
215 | *boundary = (i_block & (ptrs - 1)) == (final - 1); | ||
216 | return n; | ||
217 | } | ||
218 | |||
219 | /** | ||
220 | * ext2_get_branch - read the chain of indirect blocks leading to data | ||
221 | * @inode: inode in question | ||
222 | * @depth: depth of the chain (1 - direct pointer, etc.) | ||
223 | * @offsets: offsets of pointers in inode/indirect blocks | ||
224 | * @chain: place to store the result | ||
225 | * @err: here we store the error value | ||
226 | * | ||
227 | * Function fills the array of triples <key, p, bh> and returns %NULL | ||
228 | * if everything went OK or the pointer to the last filled triple | ||
229 | * (incomplete one) otherwise. Upon the return chain[i].key contains | ||
230 | * the number of (i+1)-th block in the chain (as it is stored in memory, | ||
231 | * i.e. little-endian 32-bit), chain[i].p contains the address of that | ||
232 | * number (it points into struct inode for i==0 and into the bh->b_data | ||
233 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect | ||
234 | * block for i>0 and NULL for i==0. In other words, it holds the block | ||
235 | * numbers of the chain, addresses they were taken from (and where we can | ||
236 | * verify that chain did not change) and buffer_heads hosting these | ||
237 | * numbers. | ||
238 | * | ||
239 | * Function stops when it stumbles upon zero pointer (absent block) | ||
240 | * (pointer to last triple returned, *@err == 0) | ||
241 | * or when it gets an IO error reading an indirect block | ||
242 | * (ditto, *@err == -EIO) | ||
243 | * or when it notices that chain had been changed while it was reading | ||
244 | * (ditto, *@err == -EAGAIN) | ||
245 | * or when it reads all @depth-1 indirect blocks successfully and finds | ||
246 | * the whole chain, all way to the data (returns %NULL, *err == 0). | ||
247 | */ | ||
248 | static Indirect *ext2_get_branch(struct inode *inode, | ||
249 | int depth, | ||
250 | int *offsets, | ||
251 | Indirect chain[4], | ||
252 | int *err) | ||
253 | { | ||
254 | struct super_block *sb = inode->i_sb; | ||
255 | Indirect *p = chain; | ||
256 | struct buffer_head *bh; | ||
257 | |||
258 | *err = 0; | ||
259 | /* i_data is not going away, no lock needed */ | ||
260 | add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets); | ||
261 | if (!p->key) | ||
262 | goto no_block; | ||
263 | while (--depth) { | ||
264 | bh = sb_bread(sb, le32_to_cpu(p->key)); | ||
265 | if (!bh) | ||
266 | goto failure; | ||
267 | read_lock(&EXT2_I(inode)->i_meta_lock); | ||
268 | if (!verify_chain(chain, p)) | ||
269 | goto changed; | ||
270 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | ||
271 | read_unlock(&EXT2_I(inode)->i_meta_lock); | ||
272 | if (!p->key) | ||
273 | goto no_block; | ||
274 | } | ||
275 | return NULL; | ||
276 | |||
277 | changed: | ||
278 | read_unlock(&EXT2_I(inode)->i_meta_lock); | ||
279 | brelse(bh); | ||
280 | *err = -EAGAIN; | ||
281 | goto no_block; | ||
282 | failure: | ||
283 | *err = -EIO; | ||
284 | no_block: | ||
285 | return p; | ||
286 | } | ||
287 | |||
288 | /** | ||
289 | * ext2_find_near - find a place for allocation with sufficient locality | ||
290 | * @inode: owner | ||
291 | * @ind: descriptor of indirect block. | ||
292 | * | ||
293 | * This function returns the prefered place for block allocation. | ||
294 | * It is used when heuristic for sequential allocation fails. | ||
295 | * Rules are: | ||
296 | * + if there is a block to the left of our position - allocate near it. | ||
297 | * + if pointer will live in indirect block - allocate near that block. | ||
298 | * + if pointer will live in inode - allocate in the same cylinder group. | ||
299 | * | ||
300 | * In the latter case we colour the starting block by the callers PID to | ||
301 | * prevent it from clashing with concurrent allocations for a different inode | ||
302 | * in the same block group. The PID is used here so that functionally related | ||
303 | * files will be close-by on-disk. | ||
304 | * | ||
305 | * Caller must make sure that @ind is valid and will stay that way. | ||
306 | */ | ||
307 | |||
308 | static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) | ||
309 | { | ||
310 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
311 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | ||
312 | __le32 *p; | ||
313 | unsigned long bg_start; | ||
314 | unsigned long colour; | ||
315 | |||
316 | /* Try to find previous block */ | ||
317 | for (p = ind->p - 1; p >= start; p--) | ||
318 | if (*p) | ||
319 | return le32_to_cpu(*p); | ||
320 | |||
321 | /* No such thing, so let's try location of indirect block */ | ||
322 | if (ind->bh) | ||
323 | return ind->bh->b_blocknr; | ||
324 | |||
325 | /* | ||
326 | * It is going to be refered from inode itself? OK, just put it into | ||
327 | * the same cylinder group then. | ||
328 | */ | ||
329 | bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + | ||
330 | le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block); | ||
331 | colour = (current->pid % 16) * | ||
332 | (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); | ||
333 | return bg_start + colour; | ||
334 | } | ||
335 | |||
336 | /** | ||
337 | * ext2_find_goal - find a prefered place for allocation. | ||
338 | * @inode: owner | ||
339 | * @block: block we want | ||
340 | * @chain: chain of indirect blocks | ||
341 | * @partial: pointer to the last triple within a chain | ||
342 | * @goal: place to store the result. | ||
343 | * | ||
344 | * Normally this function find the prefered place for block allocation, | ||
345 | * stores it in *@goal and returns zero. If the branch had been changed | ||
346 | * under us we return -EAGAIN. | ||
347 | */ | ||
348 | |||
349 | static inline int ext2_find_goal(struct inode *inode, | ||
350 | long block, | ||
351 | Indirect chain[4], | ||
352 | Indirect *partial, | ||
353 | unsigned long *goal) | ||
354 | { | ||
355 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
356 | write_lock(&ei->i_meta_lock); | ||
357 | if ((block == ei->i_next_alloc_block + 1) && ei->i_next_alloc_goal) { | ||
358 | ei->i_next_alloc_block++; | ||
359 | ei->i_next_alloc_goal++; | ||
360 | } | ||
361 | if (verify_chain(chain, partial)) { | ||
362 | /* | ||
363 | * try the heuristic for sequential allocation, | ||
364 | * failing that at least try to get decent locality. | ||
365 | */ | ||
366 | if (block == ei->i_next_alloc_block) | ||
367 | *goal = ei->i_next_alloc_goal; | ||
368 | if (!*goal) | ||
369 | *goal = ext2_find_near(inode, partial); | ||
370 | write_unlock(&ei->i_meta_lock); | ||
371 | return 0; | ||
372 | } | ||
373 | write_unlock(&ei->i_meta_lock); | ||
374 | return -EAGAIN; | ||
375 | } | ||
376 | |||
377 | /** | ||
378 | * ext2_alloc_branch - allocate and set up a chain of blocks. | ||
379 | * @inode: owner | ||
380 | * @num: depth of the chain (number of blocks to allocate) | ||
381 | * @offsets: offsets (in the blocks) to store the pointers to next. | ||
382 | * @branch: place to store the chain in. | ||
383 | * | ||
384 | * This function allocates @num blocks, zeroes out all but the last one, | ||
385 | * links them into chain and (if we are synchronous) writes them to disk. | ||
386 | * In other words, it prepares a branch that can be spliced onto the | ||
387 | * inode. It stores the information about that chain in the branch[], in | ||
388 | * the same format as ext2_get_branch() would do. We are calling it after | ||
389 | * we had read the existing part of chain and partial points to the last | ||
390 | * triple of that (one with zero ->key). Upon the exit we have the same | ||
391 | * picture as after the successful ext2_get_block(), excpet that in one | ||
392 | * place chain is disconnected - *branch->p is still zero (we did not | ||
393 | * set the last link), but branch->key contains the number that should | ||
394 | * be placed into *branch->p to fill that gap. | ||
395 | * | ||
396 | * If allocation fails we free all blocks we've allocated (and forget | ||
397 | * their buffer_heads) and return the error value the from failed | ||
398 | * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain | ||
399 | * as described above and return 0. | ||
400 | */ | ||
401 | |||
402 | static int ext2_alloc_branch(struct inode *inode, | ||
403 | int num, | ||
404 | unsigned long goal, | ||
405 | int *offsets, | ||
406 | Indirect *branch) | ||
407 | { | ||
408 | int blocksize = inode->i_sb->s_blocksize; | ||
409 | int n = 0; | ||
410 | int err; | ||
411 | int i; | ||
412 | int parent = ext2_alloc_block(inode, goal, &err); | ||
413 | |||
414 | branch[0].key = cpu_to_le32(parent); | ||
415 | if (parent) for (n = 1; n < num; n++) { | ||
416 | struct buffer_head *bh; | ||
417 | /* Allocate the next block */ | ||
418 | int nr = ext2_alloc_block(inode, parent, &err); | ||
419 | if (!nr) | ||
420 | break; | ||
421 | branch[n].key = cpu_to_le32(nr); | ||
422 | /* | ||
423 | * Get buffer_head for parent block, zero it out and set | ||
424 | * the pointer to new one, then send parent to disk. | ||
425 | */ | ||
426 | bh = sb_getblk(inode->i_sb, parent); | ||
427 | lock_buffer(bh); | ||
428 | memset(bh->b_data, 0, blocksize); | ||
429 | branch[n].bh = bh; | ||
430 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | ||
431 | *branch[n].p = branch[n].key; | ||
432 | set_buffer_uptodate(bh); | ||
433 | unlock_buffer(bh); | ||
434 | mark_buffer_dirty_inode(bh, inode); | ||
435 | /* We used to sync bh here if IS_SYNC(inode). | ||
436 | * But we now rely upon generic_osync_inode() | ||
437 | * and b_inode_buffers. But not for directories. | ||
438 | */ | ||
439 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) | ||
440 | sync_dirty_buffer(bh); | ||
441 | parent = nr; | ||
442 | } | ||
443 | if (n == num) | ||
444 | return 0; | ||
445 | |||
446 | /* Allocation failed, free what we already allocated */ | ||
447 | for (i = 1; i < n; i++) | ||
448 | bforget(branch[i].bh); | ||
449 | for (i = 0; i < n; i++) | ||
450 | ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1); | ||
451 | return err; | ||
452 | } | ||
453 | |||
454 | /** | ||
455 | * ext2_splice_branch - splice the allocated branch onto inode. | ||
456 | * @inode: owner | ||
457 | * @block: (logical) number of block we are adding | ||
458 | * @chain: chain of indirect blocks (with a missing link - see | ||
459 | * ext2_alloc_branch) | ||
460 | * @where: location of missing link | ||
461 | * @num: number of blocks we are adding | ||
462 | * | ||
463 | * This function verifies that chain (up to the missing link) had not | ||
464 | * changed, fills the missing link and does all housekeeping needed in | ||
465 | * inode (->i_blocks, etc.). In case of success we end up with the full | ||
466 | * chain to new block and return 0. Otherwise (== chain had been changed) | ||
467 | * we free the new blocks (forgetting their buffer_heads, indeed) and | ||
468 | * return -EAGAIN. | ||
469 | */ | ||
470 | |||
471 | static inline int ext2_splice_branch(struct inode *inode, | ||
472 | long block, | ||
473 | Indirect chain[4], | ||
474 | Indirect *where, | ||
475 | int num) | ||
476 | { | ||
477 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
478 | int i; | ||
479 | |||
480 | /* Verify that place we are splicing to is still there and vacant */ | ||
481 | |||
482 | write_lock(&ei->i_meta_lock); | ||
483 | if (!verify_chain(chain, where-1) || *where->p) | ||
484 | goto changed; | ||
485 | |||
486 | /* That's it */ | ||
487 | |||
488 | *where->p = where->key; | ||
489 | ei->i_next_alloc_block = block; | ||
490 | ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); | ||
491 | |||
492 | write_unlock(&ei->i_meta_lock); | ||
493 | |||
494 | /* We are done with atomic stuff, now do the rest of housekeeping */ | ||
495 | |||
496 | inode->i_ctime = CURRENT_TIME_SEC; | ||
497 | |||
498 | /* had we spliced it onto indirect block? */ | ||
499 | if (where->bh) | ||
500 | mark_buffer_dirty_inode(where->bh, inode); | ||
501 | |||
502 | mark_inode_dirty(inode); | ||
503 | return 0; | ||
504 | |||
505 | changed: | ||
506 | write_unlock(&ei->i_meta_lock); | ||
507 | for (i = 1; i < num; i++) | ||
508 | bforget(where[i].bh); | ||
509 | for (i = 0; i < num; i++) | ||
510 | ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1); | ||
511 | return -EAGAIN; | ||
512 | } | ||
513 | |||
514 | /* | ||
515 | * Allocation strategy is simple: if we have to allocate something, we will | ||
516 | * have to go the whole way to leaf. So let's do it before attaching anything | ||
517 | * to tree, set linkage between the newborn blocks, write them if sync is | ||
518 | * required, recheck the path, free and repeat if check fails, otherwise | ||
519 | * set the last missing link (that will protect us from any truncate-generated | ||
520 | * removals - all blocks on the path are immune now) and possibly force the | ||
521 | * write on the parent block. | ||
522 | * That has a nice additional property: no special recovery from the failed | ||
523 | * allocations is needed - we simply release blocks and do not touch anything | ||
524 | * reachable from inode. | ||
525 | */ | ||
526 | |||
527 | int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) | ||
528 | { | ||
529 | int err = -EIO; | ||
530 | int offsets[4]; | ||
531 | Indirect chain[4]; | ||
532 | Indirect *partial; | ||
533 | unsigned long goal; | ||
534 | int left; | ||
535 | int boundary = 0; | ||
536 | int depth = ext2_block_to_path(inode, iblock, offsets, &boundary); | ||
537 | |||
538 | if (depth == 0) | ||
539 | goto out; | ||
540 | |||
541 | reread: | ||
542 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); | ||
543 | |||
544 | /* Simplest case - block found, no allocation needed */ | ||
545 | if (!partial) { | ||
546 | got_it: | ||
547 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | ||
548 | if (boundary) | ||
549 | set_buffer_boundary(bh_result); | ||
550 | /* Clean up and exit */ | ||
551 | partial = chain+depth-1; /* the whole chain */ | ||
552 | goto cleanup; | ||
553 | } | ||
554 | |||
555 | /* Next simple case - plain lookup or failed read of indirect block */ | ||
556 | if (!create || err == -EIO) { | ||
557 | cleanup: | ||
558 | while (partial > chain) { | ||
559 | brelse(partial->bh); | ||
560 | partial--; | ||
561 | } | ||
562 | out: | ||
563 | return err; | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * Indirect block might be removed by truncate while we were | ||
568 | * reading it. Handling of that case (forget what we've got and | ||
569 | * reread) is taken out of the main path. | ||
570 | */ | ||
571 | if (err == -EAGAIN) | ||
572 | goto changed; | ||
573 | |||
574 | goal = 0; | ||
575 | if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0) | ||
576 | goto changed; | ||
577 | |||
578 | left = (chain + depth) - partial; | ||
579 | err = ext2_alloc_branch(inode, left, goal, | ||
580 | offsets+(partial-chain), partial); | ||
581 | if (err) | ||
582 | goto cleanup; | ||
583 | |||
584 | if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) | ||
585 | goto changed; | ||
586 | |||
587 | set_buffer_new(bh_result); | ||
588 | goto got_it; | ||
589 | |||
590 | changed: | ||
591 | while (partial > chain) { | ||
592 | brelse(partial->bh); | ||
593 | partial--; | ||
594 | } | ||
595 | goto reread; | ||
596 | } | ||
597 | |||
598 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) | ||
599 | { | ||
600 | return block_write_full_page(page, ext2_get_block, wbc); | ||
601 | } | ||
602 | |||
603 | static int ext2_readpage(struct file *file, struct page *page) | ||
604 | { | ||
605 | return mpage_readpage(page, ext2_get_block); | ||
606 | } | ||
607 | |||
608 | static int | ||
609 | ext2_readpages(struct file *file, struct address_space *mapping, | ||
610 | struct list_head *pages, unsigned nr_pages) | ||
611 | { | ||
612 | return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); | ||
613 | } | ||
614 | |||
615 | static int | ||
616 | ext2_prepare_write(struct file *file, struct page *page, | ||
617 | unsigned from, unsigned to) | ||
618 | { | ||
619 | return block_prepare_write(page,from,to,ext2_get_block); | ||
620 | } | ||
621 | |||
622 | static int | ||
623 | ext2_nobh_prepare_write(struct file *file, struct page *page, | ||
624 | unsigned from, unsigned to) | ||
625 | { | ||
626 | return nobh_prepare_write(page,from,to,ext2_get_block); | ||
627 | } | ||
628 | |||
629 | static int ext2_nobh_writepage(struct page *page, | ||
630 | struct writeback_control *wbc) | ||
631 | { | ||
632 | return nobh_writepage(page, ext2_get_block, wbc); | ||
633 | } | ||
634 | |||
635 | static sector_t ext2_bmap(struct address_space *mapping, sector_t block) | ||
636 | { | ||
637 | return generic_block_bmap(mapping,block,ext2_get_block); | ||
638 | } | ||
639 | |||
640 | static int | ||
641 | ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, | ||
642 | struct buffer_head *bh_result, int create) | ||
643 | { | ||
644 | int ret; | ||
645 | |||
646 | ret = ext2_get_block(inode, iblock, bh_result, create); | ||
647 | if (ret == 0) | ||
648 | bh_result->b_size = (1 << inode->i_blkbits); | ||
649 | return ret; | ||
650 | } | ||
651 | |||
652 | static ssize_t | ||
653 | ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | ||
654 | loff_t offset, unsigned long nr_segs) | ||
655 | { | ||
656 | struct file *file = iocb->ki_filp; | ||
657 | struct inode *inode = file->f_mapping->host; | ||
658 | |||
659 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | ||
660 | offset, nr_segs, ext2_get_blocks, NULL); | ||
661 | } | ||
662 | |||
663 | static int | ||
664 | ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) | ||
665 | { | ||
666 | return mpage_writepages(mapping, wbc, ext2_get_block); | ||
667 | } | ||
668 | |||
669 | struct address_space_operations ext2_aops = { | ||
670 | .readpage = ext2_readpage, | ||
671 | .readpages = ext2_readpages, | ||
672 | .writepage = ext2_writepage, | ||
673 | .sync_page = block_sync_page, | ||
674 | .prepare_write = ext2_prepare_write, | ||
675 | .commit_write = generic_commit_write, | ||
676 | .bmap = ext2_bmap, | ||
677 | .direct_IO = ext2_direct_IO, | ||
678 | .writepages = ext2_writepages, | ||
679 | }; | ||
680 | |||
681 | struct address_space_operations ext2_nobh_aops = { | ||
682 | .readpage = ext2_readpage, | ||
683 | .readpages = ext2_readpages, | ||
684 | .writepage = ext2_nobh_writepage, | ||
685 | .sync_page = block_sync_page, | ||
686 | .prepare_write = ext2_nobh_prepare_write, | ||
687 | .commit_write = nobh_commit_write, | ||
688 | .bmap = ext2_bmap, | ||
689 | .direct_IO = ext2_direct_IO, | ||
690 | .writepages = ext2_writepages, | ||
691 | }; | ||
692 | |||
693 | /* | ||
694 | * Probably it should be a library function... search for first non-zero word | ||
695 | * or memcmp with zero_page, whatever is better for particular architecture. | ||
696 | * Linus? | ||
697 | */ | ||
698 | static inline int all_zeroes(__le32 *p, __le32 *q) | ||
699 | { | ||
700 | while (p < q) | ||
701 | if (*p++) | ||
702 | return 0; | ||
703 | return 1; | ||
704 | } | ||
705 | |||
706 | /** | ||
707 | * ext2_find_shared - find the indirect blocks for partial truncation. | ||
708 | * @inode: inode in question | ||
709 | * @depth: depth of the affected branch | ||
710 | * @offsets: offsets of pointers in that branch (see ext2_block_to_path) | ||
711 | * @chain: place to store the pointers to partial indirect blocks | ||
712 | * @top: place to the (detached) top of branch | ||
713 | * | ||
714 | * This is a helper function used by ext2_truncate(). | ||
715 | * | ||
716 | * When we do truncate() we may have to clean the ends of several indirect | ||
717 | * blocks but leave the blocks themselves alive. Block is partially | ||
718 | * truncated if some data below the new i_size is refered from it (and | ||
719 | * it is on the path to the first completely truncated data block, indeed). | ||
720 | * We have to free the top of that path along with everything to the right | ||
721 | * of the path. Since no allocation past the truncation point is possible | ||
722 | * until ext2_truncate() finishes, we may safely do the latter, but top | ||
723 | * of branch may require special attention - pageout below the truncation | ||
724 | * point might try to populate it. | ||
725 | * | ||
726 | * We atomically detach the top of branch from the tree, store the block | ||
727 | * number of its root in *@top, pointers to buffer_heads of partially | ||
728 | * truncated blocks - in @chain[].bh and pointers to their last elements | ||
729 | * that should not be removed - in @chain[].p. Return value is the pointer | ||
730 | * to last filled element of @chain. | ||
731 | * | ||
732 | * The work left to caller to do the actual freeing of subtrees: | ||
733 | * a) free the subtree starting from *@top | ||
734 | * b) free the subtrees whose roots are stored in | ||
735 | * (@chain[i].p+1 .. end of @chain[i].bh->b_data) | ||
736 | * c) free the subtrees growing from the inode past the @chain[0].p | ||
737 | * (no partially truncated stuff there). | ||
738 | */ | ||
739 | |||
740 | static Indirect *ext2_find_shared(struct inode *inode, | ||
741 | int depth, | ||
742 | int offsets[4], | ||
743 | Indirect chain[4], | ||
744 | __le32 *top) | ||
745 | { | ||
746 | Indirect *partial, *p; | ||
747 | int k, err; | ||
748 | |||
749 | *top = 0; | ||
750 | for (k = depth; k > 1 && !offsets[k-1]; k--) | ||
751 | ; | ||
752 | partial = ext2_get_branch(inode, k, offsets, chain, &err); | ||
753 | if (!partial) | ||
754 | partial = chain + k-1; | ||
755 | /* | ||
756 | * If the branch acquired continuation since we've looked at it - | ||
757 | * fine, it should all survive and (new) top doesn't belong to us. | ||
758 | */ | ||
759 | write_lock(&EXT2_I(inode)->i_meta_lock); | ||
760 | if (!partial->key && *partial->p) { | ||
761 | write_unlock(&EXT2_I(inode)->i_meta_lock); | ||
762 | goto no_top; | ||
763 | } | ||
764 | for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) | ||
765 | ; | ||
766 | /* | ||
767 | * OK, we've found the last block that must survive. The rest of our | ||
768 | * branch should be detached before unlocking. However, if that rest | ||
769 | * of branch is all ours and does not grow immediately from the inode | ||
770 | * it's easier to cheat and just decrement partial->p. | ||
771 | */ | ||
772 | if (p == chain + k - 1 && p > chain) { | ||
773 | p->p--; | ||
774 | } else { | ||
775 | *top = *p->p; | ||
776 | *p->p = 0; | ||
777 | } | ||
778 | write_unlock(&EXT2_I(inode)->i_meta_lock); | ||
779 | |||
780 | while(partial > p) | ||
781 | { | ||
782 | brelse(partial->bh); | ||
783 | partial--; | ||
784 | } | ||
785 | no_top: | ||
786 | return partial; | ||
787 | } | ||
788 | |||
789 | /** | ||
790 | * ext2_free_data - free a list of data blocks | ||
791 | * @inode: inode we are dealing with | ||
792 | * @p: array of block numbers | ||
793 | * @q: points immediately past the end of array | ||
794 | * | ||
795 | * We are freeing all blocks refered from that array (numbers are | ||
796 | * stored as little-endian 32-bit) and updating @inode->i_blocks | ||
797 | * appropriately. | ||
798 | */ | ||
799 | static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) | ||
800 | { | ||
801 | unsigned long block_to_free = 0, count = 0; | ||
802 | unsigned long nr; | ||
803 | |||
804 | for ( ; p < q ; p++) { | ||
805 | nr = le32_to_cpu(*p); | ||
806 | if (nr) { | ||
807 | *p = 0; | ||
808 | /* accumulate blocks to free if they're contiguous */ | ||
809 | if (count == 0) | ||
810 | goto free_this; | ||
811 | else if (block_to_free == nr - count) | ||
812 | count++; | ||
813 | else { | ||
814 | mark_inode_dirty(inode); | ||
815 | ext2_free_blocks (inode, block_to_free, count); | ||
816 | free_this: | ||
817 | block_to_free = nr; | ||
818 | count = 1; | ||
819 | } | ||
820 | } | ||
821 | } | ||
822 | if (count > 0) { | ||
823 | mark_inode_dirty(inode); | ||
824 | ext2_free_blocks (inode, block_to_free, count); | ||
825 | } | ||
826 | } | ||
827 | |||
828 | /** | ||
829 | * ext2_free_branches - free an array of branches | ||
830 | * @inode: inode we are dealing with | ||
831 | * @p: array of block numbers | ||
832 | * @q: pointer immediately past the end of array | ||
833 | * @depth: depth of the branches to free | ||
834 | * | ||
835 | * We are freeing all blocks refered from these branches (numbers are | ||
836 | * stored as little-endian 32-bit) and updating @inode->i_blocks | ||
837 | * appropriately. | ||
838 | */ | ||
839 | static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth) | ||
840 | { | ||
841 | struct buffer_head * bh; | ||
842 | unsigned long nr; | ||
843 | |||
844 | if (depth--) { | ||
845 | int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); | ||
846 | for ( ; p < q ; p++) { | ||
847 | nr = le32_to_cpu(*p); | ||
848 | if (!nr) | ||
849 | continue; | ||
850 | *p = 0; | ||
851 | bh = sb_bread(inode->i_sb, nr); | ||
852 | /* | ||
853 | * A read failure? Report error and clear slot | ||
854 | * (should be rare). | ||
855 | */ | ||
856 | if (!bh) { | ||
857 | ext2_error(inode->i_sb, "ext2_free_branches", | ||
858 | "Read failure, inode=%ld, block=%ld", | ||
859 | inode->i_ino, nr); | ||
860 | continue; | ||
861 | } | ||
862 | ext2_free_branches(inode, | ||
863 | (__le32*)bh->b_data, | ||
864 | (__le32*)bh->b_data + addr_per_block, | ||
865 | depth); | ||
866 | bforget(bh); | ||
867 | ext2_free_blocks(inode, nr, 1); | ||
868 | mark_inode_dirty(inode); | ||
869 | } | ||
870 | } else | ||
871 | ext2_free_data(inode, p, q); | ||
872 | } | ||
873 | |||
/*
 * ext2_truncate - release the blocks of @inode that lie beyond i_size.
 *
 * Does nothing for inodes that are not regular files, directories or
 * symlinks, for fast symlinks, and for append-only or immutable inodes.
 * Otherwise it discards any preallocation, deals with the partial last
 * page, frees the now-unused data and indirect blocks, updates
 * mtime/ctime and either syncs the inode or marks it dirty.
 */
void ext2_truncate (struct inode * inode)
{
	__le32 *i_data = EXT2_I(inode)->i_data;
	int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
	int offsets[4];
	Indirect chain[4];
	Indirect *partial;
	__le32 nr = 0;
	int n;
	long iblock;
	unsigned blocksize;

	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)))
		return;
	if (ext2_inode_is_fast_symlink(inode))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	ext2_discard_prealloc(inode);

	/* i_size rounded up to whole blocks: index of the first block to free */
	blocksize = inode->i_sb->s_blocksize;
	iblock = (inode->i_size + blocksize-1)
					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);

	if (test_opt(inode->i_sb, NOBH))
		nobh_truncate_page(inode->i_mapping, inode->i_size);
	else
		block_truncate_page(inode->i_mapping,
				inode->i_size, ext2_get_block);

	/*
	 * n is the length of the path to iblock stored in offsets[].
	 * NOTE(review): n == 0 presumably means iblock is out of range for
	 * this filesystem - confirm in ext2_block_to_path().
	 */
	n = ext2_block_to_path(inode, iblock, offsets, NULL);
	if (n == 0)
		return;

	/* Path of length 1: free the tail of the i_data direct slots */
	if (n == 1) {
		ext2_free_data(inode, i_data+offsets[0],
					i_data + EXT2_NDIR_BLOCKS);
		goto do_indirects;
	}

	partial = ext2_find_shared(inode, n, offsets, chain, &nr);
	/* Kill the top of shared branch (already detached) */
	if (nr) {
		/* dirty whatever held the detached pointer: inode or buffer */
		if (partial == chain)
			mark_inode_dirty(inode);
		else
			mark_buffer_dirty_inode(partial->bh, inode);
		ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		ext2_free_branches(inode,
				   partial->p + 1,
				   (__le32*)partial->bh->b_data+addr_per_block,
				   (chain+n-1) - partial);
		mark_buffer_dirty_inode(partial->bh, inode);
		brelse (partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees */
	/*
	 * The cases deliberately fall through: starting from the slot that
	 * offsets[0] names, every higher-level indirect tree is freed whole.
	 */
	switch (offsets[0]) {
		default:
			nr = i_data[EXT2_IND_BLOCK];
			if (nr) {
				i_data[EXT2_IND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 1);
			}
			/* fall through */
		case EXT2_IND_BLOCK:
			nr = i_data[EXT2_DIND_BLOCK];
			if (nr) {
				i_data[EXT2_DIND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 2);
			}
			/* fall through */
		case EXT2_DIND_BLOCK:
			nr = i_data[EXT2_TIND_BLOCK];
			if (nr) {
				i_data[EXT2_TIND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 3);
			}
			/* fall through */
		case EXT2_TIND_BLOCK:
			;
	}
	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
	if (inode_needs_sync(inode)) {
		sync_mapping_buffers(inode->i_mapping);
		ext2_sync_inode (inode);
	} else {
		mark_inode_dirty(inode);
	}
}
970 | |||
971 | static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, | ||
972 | struct buffer_head **p) | ||
973 | { | ||
974 | struct buffer_head * bh; | ||
975 | unsigned long block_group; | ||
976 | unsigned long block; | ||
977 | unsigned long offset; | ||
978 | struct ext2_group_desc * gdp; | ||
979 | |||
980 | *p = NULL; | ||
981 | if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) || | ||
982 | ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) | ||
983 | goto Einval; | ||
984 | |||
985 | block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); | ||
986 | gdp = ext2_get_group_desc(sb, block_group, &bh); | ||
987 | if (!gdp) | ||
988 | goto Egdp; | ||
989 | /* | ||
990 | * Figure out the offset within the block group inode table | ||
991 | */ | ||
992 | offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb); | ||
993 | block = le32_to_cpu(gdp->bg_inode_table) + | ||
994 | (offset >> EXT2_BLOCK_SIZE_BITS(sb)); | ||
995 | if (!(bh = sb_bread(sb, block))) | ||
996 | goto Eio; | ||
997 | |||
998 | *p = bh; | ||
999 | offset &= (EXT2_BLOCK_SIZE(sb) - 1); | ||
1000 | return (struct ext2_inode *) (bh->b_data + offset); | ||
1001 | |||
1002 | Einval: | ||
1003 | ext2_error(sb, "ext2_get_inode", "bad inode number: %lu", | ||
1004 | (unsigned long) ino); | ||
1005 | return ERR_PTR(-EINVAL); | ||
1006 | Eio: | ||
1007 | ext2_error(sb, "ext2_get_inode", | ||
1008 | "unable to read inode block - inode=%lu, block=%lu", | ||
1009 | (unsigned long) ino, block); | ||
1010 | Egdp: | ||
1011 | return ERR_PTR(-EIO); | ||
1012 | } | ||
1013 | |||
1014 | void ext2_set_inode_flags(struct inode *inode) | ||
1015 | { | ||
1016 | unsigned int flags = EXT2_I(inode)->i_flags; | ||
1017 | |||
1018 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | ||
1019 | if (flags & EXT2_SYNC_FL) | ||
1020 | inode->i_flags |= S_SYNC; | ||
1021 | if (flags & EXT2_APPEND_FL) | ||
1022 | inode->i_flags |= S_APPEND; | ||
1023 | if (flags & EXT2_IMMUTABLE_FL) | ||
1024 | inode->i_flags |= S_IMMUTABLE; | ||
1025 | if (flags & EXT2_NOATIME_FL) | ||
1026 | inode->i_flags |= S_NOATIME; | ||
1027 | if (flags & EXT2_DIRSYNC_FL) | ||
1028 | inode->i_flags |= S_DIRSYNC; | ||
1029 | } | ||
1030 | |||
1031 | void ext2_read_inode (struct inode * inode) | ||
1032 | { | ||
1033 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
1034 | ino_t ino = inode->i_ino; | ||
1035 | struct buffer_head * bh; | ||
1036 | struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); | ||
1037 | int n; | ||
1038 | |||
1039 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
1040 | ei->i_acl = EXT2_ACL_NOT_CACHED; | ||
1041 | ei->i_default_acl = EXT2_ACL_NOT_CACHED; | ||
1042 | #endif | ||
1043 | if (IS_ERR(raw_inode)) | ||
1044 | goto bad_inode; | ||
1045 | |||
1046 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | ||
1047 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | ||
1048 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | ||
1049 | if (!(test_opt (inode->i_sb, NO_UID32))) { | ||
1050 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | ||
1051 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | ||
1052 | } | ||
1053 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | ||
1054 | inode->i_size = le32_to_cpu(raw_inode->i_size); | ||
1055 | inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); | ||
1056 | inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); | ||
1057 | inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); | ||
1058 | inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; | ||
1059 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | ||
1060 | /* We now have enough fields to check if the inode was active or not. | ||
1061 | * This is needed because nfsd might try to access dead inodes | ||
1062 | * the test is that same one that e2fsck uses | ||
1063 | * NeilBrown 1999oct15 | ||
1064 | */ | ||
1065 | if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { | ||
1066 | /* this inode is deleted */ | ||
1067 | brelse (bh); | ||
1068 | goto bad_inode; | ||
1069 | } | ||
1070 | inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ | ||
1071 | inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); | ||
1072 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); | ||
1073 | ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); | ||
1074 | ei->i_frag_no = raw_inode->i_frag; | ||
1075 | ei->i_frag_size = raw_inode->i_fsize; | ||
1076 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); | ||
1077 | ei->i_dir_acl = 0; | ||
1078 | if (S_ISREG(inode->i_mode)) | ||
1079 | inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; | ||
1080 | else | ||
1081 | ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); | ||
1082 | ei->i_dtime = 0; | ||
1083 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | ||
1084 | ei->i_state = 0; | ||
1085 | ei->i_next_alloc_block = 0; | ||
1086 | ei->i_next_alloc_goal = 0; | ||
1087 | ei->i_prealloc_count = 0; | ||
1088 | ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); | ||
1089 | ei->i_dir_start_lookup = 0; | ||
1090 | |||
1091 | /* | ||
1092 | * NOTE! The in-memory inode i_data array is in little-endian order | ||
1093 | * even on big-endian machines: we do NOT byteswap the block numbers! | ||
1094 | */ | ||
1095 | for (n = 0; n < EXT2_N_BLOCKS; n++) | ||
1096 | ei->i_data[n] = raw_inode->i_block[n]; | ||
1097 | |||
1098 | if (S_ISREG(inode->i_mode)) { | ||
1099 | inode->i_op = &ext2_file_inode_operations; | ||
1100 | inode->i_fop = &ext2_file_operations; | ||
1101 | if (test_opt(inode->i_sb, NOBH)) | ||
1102 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
1103 | else | ||
1104 | inode->i_mapping->a_ops = &ext2_aops; | ||
1105 | } else if (S_ISDIR(inode->i_mode)) { | ||
1106 | inode->i_op = &ext2_dir_inode_operations; | ||
1107 | inode->i_fop = &ext2_dir_operations; | ||
1108 | if (test_opt(inode->i_sb, NOBH)) | ||
1109 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
1110 | else | ||
1111 | inode->i_mapping->a_ops = &ext2_aops; | ||
1112 | } else if (S_ISLNK(inode->i_mode)) { | ||
1113 | if (ext2_inode_is_fast_symlink(inode)) | ||
1114 | inode->i_op = &ext2_fast_symlink_inode_operations; | ||
1115 | else { | ||
1116 | inode->i_op = &ext2_symlink_inode_operations; | ||
1117 | if (test_opt(inode->i_sb, NOBH)) | ||
1118 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
1119 | else | ||
1120 | inode->i_mapping->a_ops = &ext2_aops; | ||
1121 | } | ||
1122 | } else { | ||
1123 | inode->i_op = &ext2_special_inode_operations; | ||
1124 | if (raw_inode->i_block[0]) | ||
1125 | init_special_inode(inode, inode->i_mode, | ||
1126 | old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); | ||
1127 | else | ||
1128 | init_special_inode(inode, inode->i_mode, | ||
1129 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | ||
1130 | } | ||
1131 | brelse (bh); | ||
1132 | ext2_set_inode_flags(inode); | ||
1133 | return; | ||
1134 | |||
1135 | bad_inode: | ||
1136 | make_bad_inode(inode); | ||
1137 | return; | ||
1138 | } | ||
1139 | |||
1140 | static int ext2_update_inode(struct inode * inode, int do_sync) | ||
1141 | { | ||
1142 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
1143 | struct super_block *sb = inode->i_sb; | ||
1144 | ino_t ino = inode->i_ino; | ||
1145 | uid_t uid = inode->i_uid; | ||
1146 | gid_t gid = inode->i_gid; | ||
1147 | struct buffer_head * bh; | ||
1148 | struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); | ||
1149 | int n; | ||
1150 | int err = 0; | ||
1151 | |||
1152 | if (IS_ERR(raw_inode)) | ||
1153 | return -EIO; | ||
1154 | |||
1155 | /* For fields not not tracking in the in-memory inode, | ||
1156 | * initialise them to zero for new inodes. */ | ||
1157 | if (ei->i_state & EXT2_STATE_NEW) | ||
1158 | memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); | ||
1159 | |||
1160 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | ||
1161 | if (!(test_opt(sb, NO_UID32))) { | ||
1162 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); | ||
1163 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); | ||
1164 | /* | ||
1165 | * Fix up interoperability with old kernels. Otherwise, old inodes get | ||
1166 | * re-used with the upper 16 bits of the uid/gid intact | ||
1167 | */ | ||
1168 | if (!ei->i_dtime) { | ||
1169 | raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid)); | ||
1170 | raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid)); | ||
1171 | } else { | ||
1172 | raw_inode->i_uid_high = 0; | ||
1173 | raw_inode->i_gid_high = 0; | ||
1174 | } | ||
1175 | } else { | ||
1176 | raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid)); | ||
1177 | raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid)); | ||
1178 | raw_inode->i_uid_high = 0; | ||
1179 | raw_inode->i_gid_high = 0; | ||
1180 | } | ||
1181 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | ||
1182 | raw_inode->i_size = cpu_to_le32(inode->i_size); | ||
1183 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | ||
1184 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | ||
1185 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | ||
1186 | |||
1187 | raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); | ||
1188 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | ||
1189 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | ||
1190 | raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); | ||
1191 | raw_inode->i_frag = ei->i_frag_no; | ||
1192 | raw_inode->i_fsize = ei->i_frag_size; | ||
1193 | raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); | ||
1194 | if (!S_ISREG(inode->i_mode)) | ||
1195 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); | ||
1196 | else { | ||
1197 | raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); | ||
1198 | if (inode->i_size > 0x7fffffffULL) { | ||
1199 | if (!EXT2_HAS_RO_COMPAT_FEATURE(sb, | ||
1200 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE) || | ||
1201 | EXT2_SB(sb)->s_es->s_rev_level == | ||
1202 | cpu_to_le32(EXT2_GOOD_OLD_REV)) { | ||
1203 | /* If this is the first large file | ||
1204 | * created, add a flag to the superblock. | ||
1205 | */ | ||
1206 | lock_kernel(); | ||
1207 | ext2_update_dynamic_rev(sb); | ||
1208 | EXT2_SET_RO_COMPAT_FEATURE(sb, | ||
1209 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE); | ||
1210 | unlock_kernel(); | ||
1211 | ext2_write_super(sb); | ||
1212 | } | ||
1213 | } | ||
1214 | } | ||
1215 | |||
1216 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | ||
1217 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | ||
1218 | if (old_valid_dev(inode->i_rdev)) { | ||
1219 | raw_inode->i_block[0] = | ||
1220 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | ||
1221 | raw_inode->i_block[1] = 0; | ||
1222 | } else { | ||
1223 | raw_inode->i_block[0] = 0; | ||
1224 | raw_inode->i_block[1] = | ||
1225 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | ||
1226 | raw_inode->i_block[2] = 0; | ||
1227 | } | ||
1228 | } else for (n = 0; n < EXT2_N_BLOCKS; n++) | ||
1229 | raw_inode->i_block[n] = ei->i_data[n]; | ||
1230 | mark_buffer_dirty(bh); | ||
1231 | if (do_sync) { | ||
1232 | sync_dirty_buffer(bh); | ||
1233 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
1234 | printk ("IO error syncing ext2 inode [%s:%08lx]\n", | ||
1235 | sb->s_id, (unsigned long) ino); | ||
1236 | err = -EIO; | ||
1237 | } | ||
1238 | } | ||
1239 | ei->i_state &= ~EXT2_STATE_NEW; | ||
1240 | brelse (bh); | ||
1241 | return err; | ||
1242 | } | ||
1243 | |||
/*
 * Write @inode back to its on-disk slot; @wait is handed through to
 * ext2_update_inode() as its do_sync argument.
 */
int ext2_write_inode(struct inode *inode, int wait)
{
	int err;

	err = ext2_update_inode(inode, wait);
	return err;
}
1248 | |||
1249 | int ext2_sync_inode(struct inode *inode) | ||
1250 | { | ||
1251 | struct writeback_control wbc = { | ||
1252 | .sync_mode = WB_SYNC_ALL, | ||
1253 | .nr_to_write = 0, /* sys_fsync did this */ | ||
1254 | }; | ||
1255 | return sync_inode(inode, &wbc); | ||
1256 | } | ||
1257 | |||
1258 | int ext2_setattr(struct dentry *dentry, struct iattr *iattr) | ||
1259 | { | ||
1260 | struct inode *inode = dentry->d_inode; | ||
1261 | int error; | ||
1262 | |||
1263 | error = inode_change_ok(inode, iattr); | ||
1264 | if (error) | ||
1265 | return error; | ||
1266 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | ||
1267 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { | ||
1268 | error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0; | ||
1269 | if (error) | ||
1270 | return error; | ||
1271 | } | ||
1272 | error = inode_setattr(inode, iattr); | ||
1273 | if (!error && (iattr->ia_valid & ATTR_MODE)) | ||
1274 | error = ext2_acl_chmod(inode); | ||
1275 | return error; | ||
1276 | } | ||
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c new file mode 100644 index 000000000000..709d8676b962 --- /dev/null +++ b/fs/ext2/ioctl.c | |||
@@ -0,0 +1,81 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/ioctl.c | ||
3 | * | ||
4 | * Copyright (C) 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | */ | ||
9 | |||
10 | #include "ext2.h" | ||
11 | #include <linux/time.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <asm/current.h> | ||
14 | #include <asm/uaccess.h> | ||
15 | |||
16 | |||
17 | int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | ||
18 | unsigned long arg) | ||
19 | { | ||
20 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
21 | unsigned int flags; | ||
22 | |||
23 | ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); | ||
24 | |||
25 | switch (cmd) { | ||
26 | case EXT2_IOC_GETFLAGS: | ||
27 | flags = ei->i_flags & EXT2_FL_USER_VISIBLE; | ||
28 | return put_user(flags, (int __user *) arg); | ||
29 | case EXT2_IOC_SETFLAGS: { | ||
30 | unsigned int oldflags; | ||
31 | |||
32 | if (IS_RDONLY(inode)) | ||
33 | return -EROFS; | ||
34 | |||
35 | if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) | ||
36 | return -EACCES; | ||
37 | |||
38 | if (get_user(flags, (int __user *) arg)) | ||
39 | return -EFAULT; | ||
40 | |||
41 | if (!S_ISDIR(inode->i_mode)) | ||
42 | flags &= ~EXT2_DIRSYNC_FL; | ||
43 | |||
44 | oldflags = ei->i_flags; | ||
45 | |||
46 | /* | ||
47 | * The IMMUTABLE and APPEND_ONLY flags can only be changed by | ||
48 | * the relevant capability. | ||
49 | * | ||
50 | * This test looks nicer. Thanks to Pauline Middelink | ||
51 | */ | ||
52 | if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { | ||
53 | if (!capable(CAP_LINUX_IMMUTABLE)) | ||
54 | return -EPERM; | ||
55 | } | ||
56 | |||
57 | flags = flags & EXT2_FL_USER_MODIFIABLE; | ||
58 | flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; | ||
59 | ei->i_flags = flags; | ||
60 | |||
61 | ext2_set_inode_flags(inode); | ||
62 | inode->i_ctime = CURRENT_TIME_SEC; | ||
63 | mark_inode_dirty(inode); | ||
64 | return 0; | ||
65 | } | ||
66 | case EXT2_IOC_GETVERSION: | ||
67 | return put_user(inode->i_generation, (int __user *) arg); | ||
68 | case EXT2_IOC_SETVERSION: | ||
69 | if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) | ||
70 | return -EPERM; | ||
71 | if (IS_RDONLY(inode)) | ||
72 | return -EROFS; | ||
73 | if (get_user(inode->i_generation, (int __user *) arg)) | ||
74 | return -EFAULT; | ||
75 | inode->i_ctime = CURRENT_TIME_SEC; | ||
76 | mark_inode_dirty(inode); | ||
77 | return 0; | ||
78 | default: | ||
79 | return -ENOTTY; | ||
80 | } | ||
81 | } | ||
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c new file mode 100644 index 000000000000..3176b3d3ffa8 --- /dev/null +++ b/fs/ext2/namei.c | |||
@@ -0,0 +1,418 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/namei.c | ||
3 | * | ||
4 | * Rewrite to pagecache. Almost all code had been changed, so blame me | ||
5 | * if the things go wrong. Please, send bug reports to | ||
6 | * viro@parcelfarce.linux.theplanet.co.uk | ||
7 | * | ||
8 | * Stuff here is basically a glue between the VFS and generic UNIXish | ||
9 | * filesystem that keeps everything in pagecache. All knowledge of the | ||
10 | * directory layout is in fs/ext2/dir.c - it turned out to be easily separatable | ||
11 | * and it's easier to debug that way. In principle we might want to | ||
12 | * generalize that a bit and turn it into a library. Or not. | ||
13 | * | ||
14 | * The only non-static object here is ext2_dir_inode_operations. | ||
15 | * | ||
16 | * TODO: get rid of kmap() use, add readahead. | ||
17 | * | ||
18 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
19 | * Remy Card (card@masi.ibp.fr) | ||
20 | * Laboratoire MASI - Institut Blaise Pascal | ||
21 | * Universite Pierre et Marie Curie (Paris VI) | ||
22 | * | ||
23 | * from | ||
24 | * | ||
25 | * linux/fs/minix/namei.c | ||
26 | * | ||
27 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
28 | * | ||
29 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
30 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
31 | */ | ||
32 | |||
33 | #include <linux/pagemap.h> | ||
34 | #include "ext2.h" | ||
35 | #include "xattr.h" | ||
36 | #include "acl.h" | ||
37 | |||
38 | /* | ||
39 | * Couple of helper functions - make the code slightly cleaner. | ||
40 | */ | ||
41 | |||
42 | static inline void ext2_inc_count(struct inode *inode) | ||
43 | { | ||
44 | inode->i_nlink++; | ||
45 | mark_inode_dirty(inode); | ||
46 | } | ||
47 | |||
48 | static inline void ext2_dec_count(struct inode *inode) | ||
49 | { | ||
50 | inode->i_nlink--; | ||
51 | mark_inode_dirty(inode); | ||
52 | } | ||
53 | |||
/*
 * Link @inode into the directory under @dentry's name and instantiate the
 * dentry.  If adding the link fails, one link and one reference are
 * dropped from @inode and the error is returned.
 */
static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = ext2_add_link(dentry, inode);

	if (err) {
		ext2_dec_count(inode);
		iput(inode);
		return err;
	}

	d_instantiate(dentry, inode);
	return 0;
}
65 | |||
66 | /* | ||
67 | * Methods themselves. | ||
68 | */ | ||
69 | |||
70 | static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | ||
71 | { | ||
72 | struct inode * inode; | ||
73 | ino_t ino; | ||
74 | |||
75 | if (dentry->d_name.len > EXT2_NAME_LEN) | ||
76 | return ERR_PTR(-ENAMETOOLONG); | ||
77 | |||
78 | ino = ext2_inode_by_name(dir, dentry); | ||
79 | inode = NULL; | ||
80 | if (ino) { | ||
81 | inode = iget(dir->i_sb, ino); | ||
82 | if (!inode) | ||
83 | return ERR_PTR(-EACCES); | ||
84 | } | ||
85 | if (inode) | ||
86 | return d_splice_alias(inode, dentry); | ||
87 | d_add(dentry, inode); | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | struct dentry *ext2_get_parent(struct dentry *child) | ||
92 | { | ||
93 | unsigned long ino; | ||
94 | struct dentry *parent; | ||
95 | struct inode *inode; | ||
96 | struct dentry dotdot; | ||
97 | |||
98 | dotdot.d_name.name = ".."; | ||
99 | dotdot.d_name.len = 2; | ||
100 | |||
101 | ino = ext2_inode_by_name(child->d_inode, &dotdot); | ||
102 | if (!ino) | ||
103 | return ERR_PTR(-ENOENT); | ||
104 | inode = iget(child->d_inode->i_sb, ino); | ||
105 | |||
106 | if (!inode) | ||
107 | return ERR_PTR(-EACCES); | ||
108 | parent = d_alloc_anon(inode); | ||
109 | if (!parent) { | ||
110 | iput(inode); | ||
111 | parent = ERR_PTR(-ENOMEM); | ||
112 | } | ||
113 | return parent; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * By the time this is called, we already have created | ||
118 | * the directory cache entry for the new file, but it | ||
119 | * is so far negative - it has no inode. | ||
120 | * | ||
121 | * If the create succeeds, we fill in the inode information | ||
122 | * with d_instantiate(). | ||
123 | */ | ||
124 | static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) | ||
125 | { | ||
126 | struct inode * inode = ext2_new_inode (dir, mode); | ||
127 | int err = PTR_ERR(inode); | ||
128 | if (!IS_ERR(inode)) { | ||
129 | inode->i_op = &ext2_file_inode_operations; | ||
130 | inode->i_fop = &ext2_file_operations; | ||
131 | if (test_opt(inode->i_sb, NOBH)) | ||
132 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
133 | else | ||
134 | inode->i_mapping->a_ops = &ext2_aops; | ||
135 | mark_inode_dirty(inode); | ||
136 | err = ext2_add_nondir(dentry, inode); | ||
137 | } | ||
138 | return err; | ||
139 | } | ||
140 | |||
141 | static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) | ||
142 | { | ||
143 | struct inode * inode; | ||
144 | int err; | ||
145 | |||
146 | if (!new_valid_dev(rdev)) | ||
147 | return -EINVAL; | ||
148 | |||
149 | inode = ext2_new_inode (dir, mode); | ||
150 | err = PTR_ERR(inode); | ||
151 | if (!IS_ERR(inode)) { | ||
152 | init_special_inode(inode, inode->i_mode, rdev); | ||
153 | #ifdef CONFIG_EXT2_FS_XATTR | ||
154 | inode->i_op = &ext2_special_inode_operations; | ||
155 | #endif | ||
156 | mark_inode_dirty(inode); | ||
157 | err = ext2_add_nondir(dentry, inode); | ||
158 | } | ||
159 | return err; | ||
160 | } | ||
161 | |||
162 | static int ext2_symlink (struct inode * dir, struct dentry * dentry, | ||
163 | const char * symname) | ||
164 | { | ||
165 | struct super_block * sb = dir->i_sb; | ||
166 | int err = -ENAMETOOLONG; | ||
167 | unsigned l = strlen(symname)+1; | ||
168 | struct inode * inode; | ||
169 | |||
170 | if (l > sb->s_blocksize) | ||
171 | goto out; | ||
172 | |||
173 | inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); | ||
174 | err = PTR_ERR(inode); | ||
175 | if (IS_ERR(inode)) | ||
176 | goto out; | ||
177 | |||
178 | if (l > sizeof (EXT2_I(inode)->i_data)) { | ||
179 | /* slow symlink */ | ||
180 | inode->i_op = &ext2_symlink_inode_operations; | ||
181 | if (test_opt(inode->i_sb, NOBH)) | ||
182 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
183 | else | ||
184 | inode->i_mapping->a_ops = &ext2_aops; | ||
185 | err = page_symlink(inode, symname, l); | ||
186 | if (err) | ||
187 | goto out_fail; | ||
188 | } else { | ||
189 | /* fast symlink */ | ||
190 | inode->i_op = &ext2_fast_symlink_inode_operations; | ||
191 | memcpy((char*)(EXT2_I(inode)->i_data),symname,l); | ||
192 | inode->i_size = l-1; | ||
193 | } | ||
194 | mark_inode_dirty(inode); | ||
195 | |||
196 | err = ext2_add_nondir(dentry, inode); | ||
197 | out: | ||
198 | return err; | ||
199 | |||
200 | out_fail: | ||
201 | ext2_dec_count(inode); | ||
202 | iput (inode); | ||
203 | goto out; | ||
204 | } | ||
205 | |||
206 | static int ext2_link (struct dentry * old_dentry, struct inode * dir, | ||
207 | struct dentry *dentry) | ||
208 | { | ||
209 | struct inode *inode = old_dentry->d_inode; | ||
210 | |||
211 | if (inode->i_nlink >= EXT2_LINK_MAX) | ||
212 | return -EMLINK; | ||
213 | |||
214 | inode->i_ctime = CURRENT_TIME_SEC; | ||
215 | ext2_inc_count(inode); | ||
216 | atomic_inc(&inode->i_count); | ||
217 | |||
218 | return ext2_add_nondir(dentry, inode); | ||
219 | } | ||
220 | |||
221 | static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) | ||
222 | { | ||
223 | struct inode * inode; | ||
224 | int err = -EMLINK; | ||
225 | |||
226 | if (dir->i_nlink >= EXT2_LINK_MAX) | ||
227 | goto out; | ||
228 | |||
229 | ext2_inc_count(dir); | ||
230 | |||
231 | inode = ext2_new_inode (dir, S_IFDIR | mode); | ||
232 | err = PTR_ERR(inode); | ||
233 | if (IS_ERR(inode)) | ||
234 | goto out_dir; | ||
235 | |||
236 | inode->i_op = &ext2_dir_inode_operations; | ||
237 | inode->i_fop = &ext2_dir_operations; | ||
238 | if (test_opt(inode->i_sb, NOBH)) | ||
239 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
240 | else | ||
241 | inode->i_mapping->a_ops = &ext2_aops; | ||
242 | |||
243 | ext2_inc_count(inode); | ||
244 | |||
245 | err = ext2_make_empty(inode, dir); | ||
246 | if (err) | ||
247 | goto out_fail; | ||
248 | |||
249 | err = ext2_add_link(dentry, inode); | ||
250 | if (err) | ||
251 | goto out_fail; | ||
252 | |||
253 | d_instantiate(dentry, inode); | ||
254 | out: | ||
255 | return err; | ||
256 | |||
257 | out_fail: | ||
258 | ext2_dec_count(inode); | ||
259 | ext2_dec_count(inode); | ||
260 | iput(inode); | ||
261 | out_dir: | ||
262 | ext2_dec_count(dir); | ||
263 | goto out; | ||
264 | } | ||
265 | |||
266 | static int ext2_unlink(struct inode * dir, struct dentry *dentry) | ||
267 | { | ||
268 | struct inode * inode = dentry->d_inode; | ||
269 | struct ext2_dir_entry_2 * de; | ||
270 | struct page * page; | ||
271 | int err = -ENOENT; | ||
272 | |||
273 | de = ext2_find_entry (dir, dentry, &page); | ||
274 | if (!de) | ||
275 | goto out; | ||
276 | |||
277 | err = ext2_delete_entry (de, page); | ||
278 | if (err) | ||
279 | goto out; | ||
280 | |||
281 | inode->i_ctime = dir->i_ctime; | ||
282 | ext2_dec_count(inode); | ||
283 | err = 0; | ||
284 | out: | ||
285 | return err; | ||
286 | } | ||
287 | |||
288 | static int ext2_rmdir (struct inode * dir, struct dentry *dentry) | ||
289 | { | ||
290 | struct inode * inode = dentry->d_inode; | ||
291 | int err = -ENOTEMPTY; | ||
292 | |||
293 | if (ext2_empty_dir(inode)) { | ||
294 | err = ext2_unlink(dir, dentry); | ||
295 | if (!err) { | ||
296 | inode->i_size = 0; | ||
297 | ext2_dec_count(inode); | ||
298 | ext2_dec_count(dir); | ||
299 | } | ||
300 | } | ||
301 | return err; | ||
302 | } | ||
303 | |||
/*
 * Rename @old_dentry in @old_dir to @new_dentry in @new_dir.
 *
 * If the target name already exists its directory entry is re-pointed at
 * the source inode and the old target loses its link(s); otherwise a new
 * entry is added.  The source entry is then deleted.  When the source is
 * a directory, the entry obtained from ext2_dotdot() is re-pointed at
 * @new_dir and the parents' link counts are adjusted.
 *
 * Returns 0 on success, or -ENOENT, -EIO, -ENOTEMPTY, -EMLINK or the
 * error from ext2_add_link().
 */
static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
	struct inode * new_dir,	struct dentry * new_dentry )
{
	struct inode * old_inode = old_dentry->d_inode;
	struct inode * new_inode = new_dentry->d_inode;
	struct page * dir_page = NULL;
	struct ext2_dir_entry_2 * dir_de = NULL;
	struct page * old_page;
	struct ext2_dir_entry_2 * old_de;
	int err = -ENOENT;

	old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
	if (!old_de)
		goto out;

	/* dir_de stays NULL unless the source is a directory */
	if (S_ISDIR(old_inode->i_mode)) {
		err = -EIO;
		dir_de = ext2_dotdot(old_inode, &dir_page);
		if (!dir_de)
			goto out_old;
	}

	if (new_inode) {
		/* The target name exists: replace what it points to */
		struct page *new_page;
		struct ext2_dir_entry_2 *new_de;

		err = -ENOTEMPTY;
		if (dir_de && !ext2_empty_dir (new_inode))
			goto out_dir;

		err = -ENOENT;
		new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
		if (!new_de)
			goto out_dir;
		/* extra link while old_inode is named by both entries */
		ext2_inc_count(old_inode);
		ext2_set_link(new_dir, new_de, new_page, old_inode);
		new_inode->i_ctime = CURRENT_TIME_SEC;
		/* a replaced directory loses one link more than a file */
		if (dir_de)
			new_inode->i_nlink--;
		ext2_dec_count(new_inode);
	} else {
		/* No target yet: add a fresh entry in new_dir */
		if (dir_de) {
			err = -EMLINK;
			if (new_dir->i_nlink >= EXT2_LINK_MAX)
				goto out_dir;
		}
		/* extra link while old_inode is named by both entries */
		ext2_inc_count(old_inode);
		err = ext2_add_link(new_dentry, old_inode);
		if (err) {
			ext2_dec_count(old_inode);
			goto out_dir;
		}
		/* a moved directory adds one link to its new parent */
		if (dir_de)
			ext2_inc_count(new_dir);
	}

	/*
	 * Like most other Unix systems, set the ctime for inodes on a
	 * rename.
	 * ext2_dec_count() will mark the inode dirty.
	 */
	old_inode->i_ctime = CURRENT_TIME_SEC;

	/*
	 * The return value of ext2_delete_entry() is not checked here.
	 * NOTE(review): on this success path the pages are not released in
	 * this function; presumably ext2_delete_entry()/ext2_set_link()
	 * consume them - confirm in dir.c.
	 */
	ext2_delete_entry (old_de, old_page);
	ext2_dec_count(old_inode);

	if (dir_de) {
		ext2_set_link(old_inode, dir_de, dir_page, new_dir);
		ext2_dec_count(old_dir);
	}
	return 0;


	/* Error exits: unmap and release whichever pages are still held */
out_dir:
	if (dir_de) {
		kunmap(dir_page);
		page_cache_release(dir_page);
	}
out_old:
	kunmap(old_page);
	page_cache_release(old_page);
out:
	return err;
}
388 | |||
389 | struct inode_operations ext2_dir_inode_operations = { | ||
390 | .create = ext2_create, | ||
391 | .lookup = ext2_lookup, | ||
392 | .link = ext2_link, | ||
393 | .unlink = ext2_unlink, | ||
394 | .symlink = ext2_symlink, | ||
395 | .mkdir = ext2_mkdir, | ||
396 | .rmdir = ext2_rmdir, | ||
397 | .mknod = ext2_mknod, | ||
398 | .rename = ext2_rename, | ||
399 | #ifdef CONFIG_EXT2_FS_XATTR | ||
400 | .setxattr = generic_setxattr, | ||
401 | .getxattr = generic_getxattr, | ||
402 | .listxattr = ext2_listxattr, | ||
403 | .removexattr = generic_removexattr, | ||
404 | #endif | ||
405 | .setattr = ext2_setattr, | ||
406 | .permission = ext2_permission, | ||
407 | }; | ||
408 | |||
409 | struct inode_operations ext2_special_inode_operations = { | ||
410 | #ifdef CONFIG_EXT2_FS_XATTR | ||
411 | .setxattr = generic_setxattr, | ||
412 | .getxattr = generic_getxattr, | ||
413 | .listxattr = ext2_listxattr, | ||
414 | .removexattr = generic_removexattr, | ||
415 | #endif | ||
416 | .setattr = ext2_setattr, | ||
417 | .permission = ext2_permission, | ||
418 | }; | ||
diff --git a/fs/ext2/super.c b/fs/ext2/super.c new file mode 100644 index 000000000000..37ca77a157ba --- /dev/null +++ b/fs/ext2/super.c | |||
@@ -0,0 +1,1161 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/super.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/fs/minix/inode.c | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | * | ||
15 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
16 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
17 | */ | ||
18 | |||
19 | #include <linux/config.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/blkdev.h> | ||
25 | #include <linux/parser.h> | ||
26 | #include <linux/random.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/smp_lock.h> | ||
29 | #include <linux/vfs.h> | ||
30 | #include <asm/uaccess.h> | ||
31 | #include "ext2.h" | ||
32 | #include "xattr.h" | ||
33 | #include "acl.h" | ||
34 | |||
35 | static void ext2_sync_super(struct super_block *sb, | ||
36 | struct ext2_super_block *es); | ||
37 | static int ext2_remount (struct super_block * sb, int * flags, char * data); | ||
38 | static int ext2_statfs (struct super_block * sb, struct kstatfs * buf); | ||
39 | |||
40 | void ext2_error (struct super_block * sb, const char * function, | ||
41 | const char * fmt, ...) | ||
42 | { | ||
43 | va_list args; | ||
44 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
45 | struct ext2_super_block *es = sbi->s_es; | ||
46 | |||
47 | if (!(sb->s_flags & MS_RDONLY)) { | ||
48 | sbi->s_mount_state |= EXT2_ERROR_FS; | ||
49 | es->s_state = | ||
50 | cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS); | ||
51 | ext2_sync_super(sb, es); | ||
52 | } | ||
53 | |||
54 | va_start(args, fmt); | ||
55 | printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function); | ||
56 | vprintk(fmt, args); | ||
57 | printk("\n"); | ||
58 | va_end(args); | ||
59 | |||
60 | if (test_opt(sb, ERRORS_PANIC)) | ||
61 | panic("EXT2-fs panic from previous error\n"); | ||
62 | if (test_opt(sb, ERRORS_RO)) { | ||
63 | printk("Remounting filesystem read-only\n"); | ||
64 | sb->s_flags |= MS_RDONLY; | ||
65 | } | ||
66 | } | ||
67 | |||
68 | void ext2_warning (struct super_block * sb, const char * function, | ||
69 | const char * fmt, ...) | ||
70 | { | ||
71 | va_list args; | ||
72 | |||
73 | va_start(args, fmt); | ||
74 | printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ", | ||
75 | sb->s_id, function); | ||
76 | vprintk(fmt, args); | ||
77 | printk("\n"); | ||
78 | va_end(args); | ||
79 | } | ||
80 | |||
81 | void ext2_update_dynamic_rev(struct super_block *sb) | ||
82 | { | ||
83 | struct ext2_super_block *es = EXT2_SB(sb)->s_es; | ||
84 | |||
85 | if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) | ||
86 | return; | ||
87 | |||
88 | ext2_warning(sb, __FUNCTION__, | ||
89 | "updating to rev %d because of new feature flag, " | ||
90 | "running e2fsck is recommended", | ||
91 | EXT2_DYNAMIC_REV); | ||
92 | |||
93 | es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); | ||
94 | es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE); | ||
95 | es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV); | ||
96 | /* leave es->s_feature_*compat flags alone */ | ||
97 | /* es->s_uuid will be set by e2fsck if empty */ | ||
98 | |||
99 | /* | ||
100 | * The rest of the superblock fields should be zero, and if not it | ||
101 | * means they are likely already in use, so leave them alone. We | ||
102 | * can leave it up to e2fsck to clean up any inconsistencies there. | ||
103 | */ | ||
104 | } | ||
105 | |||
106 | static void ext2_put_super (struct super_block * sb) | ||
107 | { | ||
108 | int db_count; | ||
109 | int i; | ||
110 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
111 | |||
112 | ext2_xattr_put_super(sb); | ||
113 | if (!(sb->s_flags & MS_RDONLY)) { | ||
114 | struct ext2_super_block *es = sbi->s_es; | ||
115 | |||
116 | es->s_state = cpu_to_le16(sbi->s_mount_state); | ||
117 | ext2_sync_super(sb, es); | ||
118 | } | ||
119 | db_count = sbi->s_gdb_count; | ||
120 | for (i = 0; i < db_count; i++) | ||
121 | if (sbi->s_group_desc[i]) | ||
122 | brelse (sbi->s_group_desc[i]); | ||
123 | kfree(sbi->s_group_desc); | ||
124 | kfree(sbi->s_debts); | ||
125 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
126 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
127 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
128 | brelse (sbi->s_sbh); | ||
129 | sb->s_fs_info = NULL; | ||
130 | kfree(sbi); | ||
131 | |||
132 | return; | ||
133 | } | ||
134 | |||
135 | static kmem_cache_t * ext2_inode_cachep; | ||
136 | |||
137 | static struct inode *ext2_alloc_inode(struct super_block *sb) | ||
138 | { | ||
139 | struct ext2_inode_info *ei; | ||
140 | ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL); | ||
141 | if (!ei) | ||
142 | return NULL; | ||
143 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
144 | ei->i_acl = EXT2_ACL_NOT_CACHED; | ||
145 | ei->i_default_acl = EXT2_ACL_NOT_CACHED; | ||
146 | #endif | ||
147 | ei->vfs_inode.i_version = 1; | ||
148 | return &ei->vfs_inode; | ||
149 | } | ||
150 | |||
151 | static void ext2_destroy_inode(struct inode *inode) | ||
152 | { | ||
153 | kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); | ||
154 | } | ||
155 | |||
156 | static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) | ||
157 | { | ||
158 | struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; | ||
159 | |||
160 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
161 | SLAB_CTOR_CONSTRUCTOR) { | ||
162 | rwlock_init(&ei->i_meta_lock); | ||
163 | #ifdef CONFIG_EXT2_FS_XATTR | ||
164 | init_rwsem(&ei->xattr_sem); | ||
165 | #endif | ||
166 | inode_init_once(&ei->vfs_inode); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static int init_inodecache(void) | ||
171 | { | ||
172 | ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", | ||
173 | sizeof(struct ext2_inode_info), | ||
174 | 0, SLAB_RECLAIM_ACCOUNT, | ||
175 | init_once, NULL); | ||
176 | if (ext2_inode_cachep == NULL) | ||
177 | return -ENOMEM; | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static void destroy_inodecache(void) | ||
182 | { | ||
183 | if (kmem_cache_destroy(ext2_inode_cachep)) | ||
184 | printk(KERN_INFO "ext2_inode_cache: not all structures were freed\n"); | ||
185 | } | ||
186 | |||
/*
 * Called when the VFS clears @inode: release any cached ACLs (when ACL
 * support is compiled in) and, unless the inode is marked bad, discard
 * its preallocated blocks.
 */
static void ext2_clear_inode(struct inode *inode)
{
#ifdef CONFIG_EXT2_FS_POSIX_ACL
	struct ext2_inode_info *info = EXT2_I(inode);

	/* access ACL: release only a really cached object */
	if (info->i_acl != NULL && info->i_acl != EXT2_ACL_NOT_CACHED) {
		posix_acl_release(info->i_acl);
		info->i_acl = EXT2_ACL_NOT_CACHED;
	}

	/* default ACL: same treatment */
	if (info->i_default_acl != NULL &&
	    info->i_default_acl != EXT2_ACL_NOT_CACHED) {
		posix_acl_release(info->i_default_acl);
		info->i_default_acl = EXT2_ACL_NOT_CACHED;
	}
#endif
	if (is_bad_inode(inode))
		return;

	ext2_discard_prealloc(inode);
}
204 | |||
205 | |||
206 | #ifdef CONFIG_QUOTA | ||
207 | static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); | ||
208 | static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); | ||
209 | #endif | ||
210 | |||
211 | static struct super_operations ext2_sops = { | ||
212 | .alloc_inode = ext2_alloc_inode, | ||
213 | .destroy_inode = ext2_destroy_inode, | ||
214 | .read_inode = ext2_read_inode, | ||
215 | .write_inode = ext2_write_inode, | ||
216 | .delete_inode = ext2_delete_inode, | ||
217 | .put_super = ext2_put_super, | ||
218 | .write_super = ext2_write_super, | ||
219 | .statfs = ext2_statfs, | ||
220 | .remount_fs = ext2_remount, | ||
221 | .clear_inode = ext2_clear_inode, | ||
222 | #ifdef CONFIG_QUOTA | ||
223 | .quota_read = ext2_quota_read, | ||
224 | .quota_write = ext2_quota_write, | ||
225 | #endif | ||
226 | }; | ||
227 | |||
228 | /* Yes, most of these are left as NULL!! | ||
229 | * A NULL value implies the default, which works with ext2-like file | ||
230 | * systems, but can be improved upon. | ||
231 | * Currently only get_parent is required. | ||
232 | */ | ||
233 | struct dentry *ext2_get_parent(struct dentry *child); | ||
234 | static struct export_operations ext2_export_ops = { | ||
235 | .get_parent = ext2_get_parent, | ||
236 | }; | ||
237 | |||
/*
 * Extract a leading "sb=<n>" entry from the mount option string.
 *
 * @data: in/out pointer to the option string.  When a valid "sb=" entry
 *        is found, *data is advanced past it (and past the following
 *        comma, if any).
 *
 * Returns the requested superblock location, or 1 (the default) when
 * there is no option string, it does not start with "sb=", or the
 * number is followed by anything other than ',' or end of string.  In
 * those cases *data is left untouched.
 */
static unsigned long get_sb_block(void **data)
{
	char *opts = (char *) *data;
	char *cursor;
	unsigned long blk;

	if (opts == NULL || strncmp(opts, "sb=", 3) != 0)
		return 1;	/* Default location */

	blk = simple_strtoul(opts + 3, &cursor, 0);

	switch (*cursor) {
	case ',':
		cursor++;	/* step over the separator */
		/* fall through */
	case '\0':
		*data = (void *) cursor;
		return blk;
	default:
		printk("EXT2-fs: Invalid sb specification: %s\n",
		       (char *) *data);
		return 1;
	}
}
257 | |||
/*
 * Token identifiers returned by match_token() for the mount options
 * listed in the tokens[] table.
 */
enum {
	Opt_bsd_df,
	Opt_minix_df,
	Opt_grpid,
	Opt_nogrpid,
	Opt_resgid,
	Opt_resuid,
	Opt_sb,
	Opt_err_cont,
	Opt_err_panic,
	Opt_err_ro,
	Opt_nouid32,
	Opt_check,
	Opt_nocheck,
	Opt_debug,
	Opt_oldalloc,
	Opt_orlov,
	Opt_nobh,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_ignore,
	Opt_err,
};
265 | |||
266 | static match_table_t tokens = { | ||
267 | {Opt_bsd_df, "bsddf"}, | ||
268 | {Opt_minix_df, "minixdf"}, | ||
269 | {Opt_grpid, "grpid"}, | ||
270 | {Opt_grpid, "bsdgroups"}, | ||
271 | {Opt_nogrpid, "nogrpid"}, | ||
272 | {Opt_nogrpid, "sysvgroups"}, | ||
273 | {Opt_resgid, "resgid=%u"}, | ||
274 | {Opt_resuid, "resuid=%u"}, | ||
275 | {Opt_sb, "sb=%u"}, | ||
276 | {Opt_err_cont, "errors=continue"}, | ||
277 | {Opt_err_panic, "errors=panic"}, | ||
278 | {Opt_err_ro, "errors=remount-ro"}, | ||
279 | {Opt_nouid32, "nouid32"}, | ||
280 | {Opt_nocheck, "check=none"}, | ||
281 | {Opt_nocheck, "nocheck"}, | ||
282 | {Opt_check, "check"}, | ||
283 | {Opt_debug, "debug"}, | ||
284 | {Opt_oldalloc, "oldalloc"}, | ||
285 | {Opt_orlov, "orlov"}, | ||
286 | {Opt_nobh, "nobh"}, | ||
287 | {Opt_user_xattr, "user_xattr"}, | ||
288 | {Opt_nouser_xattr, "nouser_xattr"}, | ||
289 | {Opt_acl, "acl"}, | ||
290 | {Opt_noacl, "noacl"}, | ||
291 | {Opt_ignore, "grpquota"}, | ||
292 | {Opt_ignore, "noquota"}, | ||
293 | {Opt_ignore, "quota"}, | ||
294 | {Opt_ignore, "usrquota"}, | ||
295 | {Opt_err, NULL} | ||
296 | }; | ||
297 | |||
298 | static int parse_options (char * options, | ||
299 | struct ext2_sb_info *sbi) | ||
300 | { | ||
301 | char * p; | ||
302 | substring_t args[MAX_OPT_ARGS]; | ||
303 | unsigned long kind = EXT2_MOUNT_ERRORS_CONT; | ||
304 | int option; | ||
305 | |||
306 | if (!options) | ||
307 | return 1; | ||
308 | |||
309 | while ((p = strsep (&options, ",")) != NULL) { | ||
310 | int token; | ||
311 | if (!*p) | ||
312 | continue; | ||
313 | |||
314 | token = match_token(p, tokens, args); | ||
315 | switch (token) { | ||
316 | case Opt_bsd_df: | ||
317 | clear_opt (sbi->s_mount_opt, MINIX_DF); | ||
318 | break; | ||
319 | case Opt_minix_df: | ||
320 | set_opt (sbi->s_mount_opt, MINIX_DF); | ||
321 | break; | ||
322 | case Opt_grpid: | ||
323 | set_opt (sbi->s_mount_opt, GRPID); | ||
324 | break; | ||
325 | case Opt_nogrpid: | ||
326 | clear_opt (sbi->s_mount_opt, GRPID); | ||
327 | break; | ||
328 | case Opt_resuid: | ||
329 | if (match_int(&args[0], &option)) | ||
330 | return 0; | ||
331 | sbi->s_resuid = option; | ||
332 | break; | ||
333 | case Opt_resgid: | ||
334 | if (match_int(&args[0], &option)) | ||
335 | return 0; | ||
336 | sbi->s_resgid = option; | ||
337 | break; | ||
338 | case Opt_sb: | ||
339 | /* handled by get_sb_block() instead of here */ | ||
340 | /* *sb_block = match_int(&args[0]); */ | ||
341 | break; | ||
342 | case Opt_err_panic: | ||
343 | kind = EXT2_MOUNT_ERRORS_PANIC; | ||
344 | break; | ||
345 | case Opt_err_ro: | ||
346 | kind = EXT2_MOUNT_ERRORS_RO; | ||
347 | break; | ||
348 | case Opt_err_cont: | ||
349 | kind = EXT2_MOUNT_ERRORS_CONT; | ||
350 | break; | ||
351 | case Opt_nouid32: | ||
352 | set_opt (sbi->s_mount_opt, NO_UID32); | ||
353 | break; | ||
354 | case Opt_check: | ||
355 | #ifdef CONFIG_EXT2_CHECK | ||
356 | set_opt (sbi->s_mount_opt, CHECK); | ||
357 | #else | ||
358 | printk("EXT2 Check option not supported\n"); | ||
359 | #endif | ||
360 | break; | ||
361 | case Opt_nocheck: | ||
362 | clear_opt (sbi->s_mount_opt, CHECK); | ||
363 | break; | ||
364 | case Opt_debug: | ||
365 | set_opt (sbi->s_mount_opt, DEBUG); | ||
366 | break; | ||
367 | case Opt_oldalloc: | ||
368 | set_opt (sbi->s_mount_opt, OLDALLOC); | ||
369 | break; | ||
370 | case Opt_orlov: | ||
371 | clear_opt (sbi->s_mount_opt, OLDALLOC); | ||
372 | break; | ||
373 | case Opt_nobh: | ||
374 | set_opt (sbi->s_mount_opt, NOBH); | ||
375 | break; | ||
376 | #ifdef CONFIG_EXT2_FS_XATTR | ||
377 | case Opt_user_xattr: | ||
378 | set_opt (sbi->s_mount_opt, XATTR_USER); | ||
379 | break; | ||
380 | case Opt_nouser_xattr: | ||
381 | clear_opt (sbi->s_mount_opt, XATTR_USER); | ||
382 | break; | ||
383 | #else | ||
384 | case Opt_user_xattr: | ||
385 | case Opt_nouser_xattr: | ||
386 | printk("EXT2 (no)user_xattr options not supported\n"); | ||
387 | break; | ||
388 | #endif | ||
389 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
390 | case Opt_acl: | ||
391 | set_opt(sbi->s_mount_opt, POSIX_ACL); | ||
392 | break; | ||
393 | case Opt_noacl: | ||
394 | clear_opt(sbi->s_mount_opt, POSIX_ACL); | ||
395 | break; | ||
396 | #else | ||
397 | case Opt_acl: | ||
398 | case Opt_noacl: | ||
399 | printk("EXT2 (no)acl options not supported\n"); | ||
400 | break; | ||
401 | #endif | ||
402 | case Opt_ignore: | ||
403 | break; | ||
404 | default: | ||
405 | return 0; | ||
406 | } | ||
407 | } | ||
408 | sbi->s_mount_opt |= kind; | ||
409 | return 1; | ||
410 | } | ||
411 | |||
412 | static int ext2_setup_super (struct super_block * sb, | ||
413 | struct ext2_super_block * es, | ||
414 | int read_only) | ||
415 | { | ||
416 | int res = 0; | ||
417 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
418 | |||
419 | if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { | ||
420 | printk ("EXT2-fs warning: revision level too high, " | ||
421 | "forcing read-only mode\n"); | ||
422 | res = MS_RDONLY; | ||
423 | } | ||
424 | if (read_only) | ||
425 | return res; | ||
426 | if (!(sbi->s_mount_state & EXT2_VALID_FS)) | ||
427 | printk ("EXT2-fs warning: mounting unchecked fs, " | ||
428 | "running e2fsck is recommended\n"); | ||
429 | else if ((sbi->s_mount_state & EXT2_ERROR_FS)) | ||
430 | printk ("EXT2-fs warning: mounting fs with errors, " | ||
431 | "running e2fsck is recommended\n"); | ||
432 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && | ||
433 | le16_to_cpu(es->s_mnt_count) >= | ||
434 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) | ||
435 | printk ("EXT2-fs warning: maximal mount count reached, " | ||
436 | "running e2fsck is recommended\n"); | ||
437 | else if (le32_to_cpu(es->s_checkinterval) && | ||
438 | (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) | ||
439 | printk ("EXT2-fs warning: checktime reached, " | ||
440 | "running e2fsck is recommended\n"); | ||
441 | if (!le16_to_cpu(es->s_max_mnt_count)) | ||
442 | es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); | ||
443 | es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); | ||
444 | ext2_write_super(sb); | ||
445 | if (test_opt (sb, DEBUG)) | ||
446 | printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " | ||
447 | "bpg=%lu, ipg=%lu, mo=%04lx]\n", | ||
448 | EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, | ||
449 | sbi->s_frag_size, | ||
450 | sbi->s_groups_count, | ||
451 | EXT2_BLOCKS_PER_GROUP(sb), | ||
452 | EXT2_INODES_PER_GROUP(sb), | ||
453 | sbi->s_mount_opt); | ||
454 | #ifdef CONFIG_EXT2_CHECK | ||
455 | if (test_opt (sb, CHECK)) { | ||
456 | ext2_check_blocks_bitmap (sb); | ||
457 | ext2_check_inodes_bitmap (sb); | ||
458 | } | ||
459 | #endif | ||
460 | return res; | ||
461 | } | ||
462 | |||
463 | static int ext2_check_descriptors (struct super_block * sb) | ||
464 | { | ||
465 | int i; | ||
466 | int desc_block = 0; | ||
467 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
468 | unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); | ||
469 | struct ext2_group_desc * gdp = NULL; | ||
470 | |||
471 | ext2_debug ("Checking group descriptors"); | ||
472 | |||
473 | for (i = 0; i < sbi->s_groups_count; i++) | ||
474 | { | ||
475 | if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) | ||
476 | gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data; | ||
477 | if (le32_to_cpu(gdp->bg_block_bitmap) < block || | ||
478 | le32_to_cpu(gdp->bg_block_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb)) | ||
479 | { | ||
480 | ext2_error (sb, "ext2_check_descriptors", | ||
481 | "Block bitmap for group %d" | ||
482 | " not in group (block %lu)!", | ||
483 | i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap)); | ||
484 | return 0; | ||
485 | } | ||
486 | if (le32_to_cpu(gdp->bg_inode_bitmap) < block || | ||
487 | le32_to_cpu(gdp->bg_inode_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb)) | ||
488 | { | ||
489 | ext2_error (sb, "ext2_check_descriptors", | ||
490 | "Inode bitmap for group %d" | ||
491 | " not in group (block %lu)!", | ||
492 | i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap)); | ||
493 | return 0; | ||
494 | } | ||
495 | if (le32_to_cpu(gdp->bg_inode_table) < block || | ||
496 | le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= | ||
497 | block + EXT2_BLOCKS_PER_GROUP(sb)) | ||
498 | { | ||
499 | ext2_error (sb, "ext2_check_descriptors", | ||
500 | "Inode table for group %d" | ||
501 | " not in group (block %lu)!", | ||
502 | i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); | ||
503 | return 0; | ||
504 | } | ||
505 | block += EXT2_BLOCKS_PER_GROUP(sb); | ||
506 | gdp++; | ||
507 | } | ||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | #define log2(n) ffz(~(n)) | ||
512 | |||
513 | /* | ||
514 | * Maximal file size. There is a direct, and {,double-,triple-}indirect | ||
515 | * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. | ||
516 | * We need to be 1 filesystem block less than the 2^32 sector limit. | ||
517 | */ | ||
518 | static loff_t ext2_max_size(int bits) | ||
519 | { | ||
520 | loff_t res = EXT2_NDIR_BLOCKS; | ||
521 | /* This constant is calculated to be the largest file size for a | ||
522 | * dense, 4k-blocksize file such that the total number of | ||
523 | * sectors in the file, including data and all indirect blocks, | ||
524 | * does not exceed 2^32. */ | ||
525 | const loff_t upper_limit = 0x1ff7fffd000LL; | ||
526 | |||
527 | res += 1LL << (bits-2); | ||
528 | res += 1LL << (2*(bits-2)); | ||
529 | res += 1LL << (3*(bits-2)); | ||
530 | res <<= bits; | ||
531 | if (res > upper_limit) | ||
532 | res = upper_limit; | ||
533 | return res; | ||
534 | } | ||
535 | |||
536 | static unsigned long descriptor_loc(struct super_block *sb, | ||
537 | unsigned long logic_sb_block, | ||
538 | int nr) | ||
539 | { | ||
540 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
541 | unsigned long bg, first_data_block, first_meta_bg; | ||
542 | int has_super = 0; | ||
543 | |||
544 | first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block); | ||
545 | first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); | ||
546 | |||
547 | if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) || | ||
548 | nr < first_meta_bg) | ||
549 | return (logic_sb_block + nr + 1); | ||
550 | bg = sbi->s_desc_per_block * nr; | ||
551 | if (ext2_bg_has_super(sb, bg)) | ||
552 | has_super = 1; | ||
553 | return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); | ||
554 | } | ||
555 | |||
556 | static int ext2_fill_super(struct super_block *sb, void *data, int silent) | ||
557 | { | ||
558 | struct buffer_head * bh; | ||
559 | struct ext2_sb_info * sbi; | ||
560 | struct ext2_super_block * es; | ||
561 | struct inode *root; | ||
562 | unsigned long block; | ||
563 | unsigned long sb_block = get_sb_block(&data); | ||
564 | unsigned long logic_sb_block; | ||
565 | unsigned long offset = 0; | ||
566 | unsigned long def_mount_opts; | ||
567 | int blocksize = BLOCK_SIZE; | ||
568 | int db_count; | ||
569 | int i, j; | ||
570 | __le32 features; | ||
571 | |||
572 | sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); | ||
573 | if (!sbi) | ||
574 | return -ENOMEM; | ||
575 | sb->s_fs_info = sbi; | ||
576 | memset(sbi, 0, sizeof(*sbi)); | ||
577 | |||
578 | /* | ||
579 | * See what the current blocksize for the device is, and | ||
580 | * use that as the blocksize. Otherwise (or if the blocksize | ||
581 | * is smaller than the default) use the default. | ||
582 | * This is important for devices that have a hardware | ||
583 | * sectorsize that is larger than the default. | ||
584 | */ | ||
585 | blocksize = sb_min_blocksize(sb, BLOCK_SIZE); | ||
586 | if (!blocksize) { | ||
587 | printk ("EXT2-fs: unable to set blocksize\n"); | ||
588 | goto failed_sbi; | ||
589 | } | ||
590 | |||
591 | /* | ||
592 | * If the superblock doesn't start on a hardware sector boundary, | ||
593 | * calculate the offset. | ||
594 | */ | ||
595 | if (blocksize != BLOCK_SIZE) { | ||
596 | logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize; | ||
597 | offset = (sb_block*BLOCK_SIZE) % blocksize; | ||
598 | } else { | ||
599 | logic_sb_block = sb_block; | ||
600 | } | ||
601 | |||
602 | if (!(bh = sb_bread(sb, logic_sb_block))) { | ||
603 | printk ("EXT2-fs: unable to read superblock\n"); | ||
604 | goto failed_sbi; | ||
605 | } | ||
606 | /* | ||
607 | * Note: s_es must be initialized as soon as possible because | ||
608 | * some ext2 macro-instructions depend on its value | ||
609 | */ | ||
610 | es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); | ||
611 | sbi->s_es = es; | ||
612 | sb->s_magic = le16_to_cpu(es->s_magic); | ||
613 | |||
614 | if (sb->s_magic != EXT2_SUPER_MAGIC) | ||
615 | goto cantfind_ext2; | ||
616 | |||
617 | /* Set defaults before we parse the mount options */ | ||
618 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | ||
619 | if (def_mount_opts & EXT2_DEFM_DEBUG) | ||
620 | set_opt(sbi->s_mount_opt, DEBUG); | ||
621 | if (def_mount_opts & EXT2_DEFM_BSDGROUPS) | ||
622 | set_opt(sbi->s_mount_opt, GRPID); | ||
623 | if (def_mount_opts & EXT2_DEFM_UID16) | ||
624 | set_opt(sbi->s_mount_opt, NO_UID32); | ||
625 | if (def_mount_opts & EXT2_DEFM_XATTR_USER) | ||
626 | set_opt(sbi->s_mount_opt, XATTR_USER); | ||
627 | if (def_mount_opts & EXT2_DEFM_ACL) | ||
628 | set_opt(sbi->s_mount_opt, POSIX_ACL); | ||
629 | |||
630 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) | ||
631 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); | ||
632 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) | ||
633 | set_opt(sbi->s_mount_opt, ERRORS_RO); | ||
634 | |||
635 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); | ||
636 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); | ||
637 | |||
638 | if (!parse_options ((char *) data, sbi)) | ||
639 | goto failed_mount; | ||
640 | |||
641 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | ||
642 | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? | ||
643 | MS_POSIXACL : 0); | ||
644 | |||
645 | if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && | ||
646 | (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || | ||
647 | EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || | ||
648 | EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U))) | ||
649 | printk("EXT2-fs warning: feature flags set on rev 0 fs, " | ||
650 | "running e2fsck is recommended\n"); | ||
651 | /* | ||
652 | * Check feature flags regardless of the revision level, since we | ||
653 | * previously didn't change the revision level when setting the flags, | ||
654 | * so there is a chance incompat flags are set on a rev 0 filesystem. | ||
655 | */ | ||
656 | features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP); | ||
657 | if (features) { | ||
658 | printk("EXT2-fs: %s: couldn't mount because of " | ||
659 | "unsupported optional features (%x).\n", | ||
660 | sb->s_id, le32_to_cpu(features)); | ||
661 | goto failed_mount; | ||
662 | } | ||
663 | if (!(sb->s_flags & MS_RDONLY) && | ||
664 | (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ | ||
665 | printk("EXT2-fs: %s: couldn't mount RDWR because of " | ||
666 | "unsupported optional features (%x).\n", | ||
667 | sb->s_id, le32_to_cpu(features)); | ||
668 | goto failed_mount; | ||
669 | } | ||
670 | |||
671 | blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); | ||
672 | |||
673 | /* If the blocksize doesn't match, re-read the thing.. */ | ||
674 | if (sb->s_blocksize != blocksize) { | ||
675 | brelse(bh); | ||
676 | |||
677 | if (!sb_set_blocksize(sb, blocksize)) { | ||
678 | printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n"); | ||
679 | goto failed_sbi; | ||
680 | } | ||
681 | |||
682 | logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize; | ||
683 | offset = (sb_block*BLOCK_SIZE) % blocksize; | ||
684 | bh = sb_bread(sb, logic_sb_block); | ||
685 | if(!bh) { | ||
686 | printk("EXT2-fs: Couldn't read superblock on " | ||
687 | "2nd try.\n"); | ||
688 | goto failed_sbi; | ||
689 | } | ||
690 | es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); | ||
691 | sbi->s_es = es; | ||
692 | if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { | ||
693 | printk ("EXT2-fs: Magic mismatch, very weird !\n"); | ||
694 | goto failed_mount; | ||
695 | } | ||
696 | } | ||
697 | |||
698 | sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); | ||
699 | |||
700 | if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { | ||
701 | sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; | ||
702 | sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO; | ||
703 | } else { | ||
704 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); | ||
705 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); | ||
706 | if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) || | ||
707 | (sbi->s_inode_size & (sbi->s_inode_size - 1)) || | ||
708 | (sbi->s_inode_size > blocksize)) { | ||
709 | printk ("EXT2-fs: unsupported inode size: %d\n", | ||
710 | sbi->s_inode_size); | ||
711 | goto failed_mount; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << | ||
716 | le32_to_cpu(es->s_log_frag_size); | ||
717 | if (sbi->s_frag_size == 0) | ||
718 | goto cantfind_ext2; | ||
719 | sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; | ||
720 | |||
721 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); | ||
722 | sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); | ||
723 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); | ||
724 | |||
725 | if (EXT2_INODE_SIZE(sb) == 0) | ||
726 | goto cantfind_ext2; | ||
727 | sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); | ||
728 | if (sbi->s_inodes_per_block == 0) | ||
729 | goto cantfind_ext2; | ||
730 | sbi->s_itb_per_group = sbi->s_inodes_per_group / | ||
731 | sbi->s_inodes_per_block; | ||
732 | sbi->s_desc_per_block = sb->s_blocksize / | ||
733 | sizeof (struct ext2_group_desc); | ||
734 | sbi->s_sbh = bh; | ||
735 | sbi->s_mount_state = le16_to_cpu(es->s_state); | ||
736 | sbi->s_addr_per_block_bits = | ||
737 | log2 (EXT2_ADDR_PER_BLOCK(sb)); | ||
738 | sbi->s_desc_per_block_bits = | ||
739 | log2 (EXT2_DESC_PER_BLOCK(sb)); | ||
740 | |||
741 | if (sb->s_magic != EXT2_SUPER_MAGIC) | ||
742 | goto cantfind_ext2; | ||
743 | |||
744 | if (sb->s_blocksize != bh->b_size) { | ||
745 | if (!silent) | ||
746 | printk ("VFS: Unsupported blocksize on dev " | ||
747 | "%s.\n", sb->s_id); | ||
748 | goto failed_mount; | ||
749 | } | ||
750 | |||
751 | if (sb->s_blocksize != sbi->s_frag_size) { | ||
752 | printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n", | ||
753 | sbi->s_frag_size, sb->s_blocksize); | ||
754 | goto failed_mount; | ||
755 | } | ||
756 | |||
757 | if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { | ||
758 | printk ("EXT2-fs: #blocks per group too big: %lu\n", | ||
759 | sbi->s_blocks_per_group); | ||
760 | goto failed_mount; | ||
761 | } | ||
762 | if (sbi->s_frags_per_group > sb->s_blocksize * 8) { | ||
763 | printk ("EXT2-fs: #fragments per group too big: %lu\n", | ||
764 | sbi->s_frags_per_group); | ||
765 | goto failed_mount; | ||
766 | } | ||
767 | if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { | ||
768 | printk ("EXT2-fs: #inodes per group too big: %lu\n", | ||
769 | sbi->s_inodes_per_group); | ||
770 | goto failed_mount; | ||
771 | } | ||
772 | |||
773 | if (EXT2_BLOCKS_PER_GROUP(sb) == 0) | ||
774 | goto cantfind_ext2; | ||
775 | sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - | ||
776 | le32_to_cpu(es->s_first_data_block) + | ||
777 | EXT2_BLOCKS_PER_GROUP(sb) - 1) / | ||
778 | EXT2_BLOCKS_PER_GROUP(sb); | ||
779 | db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / | ||
780 | EXT2_DESC_PER_BLOCK(sb); | ||
781 | sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); | ||
782 | if (sbi->s_group_desc == NULL) { | ||
783 | printk ("EXT2-fs: not enough memory\n"); | ||
784 | goto failed_mount; | ||
785 | } | ||
786 | percpu_counter_init(&sbi->s_freeblocks_counter); | ||
787 | percpu_counter_init(&sbi->s_freeinodes_counter); | ||
788 | percpu_counter_init(&sbi->s_dirs_counter); | ||
789 | bgl_lock_init(&sbi->s_blockgroup_lock); | ||
790 | sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), | ||
791 | GFP_KERNEL); | ||
792 | if (!sbi->s_debts) { | ||
793 | printk ("EXT2-fs: not enough memory\n"); | ||
794 | goto failed_mount_group_desc; | ||
795 | } | ||
796 | memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts)); | ||
797 | for (i = 0; i < db_count; i++) { | ||
798 | block = descriptor_loc(sb, logic_sb_block, i); | ||
799 | sbi->s_group_desc[i] = sb_bread(sb, block); | ||
800 | if (!sbi->s_group_desc[i]) { | ||
801 | for (j = 0; j < i; j++) | ||
802 | brelse (sbi->s_group_desc[j]); | ||
803 | printk ("EXT2-fs: unable to read group descriptors\n"); | ||
804 | goto failed_mount_group_desc; | ||
805 | } | ||
806 | } | ||
807 | if (!ext2_check_descriptors (sb)) { | ||
808 | printk ("EXT2-fs: group descriptors corrupted!\n"); | ||
809 | db_count = i; | ||
810 | goto failed_mount2; | ||
811 | } | ||
812 | sbi->s_gdb_count = db_count; | ||
813 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | ||
814 | spin_lock_init(&sbi->s_next_gen_lock); | ||
815 | /* | ||
816 | * set up enough so that it can read an inode | ||
817 | */ | ||
818 | sb->s_op = &ext2_sops; | ||
819 | sb->s_export_op = &ext2_export_ops; | ||
820 | sb->s_xattr = ext2_xattr_handlers; | ||
821 | root = iget(sb, EXT2_ROOT_INO); | ||
822 | sb->s_root = d_alloc_root(root); | ||
823 | if (!sb->s_root) { | ||
824 | iput(root); | ||
825 | printk(KERN_ERR "EXT2-fs: get root inode failed\n"); | ||
826 | goto failed_mount2; | ||
827 | } | ||
828 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | ||
829 | dput(sb->s_root); | ||
830 | sb->s_root = NULL; | ||
831 | printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); | ||
832 | goto failed_mount2; | ||
833 | } | ||
834 | if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) | ||
835 | ext2_warning(sb, __FUNCTION__, | ||
836 | "mounting ext3 filesystem as ext2\n"); | ||
837 | ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); | ||
838 | percpu_counter_mod(&sbi->s_freeblocks_counter, | ||
839 | ext2_count_free_blocks(sb)); | ||
840 | percpu_counter_mod(&sbi->s_freeinodes_counter, | ||
841 | ext2_count_free_inodes(sb)); | ||
842 | percpu_counter_mod(&sbi->s_dirs_counter, | ||
843 | ext2_count_dirs(sb)); | ||
844 | return 0; | ||
845 | |||
846 | cantfind_ext2: | ||
847 | if (!silent) | ||
848 | printk("VFS: Can't find an ext2 filesystem on dev %s.\n", | ||
849 | sb->s_id); | ||
850 | goto failed_mount; | ||
851 | |||
852 | failed_mount2: | ||
853 | for (i = 0; i < db_count; i++) | ||
854 | brelse(sbi->s_group_desc[i]); | ||
855 | failed_mount_group_desc: | ||
856 | kfree(sbi->s_group_desc); | ||
857 | kfree(sbi->s_debts); | ||
858 | failed_mount: | ||
859 | brelse(bh); | ||
860 | failed_sbi: | ||
861 | sb->s_fs_info = NULL; | ||
862 | kfree(sbi); | ||
863 | return -EINVAL; | ||
864 | } | ||
865 | |||
866 | static void ext2_commit_super (struct super_block * sb, | ||
867 | struct ext2_super_block * es) | ||
868 | { | ||
869 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
870 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); | ||
871 | sb->s_dirt = 0; | ||
872 | } | ||
873 | |||
874 | static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) | ||
875 | { | ||
876 | es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); | ||
877 | es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); | ||
878 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
879 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); | ||
880 | sync_dirty_buffer(EXT2_SB(sb)->s_sbh); | ||
881 | sb->s_dirt = 0; | ||
882 | } | ||
883 | |||
884 | /* | ||
885 | * In the second extended file system, it is not necessary to | ||
886 | * write the super block since we use a mapping of the | ||
887 | * disk super block in a buffer. | ||
888 | * | ||
889 | * However, this function is still used to set the fs valid | ||
890 | * flags to 0. We need to set this flag to 0 since the fs | ||
891 | * may have been checked while mounted and e2fsck may have | ||
892 | * set s_state to EXT2_VALID_FS after some corrections. | ||
893 | */ | ||
894 | |||
895 | void ext2_write_super (struct super_block * sb) | ||
896 | { | ||
897 | struct ext2_super_block * es; | ||
898 | lock_kernel(); | ||
899 | if (!(sb->s_flags & MS_RDONLY)) { | ||
900 | es = EXT2_SB(sb)->s_es; | ||
901 | |||
902 | if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) { | ||
903 | ext2_debug ("setting valid to 0\n"); | ||
904 | es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & | ||
905 | ~EXT2_VALID_FS); | ||
906 | es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); | ||
907 | es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); | ||
908 | es->s_mtime = cpu_to_le32(get_seconds()); | ||
909 | ext2_sync_super(sb, es); | ||
910 | } else | ||
911 | ext2_commit_super (sb, es); | ||
912 | } | ||
913 | sb->s_dirt = 0; | ||
914 | unlock_kernel(); | ||
915 | } | ||
916 | |||
917 | static int ext2_remount (struct super_block * sb, int * flags, char * data) | ||
918 | { | ||
919 | struct ext2_sb_info * sbi = EXT2_SB(sb); | ||
920 | struct ext2_super_block * es; | ||
921 | |||
922 | /* | ||
923 | * Allow the "check" option to be passed as a remount option. | ||
924 | */ | ||
925 | if (!parse_options (data, sbi)) | ||
926 | return -EINVAL; | ||
927 | |||
928 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | ||
929 | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); | ||
930 | |||
931 | es = sbi->s_es; | ||
932 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | ||
933 | return 0; | ||
934 | if (*flags & MS_RDONLY) { | ||
935 | if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || | ||
936 | !(sbi->s_mount_state & EXT2_VALID_FS)) | ||
937 | return 0; | ||
938 | /* | ||
939 | * OK, we are remounting a valid rw partition rdonly, so set | ||
940 | * the rdonly flag and then mark the partition as valid again. | ||
941 | */ | ||
942 | es->s_state = cpu_to_le16(sbi->s_mount_state); | ||
943 | es->s_mtime = cpu_to_le32(get_seconds()); | ||
944 | } else { | ||
945 | __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, | ||
946 | ~EXT2_FEATURE_RO_COMPAT_SUPP); | ||
947 | if (ret) { | ||
948 | printk("EXT2-fs: %s: couldn't remount RDWR because of " | ||
949 | "unsupported optional features (%x).\n", | ||
950 | sb->s_id, le32_to_cpu(ret)); | ||
951 | return -EROFS; | ||
952 | } | ||
953 | /* | ||
954 | * Mounting a RDONLY partition read-write, so reread and | ||
955 | * store the current valid flag. (It may have been changed | ||
956 | * by e2fsck since we originally mounted the partition.) | ||
957 | */ | ||
958 | sbi->s_mount_state = le16_to_cpu(es->s_state); | ||
959 | if (!ext2_setup_super (sb, es, 0)) | ||
960 | sb->s_flags &= ~MS_RDONLY; | ||
961 | } | ||
962 | ext2_sync_super(sb, es); | ||
963 | return 0; | ||
964 | } | ||
965 | |||
966 | static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) | ||
967 | { | ||
968 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
969 | unsigned long overhead; | ||
970 | int i; | ||
971 | |||
972 | if (test_opt (sb, MINIX_DF)) | ||
973 | overhead = 0; | ||
974 | else { | ||
975 | /* | ||
976 | * Compute the overhead (FS structures) | ||
977 | */ | ||
978 | |||
979 | /* | ||
980 | * All of the blocks before first_data_block are | ||
981 | * overhead | ||
982 | */ | ||
983 | overhead = le32_to_cpu(sbi->s_es->s_first_data_block); | ||
984 | |||
985 | /* | ||
986 | * Add the overhead attributed to the superblock and | ||
987 | * block group descriptors. If the sparse superblocks | ||
988 | * feature is turned on, then not all groups have this. | ||
989 | */ | ||
990 | for (i = 0; i < sbi->s_groups_count; i++) | ||
991 | overhead += ext2_bg_has_super(sb, i) + | ||
992 | ext2_bg_num_gdb(sb, i); | ||
993 | |||
994 | /* | ||
995 | * Every block group has an inode bitmap, a block | ||
996 | * bitmap, and an inode table. | ||
997 | */ | ||
998 | overhead += (sbi->s_groups_count * | ||
999 | (2 + sbi->s_itb_per_group)); | ||
1000 | } | ||
1001 | |||
1002 | buf->f_type = EXT2_SUPER_MAGIC; | ||
1003 | buf->f_bsize = sb->s_blocksize; | ||
1004 | buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead; | ||
1005 | buf->f_bfree = ext2_count_free_blocks(sb); | ||
1006 | buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count); | ||
1007 | if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count)) | ||
1008 | buf->f_bavail = 0; | ||
1009 | buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); | ||
1010 | buf->f_ffree = ext2_count_free_inodes (sb); | ||
1011 | buf->f_namelen = EXT2_NAME_LEN; | ||
1012 | return 0; | ||
1013 | } | ||
1014 | |||
1015 | static struct super_block *ext2_get_sb(struct file_system_type *fs_type, | ||
1016 | int flags, const char *dev_name, void *data) | ||
1017 | { | ||
1018 | return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); | ||
1019 | } | ||
1020 | |||
1021 | #ifdef CONFIG_QUOTA | ||
1022 | |||
1023 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | ||
1024 | * acquiring the locks... As quota files are never truncated and quota code | ||
1025 | * itself serializes the operations (and noone else should touch the files) | ||
1026 | * we don't have to be afraid of races */ | ||
1027 | static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, | ||
1028 | size_t len, loff_t off) | ||
1029 | { | ||
1030 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
1031 | sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); | ||
1032 | int err = 0; | ||
1033 | int offset = off & (sb->s_blocksize - 1); | ||
1034 | int tocopy; | ||
1035 | size_t toread; | ||
1036 | struct buffer_head tmp_bh; | ||
1037 | struct buffer_head *bh; | ||
1038 | loff_t i_size = i_size_read(inode); | ||
1039 | |||
1040 | if (off > i_size) | ||
1041 | return 0; | ||
1042 | if (off+len > i_size) | ||
1043 | len = i_size-off; | ||
1044 | toread = len; | ||
1045 | while (toread > 0) { | ||
1046 | tocopy = sb->s_blocksize - offset < toread ? | ||
1047 | sb->s_blocksize - offset : toread; | ||
1048 | |||
1049 | tmp_bh.b_state = 0; | ||
1050 | err = ext2_get_block(inode, blk, &tmp_bh, 0); | ||
1051 | if (err) | ||
1052 | return err; | ||
1053 | if (!buffer_mapped(&tmp_bh)) /* A hole? */ | ||
1054 | memset(data, 0, tocopy); | ||
1055 | else { | ||
1056 | bh = sb_bread(sb, tmp_bh.b_blocknr); | ||
1057 | if (!bh) | ||
1058 | return -EIO; | ||
1059 | memcpy(data, bh->b_data+offset, tocopy); | ||
1060 | brelse(bh); | ||
1061 | } | ||
1062 | offset = 0; | ||
1063 | toread -= tocopy; | ||
1064 | data += tocopy; | ||
1065 | blk++; | ||
1066 | } | ||
1067 | return len; | ||
1068 | } | ||
1069 | |||
1070 | /* Write to quotafile */ | ||
1071 | static ssize_t ext2_quota_write(struct super_block *sb, int type, | ||
1072 | const char *data, size_t len, loff_t off) | ||
1073 | { | ||
1074 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
1075 | sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); | ||
1076 | int err = 0; | ||
1077 | int offset = off & (sb->s_blocksize - 1); | ||
1078 | int tocopy; | ||
1079 | size_t towrite = len; | ||
1080 | struct buffer_head tmp_bh; | ||
1081 | struct buffer_head *bh; | ||
1082 | |||
1083 | down(&inode->i_sem); | ||
1084 | while (towrite > 0) { | ||
1085 | tocopy = sb->s_blocksize - offset < towrite ? | ||
1086 | sb->s_blocksize - offset : towrite; | ||
1087 | |||
1088 | tmp_bh.b_state = 0; | ||
1089 | err = ext2_get_block(inode, blk, &tmp_bh, 1); | ||
1090 | if (err) | ||
1091 | goto out; | ||
1092 | if (offset || tocopy != EXT2_BLOCK_SIZE(sb)) | ||
1093 | bh = sb_bread(sb, tmp_bh.b_blocknr); | ||
1094 | else | ||
1095 | bh = sb_getblk(sb, tmp_bh.b_blocknr); | ||
1096 | if (!bh) { | ||
1097 | err = -EIO; | ||
1098 | goto out; | ||
1099 | } | ||
1100 | lock_buffer(bh); | ||
1101 | memcpy(bh->b_data+offset, data, tocopy); | ||
1102 | flush_dcache_page(bh->b_page); | ||
1103 | set_buffer_uptodate(bh); | ||
1104 | mark_buffer_dirty(bh); | ||
1105 | unlock_buffer(bh); | ||
1106 | brelse(bh); | ||
1107 | offset = 0; | ||
1108 | towrite -= tocopy; | ||
1109 | data += tocopy; | ||
1110 | blk++; | ||
1111 | } | ||
1112 | out: | ||
1113 | if (len == towrite) | ||
1114 | return err; | ||
1115 | if (inode->i_size < off+len-towrite) | ||
1116 | i_size_write(inode, off+len-towrite); | ||
1117 | inode->i_version++; | ||
1118 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1119 | mark_inode_dirty(inode); | ||
1120 | up(&inode->i_sem); | ||
1121 | return len - towrite; | ||
1122 | } | ||
1123 | |||
1124 | #endif | ||
1125 | |||
1126 | static struct file_system_type ext2_fs_type = { | ||
1127 | .owner = THIS_MODULE, | ||
1128 | .name = "ext2", | ||
1129 | .get_sb = ext2_get_sb, | ||
1130 | .kill_sb = kill_block_super, | ||
1131 | .fs_flags = FS_REQUIRES_DEV, | ||
1132 | }; | ||
1133 | |||
1134 | static int __init init_ext2_fs(void) | ||
1135 | { | ||
1136 | int err = init_ext2_xattr(); | ||
1137 | if (err) | ||
1138 | return err; | ||
1139 | err = init_inodecache(); | ||
1140 | if (err) | ||
1141 | goto out1; | ||
1142 | err = register_filesystem(&ext2_fs_type); | ||
1143 | if (err) | ||
1144 | goto out; | ||
1145 | return 0; | ||
1146 | out: | ||
1147 | destroy_inodecache(); | ||
1148 | out1: | ||
1149 | exit_ext2_xattr(); | ||
1150 | return err; | ||
1151 | } | ||
1152 | |||
1153 | static void __exit exit_ext2_fs(void) | ||
1154 | { | ||
1155 | unregister_filesystem(&ext2_fs_type); | ||
1156 | destroy_inodecache(); | ||
1157 | exit_ext2_xattr(); | ||
1158 | } | ||
1159 | |||
1160 | module_init(init_ext2_fs) | ||
1161 | module_exit(exit_ext2_fs) | ||
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c new file mode 100644 index 000000000000..9f7bac01d557 --- /dev/null +++ b/fs/ext2/symlink.c | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/symlink.c | ||
3 | * | ||
4 | * Only fast symlinks left here - the rest is done by generic code. AV, 1999 | ||
5 | * | ||
6 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
7 | * Remy Card (card@masi.ibp.fr) | ||
8 | * Laboratoire MASI - Institut Blaise Pascal | ||
9 | * Universite Pierre et Marie Curie (Paris VI) | ||
10 | * | ||
11 | * from | ||
12 | * | ||
13 | * linux/fs/minix/symlink.c | ||
14 | * | ||
15 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
16 | * | ||
17 | * ext2 symlink handling code | ||
18 | */ | ||
19 | |||
20 | #include "ext2.h" | ||
21 | #include "xattr.h" | ||
22 | #include <linux/namei.h> | ||
23 | |||
24 | static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
25 | { | ||
26 | struct ext2_inode_info *ei = EXT2_I(dentry->d_inode); | ||
27 | nd_set_link(nd, (char *)ei->i_data); | ||
28 | return 0; | ||
29 | } | ||
30 | |||
31 | struct inode_operations ext2_symlink_inode_operations = { | ||
32 | .readlink = generic_readlink, | ||
33 | .follow_link = page_follow_link_light, | ||
34 | .put_link = page_put_link, | ||
35 | #ifdef CONFIG_EXT2_FS_XATTR | ||
36 | .setxattr = generic_setxattr, | ||
37 | .getxattr = generic_getxattr, | ||
38 | .listxattr = ext2_listxattr, | ||
39 | .removexattr = generic_removexattr, | ||
40 | #endif | ||
41 | }; | ||
42 | |||
43 | struct inode_operations ext2_fast_symlink_inode_operations = { | ||
44 | .readlink = generic_readlink, | ||
45 | .follow_link = ext2_follow_link, | ||
46 | #ifdef CONFIG_EXT2_FS_XATTR | ||
47 | .setxattr = generic_setxattr, | ||
48 | .getxattr = generic_getxattr, | ||
49 | .listxattr = ext2_listxattr, | ||
50 | .removexattr = generic_removexattr, | ||
51 | #endif | ||
52 | }; | ||
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c new file mode 100644 index 000000000000..27982b500e84 --- /dev/null +++ b/fs/ext2/xattr.c | |||
@@ -0,0 +1,1043 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/xattr.c | ||
3 | * | ||
4 | * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de> | ||
5 | * | ||
6 | * Fix by Harrison Xing <harrison@mountainviewdata.com>. | ||
7 | * Extended attributes for symlinks and special files added per | ||
8 | * suggestion of Luka Renko <luka.renko@hermes.si>. | ||
9 | * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, | ||
10 | * Red Hat Inc. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | /* | ||
15 | * Extended attributes are stored on disk blocks allocated outside of | ||
16 | * any inode. The i_file_acl field is then made to point to this allocated | ||
17 | * block. If all extended attributes of an inode are identical, these | ||
18 | * inodes may share the same extended attribute block. Such situations | ||
19 | * are automatically detected by keeping a cache of recent attribute block | ||
20 | * numbers and hashes over the block's contents in memory. | ||
21 | * | ||
22 | * | ||
23 | * Extended attribute block layout: | ||
24 | * | ||
25 | * +------------------+ | ||
26 | * | header | | ||
27 | * | entry 1 | | | ||
28 | * | entry 2 | | growing downwards | ||
29 | * | entry 3 | v | ||
30 | * | four null bytes | | ||
31 | * | . . . | | ||
32 | * | value 1 | ^ | ||
33 | * | value 3 | | growing upwards | ||
34 | * | value 2 | | | ||
35 | * +------------------+ | ||
36 | * | ||
37 | * The block header is followed by multiple entry descriptors. These entry | ||
38 | * descriptors are variable in size, and aligned to EXT2_XATTR_PAD | ||
39 | * byte boundaries. The entry descriptors are sorted by attribute name, | ||
40 | * so that two extended attribute blocks can be compared efficiently. | ||
41 | * | ||
42 | * Attribute values are aligned to the end of the block, stored in | ||
43 | * no specific order. They are also padded to EXT2_XATTR_PAD byte | ||
44 | * boundaries. No additional gaps are left between them. | ||
45 | * | ||
46 | * Locking strategy | ||
47 | * ---------------- | ||
48 | * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem. | ||
49 | * EA blocks are only changed if they are exclusive to an inode, so | ||
50 | * holding xattr_sem also means that nothing but the EA block's reference | ||
51 | * count will change. Multiple writers to an EA block are synchronized | ||
52 | * by the bh lock. No more than a single bh lock is held at any time | ||
53 | * to avoid deadlocks. | ||
54 | */ | ||
55 | |||
56 | #include <linux/buffer_head.h> | ||
57 | #include <linux/module.h> | ||
58 | #include <linux/init.h> | ||
59 | #include <linux/slab.h> | ||
60 | #include <linux/mbcache.h> | ||
61 | #include <linux/quotaops.h> | ||
62 | #include <linux/rwsem.h> | ||
63 | #include "ext2.h" | ||
64 | #include "xattr.h" | ||
65 | #include "acl.h" | ||
66 | |||
/* View the start of a buffer's data as the xattr block header. */
#define HDR(bh)			((struct ext2_xattr_header *)((bh)->b_data))
/* View an arbitrary pointer as an xattr entry descriptor. */
#define ENTRY(ptr)		((struct ext2_xattr_entry *)(ptr))
/* The first entry descriptor sits directly after the header. */
#define FIRST_ENTRY(bh)		ENTRY(HDR(bh) + 1)
/* The entry list ends at four zero bytes. */
#define IS_LAST_ENTRY(entry)	(*(__u32 *)(entry) == 0)
71 | |||
/*
 * Debug helpers.  With EXT2_XATTR_DEBUG defined, ea_idebug() prints a
 * message prefixed with the inode's device id and inode number, and
 * ea_bdebug() prints one prefixed with the buffer's device name and block
 * number.  Without it both expand to nothing.
 *
 * The inode/bh arguments are parenthesized in the expansion so that any
 * pointer expression can be passed safely.
 */
#ifdef EXT2_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
		printk(KERN_DEBUG "inode %s:%ld: ", \
			(inode)->i_sb->s_id, (inode)->i_ino); \
		printk(f); \
		printk("\n"); \
	} while (0)
# define ea_bdebug(bh, f...) do { \
		char b[BDEVNAME_SIZE]; \
		printk(KERN_DEBUG "block %s:%lu: ", \
			bdevname((bh)->b_bdev, b), \
			(unsigned long) (bh)->b_blocknr); \
		printk(f); \
		printk("\n"); \
	} while (0)
#else
# define ea_idebug(f...)
# define ea_bdebug(f...)
#endif
91 | |||
/* Prototypes for static helpers used before their definitions. */
static int ext2_xattr_set2(struct inode *inode, struct buffer_head *bh,
			   struct ext2_xattr_header *header);

static int ext2_xattr_cache_insert(struct buffer_head *bh);
static struct buffer_head *ext2_xattr_cache_find(struct inode *inode,
						 struct ext2_xattr_header *header);
static void ext2_xattr_rehash(struct ext2_xattr_header *header,
			      struct ext2_xattr_entry *entry);

/* mbcache instance used for extended attribute blocks. */
static struct mb_cache *ext2_xattr_cache;
102 | |||
103 | static struct xattr_handler *ext2_xattr_handler_map[] = { | ||
104 | [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, | ||
105 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
106 | [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, | ||
107 | [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler, | ||
108 | #endif | ||
109 | [EXT2_XATTR_INDEX_TRUSTED] = &ext2_xattr_trusted_handler, | ||
110 | #ifdef CONFIG_EXT2_FS_SECURITY | ||
111 | [EXT2_XATTR_INDEX_SECURITY] = &ext2_xattr_security_handler, | ||
112 | #endif | ||
113 | }; | ||
114 | |||
115 | struct xattr_handler *ext2_xattr_handlers[] = { | ||
116 | &ext2_xattr_user_handler, | ||
117 | &ext2_xattr_trusted_handler, | ||
118 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | ||
119 | &ext2_xattr_acl_access_handler, | ||
120 | &ext2_xattr_acl_default_handler, | ||
121 | #endif | ||
122 | #ifdef CONFIG_EXT2_FS_SECURITY | ||
123 | &ext2_xattr_security_handler, | ||
124 | #endif | ||
125 | NULL | ||
126 | }; | ||
127 | |||
128 | static inline struct xattr_handler * | ||
129 | ext2_xattr_handler(int name_index) | ||
130 | { | ||
131 | struct xattr_handler *handler = NULL; | ||
132 | |||
133 | if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) | ||
134 | handler = ext2_xattr_handler_map[name_index]; | ||
135 | return handler; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * ext2_xattr_get() | ||
140 | * | ||
141 | * Copy an extended attribute into the buffer | ||
142 | * provided, or compute the buffer size required. | ||
143 | * Buffer is NULL to compute the size of the buffer required. | ||
144 | * | ||
145 | * Returns a negative error number on failure, or the number of bytes | ||
146 | * used / required on success. | ||
147 | */ | ||
148 | int | ||
149 | ext2_xattr_get(struct inode *inode, int name_index, const char *name, | ||
150 | void *buffer, size_t buffer_size) | ||
151 | { | ||
152 | struct buffer_head *bh = NULL; | ||
153 | struct ext2_xattr_entry *entry; | ||
154 | size_t name_len, size; | ||
155 | char *end; | ||
156 | int error; | ||
157 | |||
158 | ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", | ||
159 | name_index, name, buffer, (long)buffer_size); | ||
160 | |||
161 | if (name == NULL) | ||
162 | return -EINVAL; | ||
163 | down_read(&EXT2_I(inode)->xattr_sem); | ||
164 | error = -ENODATA; | ||
165 | if (!EXT2_I(inode)->i_file_acl) | ||
166 | goto cleanup; | ||
167 | ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); | ||
168 | bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); | ||
169 | error = -EIO; | ||
170 | if (!bh) | ||
171 | goto cleanup; | ||
172 | ea_bdebug(bh, "b_count=%d, refcount=%d", | ||
173 | atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); | ||
174 | end = bh->b_data + bh->b_size; | ||
175 | if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || | ||
176 | HDR(bh)->h_blocks != cpu_to_le32(1)) { | ||
177 | bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", | ||
178 | "inode %ld: bad block %d", inode->i_ino, | ||
179 | EXT2_I(inode)->i_file_acl); | ||
180 | error = -EIO; | ||
181 | goto cleanup; | ||
182 | } | ||
183 | /* find named attribute */ | ||
184 | name_len = strlen(name); | ||
185 | |||
186 | error = -ERANGE; | ||
187 | if (name_len > 255) | ||
188 | goto cleanup; | ||
189 | entry = FIRST_ENTRY(bh); | ||
190 | while (!IS_LAST_ENTRY(entry)) { | ||
191 | struct ext2_xattr_entry *next = | ||
192 | EXT2_XATTR_NEXT(entry); | ||
193 | if ((char *)next >= end) | ||
194 | goto bad_block; | ||
195 | if (name_index == entry->e_name_index && | ||
196 | name_len == entry->e_name_len && | ||
197 | memcmp(name, entry->e_name, name_len) == 0) | ||
198 | goto found; | ||
199 | entry = next; | ||
200 | } | ||
201 | /* Check the remaining name entries */ | ||
202 | while (!IS_LAST_ENTRY(entry)) { | ||
203 | struct ext2_xattr_entry *next = | ||
204 | EXT2_XATTR_NEXT(entry); | ||
205 | if ((char *)next >= end) | ||
206 | goto bad_block; | ||
207 | entry = next; | ||
208 | } | ||
209 | if (ext2_xattr_cache_insert(bh)) | ||
210 | ea_idebug(inode, "cache insert failed"); | ||
211 | error = -ENODATA; | ||
212 | goto cleanup; | ||
213 | found: | ||
214 | /* check the buffer size */ | ||
215 | if (entry->e_value_block != 0) | ||
216 | goto bad_block; | ||
217 | size = le32_to_cpu(entry->e_value_size); | ||
218 | if (size > inode->i_sb->s_blocksize || | ||
219 | le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) | ||
220 | goto bad_block; | ||
221 | |||
222 | if (ext2_xattr_cache_insert(bh)) | ||
223 | ea_idebug(inode, "cache insert failed"); | ||
224 | if (buffer) { | ||
225 | error = -ERANGE; | ||
226 | if (size > buffer_size) | ||
227 | goto cleanup; | ||
228 | /* return value of attribute */ | ||
229 | memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), | ||
230 | size); | ||
231 | } | ||
232 | error = size; | ||
233 | |||
234 | cleanup: | ||
235 | brelse(bh); | ||
236 | up_read(&EXT2_I(inode)->xattr_sem); | ||
237 | |||
238 | return error; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * ext2_xattr_list() | ||
243 | * | ||
244 | * Copy a list of attribute names into the buffer | ||
245 | * provided, or compute the buffer size required. | ||
246 | * Buffer is NULL to compute the size of the buffer required. | ||
247 | * | ||
248 | * Returns a negative error number on failure, or the number of bytes | ||
249 | * used / required on success. | ||
250 | */ | ||
251 | static int | ||
252 | ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) | ||
253 | { | ||
254 | struct buffer_head *bh = NULL; | ||
255 | struct ext2_xattr_entry *entry; | ||
256 | char *end; | ||
257 | size_t rest = buffer_size; | ||
258 | int error; | ||
259 | |||
260 | ea_idebug(inode, "buffer=%p, buffer_size=%ld", | ||
261 | buffer, (long)buffer_size); | ||
262 | |||
263 | down_read(&EXT2_I(inode)->xattr_sem); | ||
264 | error = 0; | ||
265 | if (!EXT2_I(inode)->i_file_acl) | ||
266 | goto cleanup; | ||
267 | ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); | ||
268 | bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); | ||
269 | error = -EIO; | ||
270 | if (!bh) | ||
271 | goto cleanup; | ||
272 | ea_bdebug(bh, "b_count=%d, refcount=%d", | ||
273 | atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); | ||
274 | end = bh->b_data + bh->b_size; | ||
275 | if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || | ||
276 | HDR(bh)->h_blocks != cpu_to_le32(1)) { | ||
277 | bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", | ||
278 | "inode %ld: bad block %d", inode->i_ino, | ||
279 | EXT2_I(inode)->i_file_acl); | ||
280 | error = -EIO; | ||
281 | goto cleanup; | ||
282 | } | ||
283 | |||
284 | /* check the on-disk data structure */ | ||
285 | entry = FIRST_ENTRY(bh); | ||
286 | while (!IS_LAST_ENTRY(entry)) { | ||
287 | struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry); | ||
288 | |||
289 | if ((char *)next >= end) | ||
290 | goto bad_block; | ||
291 | entry = next; | ||
292 | } | ||
293 | if (ext2_xattr_cache_insert(bh)) | ||
294 | ea_idebug(inode, "cache insert failed"); | ||
295 | |||
296 | /* list the attribute names */ | ||
297 | for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); | ||
298 | entry = EXT2_XATTR_NEXT(entry)) { | ||
299 | struct xattr_handler *handler = | ||
300 | ext2_xattr_handler(entry->e_name_index); | ||
301 | |||
302 | if (handler) { | ||
303 | size_t size = handler->list(inode, buffer, rest, | ||
304 | entry->e_name, | ||
305 | entry->e_name_len); | ||
306 | if (buffer) { | ||
307 | if (size > rest) { | ||
308 | error = -ERANGE; | ||
309 | goto cleanup; | ||
310 | } | ||
311 | buffer += size; | ||
312 | } | ||
313 | rest -= size; | ||
314 | } | ||
315 | } | ||
316 | error = buffer_size - rest; /* total size */ | ||
317 | |||
318 | cleanup: | ||
319 | brelse(bh); | ||
320 | up_read(&EXT2_I(inode)->xattr_sem); | ||
321 | |||
322 | return error; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Inode operation listxattr() | ||
327 | * | ||
328 | * dentry->d_inode->i_sem: don't care | ||
329 | */ | ||
330 | ssize_t | ||
331 | ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
332 | { | ||
333 | return ext2_xattr_list(dentry->d_inode, buffer, size); | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is | ||
338 | * not set, set it. | ||
339 | */ | ||
340 | static void ext2_xattr_update_super_block(struct super_block *sb) | ||
341 | { | ||
342 | if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) | ||
343 | return; | ||
344 | |||
345 | lock_super(sb); | ||
346 | EXT2_SB(sb)->s_es->s_feature_compat |= | ||
347 | cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); | ||
348 | sb->s_dirt = 1; | ||
349 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); | ||
350 | unlock_super(sb); | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * ext2_xattr_set() | ||
355 | * | ||
356 | * Create, replace or remove an extended attribute for this inode. Buffer | ||
357 | * is NULL to remove an existing extended attribute, and non-NULL to | ||
358 | * either replace an existing extended attribute, or create a new extended | ||
359 | * attribute. The flags XATTR_REPLACE and XATTR_CREATE | ||
360 | * specify that an extended attribute must exist and must not exist | ||
361 | * previous to the call, respectively. | ||
362 | * | ||
363 | * Returns 0, or a negative error number on failure. | ||
364 | */ | ||
365 | int | ||
366 | ext2_xattr_set(struct inode *inode, int name_index, const char *name, | ||
367 | const void *value, size_t value_len, int flags) | ||
368 | { | ||
369 | struct super_block *sb = inode->i_sb; | ||
370 | struct buffer_head *bh = NULL; | ||
371 | struct ext2_xattr_header *header = NULL; | ||
372 | struct ext2_xattr_entry *here, *last; | ||
373 | size_t name_len, free, min_offs = sb->s_blocksize; | ||
374 | int not_found = 1, error; | ||
375 | char *end; | ||
376 | |||
377 | /* | ||
378 | * header -- Points either into bh, or to a temporarily | ||
379 | * allocated buffer. | ||
380 | * here -- The named entry found, or the place for inserting, within | ||
381 | * the block pointed to by header. | ||
382 | * last -- Points right after the last named entry within the block | ||
383 | * pointed to by header. | ||
384 | * min_offs -- The offset of the first value (values are aligned | ||
385 | * towards the end of the block). | ||
386 | * end -- Points right after the block pointed to by header. | ||
387 | */ | ||
388 | |||
389 | ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", | ||
390 | name_index, name, value, (long)value_len); | ||
391 | |||
392 | if (IS_RDONLY(inode)) | ||
393 | return -EROFS; | ||
394 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | ||
395 | return -EPERM; | ||
396 | if (value == NULL) | ||
397 | value_len = 0; | ||
398 | if (name == NULL) | ||
399 | return -EINVAL; | ||
400 | name_len = strlen(name); | ||
401 | if (name_len > 255 || value_len > sb->s_blocksize) | ||
402 | return -ERANGE; | ||
403 | down_write(&EXT2_I(inode)->xattr_sem); | ||
404 | if (EXT2_I(inode)->i_file_acl) { | ||
405 | /* The inode already has an extended attribute block. */ | ||
406 | bh = sb_bread(sb, EXT2_I(inode)->i_file_acl); | ||
407 | error = -EIO; | ||
408 | if (!bh) | ||
409 | goto cleanup; | ||
410 | ea_bdebug(bh, "b_count=%d, refcount=%d", | ||
411 | atomic_read(&(bh->b_count)), | ||
412 | le32_to_cpu(HDR(bh)->h_refcount)); | ||
413 | header = HDR(bh); | ||
414 | end = bh->b_data + bh->b_size; | ||
415 | if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || | ||
416 | header->h_blocks != cpu_to_le32(1)) { | ||
417 | bad_block: ext2_error(sb, "ext2_xattr_set", | ||
418 | "inode %ld: bad block %d", inode->i_ino, | ||
419 | EXT2_I(inode)->i_file_acl); | ||
420 | error = -EIO; | ||
421 | goto cleanup; | ||
422 | } | ||
423 | /* Find the named attribute. */ | ||
424 | here = FIRST_ENTRY(bh); | ||
425 | while (!IS_LAST_ENTRY(here)) { | ||
426 | struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); | ||
427 | if ((char *)next >= end) | ||
428 | goto bad_block; | ||
429 | if (!here->e_value_block && here->e_value_size) { | ||
430 | size_t offs = le16_to_cpu(here->e_value_offs); | ||
431 | if (offs < min_offs) | ||
432 | min_offs = offs; | ||
433 | } | ||
434 | not_found = name_index - here->e_name_index; | ||
435 | if (!not_found) | ||
436 | not_found = name_len - here->e_name_len; | ||
437 | if (!not_found) | ||
438 | not_found = memcmp(name, here->e_name,name_len); | ||
439 | if (not_found <= 0) | ||
440 | break; | ||
441 | here = next; | ||
442 | } | ||
443 | last = here; | ||
444 | /* We still need to compute min_offs and last. */ | ||
445 | while (!IS_LAST_ENTRY(last)) { | ||
446 | struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); | ||
447 | if ((char *)next >= end) | ||
448 | goto bad_block; | ||
449 | if (!last->e_value_block && last->e_value_size) { | ||
450 | size_t offs = le16_to_cpu(last->e_value_offs); | ||
451 | if (offs < min_offs) | ||
452 | min_offs = offs; | ||
453 | } | ||
454 | last = next; | ||
455 | } | ||
456 | |||
457 | /* Check whether we have enough space left. */ | ||
458 | free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); | ||
459 | } else { | ||
460 | /* We will use a new extended attribute block. */ | ||
461 | free = sb->s_blocksize - | ||
462 | sizeof(struct ext2_xattr_header) - sizeof(__u32); | ||
463 | here = last = NULL; /* avoid gcc uninitialized warning. */ | ||
464 | } | ||
465 | |||
466 | if (not_found) { | ||
467 | /* Request to remove a nonexistent attribute? */ | ||
468 | error = -ENODATA; | ||
469 | if (flags & XATTR_REPLACE) | ||
470 | goto cleanup; | ||
471 | error = 0; | ||
472 | if (value == NULL) | ||
473 | goto cleanup; | ||
474 | } else { | ||
475 | /* Request to create an existing attribute? */ | ||
476 | error = -EEXIST; | ||
477 | if (flags & XATTR_CREATE) | ||
478 | goto cleanup; | ||
479 | if (!here->e_value_block && here->e_value_size) { | ||
480 | size_t size = le32_to_cpu(here->e_value_size); | ||
481 | |||
482 | if (le16_to_cpu(here->e_value_offs) + size > | ||
483 | sb->s_blocksize || size > sb->s_blocksize) | ||
484 | goto bad_block; | ||
485 | free += EXT2_XATTR_SIZE(size); | ||
486 | } | ||
487 | free += EXT2_XATTR_LEN(name_len); | ||
488 | } | ||
489 | error = -ENOSPC; | ||
490 | if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len)) | ||
491 | goto cleanup; | ||
492 | |||
493 | /* Here we know that we can set the new attribute. */ | ||
494 | |||
495 | if (header) { | ||
496 | struct mb_cache_entry *ce; | ||
497 | |||
498 | /* assert(header == HDR(bh)); */ | ||
499 | ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, | ||
500 | bh->b_blocknr); | ||
501 | lock_buffer(bh); | ||
502 | if (header->h_refcount == cpu_to_le32(1)) { | ||
503 | ea_bdebug(bh, "modifying in-place"); | ||
504 | if (ce) | ||
505 | mb_cache_entry_free(ce); | ||
506 | /* keep the buffer locked while modifying it. */ | ||
507 | } else { | ||
508 | int offset; | ||
509 | |||
510 | if (ce) | ||
511 | mb_cache_entry_release(ce); | ||
512 | unlock_buffer(bh); | ||
513 | ea_bdebug(bh, "cloning"); | ||
514 | header = kmalloc(bh->b_size, GFP_KERNEL); | ||
515 | error = -ENOMEM; | ||
516 | if (header == NULL) | ||
517 | goto cleanup; | ||
518 | memcpy(header, HDR(bh), bh->b_size); | ||
519 | header->h_refcount = cpu_to_le32(1); | ||
520 | |||
521 | offset = (char *)here - bh->b_data; | ||
522 | here = ENTRY((char *)header + offset); | ||
523 | offset = (char *)last - bh->b_data; | ||
524 | last = ENTRY((char *)header + offset); | ||
525 | } | ||
526 | } else { | ||
527 | /* Allocate a buffer where we construct the new block. */ | ||
528 | header = kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
529 | error = -ENOMEM; | ||
530 | if (header == NULL) | ||
531 | goto cleanup; | ||
532 | memset(header, 0, sb->s_blocksize); | ||
533 | end = (char *)header + sb->s_blocksize; | ||
534 | header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); | ||
535 | header->h_blocks = header->h_refcount = cpu_to_le32(1); | ||
536 | last = here = ENTRY(header+1); | ||
537 | } | ||
538 | |||
539 | /* Iff we are modifying the block in-place, bh is locked here. */ | ||
540 | |||
541 | if (not_found) { | ||
542 | /* Insert the new name. */ | ||
543 | size_t size = EXT2_XATTR_LEN(name_len); | ||
544 | size_t rest = (char *)last - (char *)here; | ||
545 | memmove((char *)here + size, here, rest); | ||
546 | memset(here, 0, size); | ||
547 | here->e_name_index = name_index; | ||
548 | here->e_name_len = name_len; | ||
549 | memcpy(here->e_name, name, name_len); | ||
550 | } else { | ||
551 | if (!here->e_value_block && here->e_value_size) { | ||
552 | char *first_val = (char *)header + min_offs; | ||
553 | size_t offs = le16_to_cpu(here->e_value_offs); | ||
554 | char *val = (char *)header + offs; | ||
555 | size_t size = EXT2_XATTR_SIZE( | ||
556 | le32_to_cpu(here->e_value_size)); | ||
557 | |||
558 | if (size == EXT2_XATTR_SIZE(value_len)) { | ||
559 | /* The old and the new value have the same | ||
560 | size. Just replace. */ | ||
561 | here->e_value_size = cpu_to_le32(value_len); | ||
562 | memset(val + size - EXT2_XATTR_PAD, 0, | ||
563 | EXT2_XATTR_PAD); /* Clear pad bytes. */ | ||
564 | memcpy(val, value, value_len); | ||
565 | goto skip_replace; | ||
566 | } | ||
567 | |||
568 | /* Remove the old value. */ | ||
569 | memmove(first_val + size, first_val, val - first_val); | ||
570 | memset(first_val, 0, size); | ||
571 | here->e_value_offs = 0; | ||
572 | min_offs += size; | ||
573 | |||
574 | /* Adjust all value offsets. */ | ||
575 | last = ENTRY(header+1); | ||
576 | while (!IS_LAST_ENTRY(last)) { | ||
577 | size_t o = le16_to_cpu(last->e_value_offs); | ||
578 | if (!last->e_value_block && o < offs) | ||
579 | last->e_value_offs = | ||
580 | cpu_to_le16(o + size); | ||
581 | last = EXT2_XATTR_NEXT(last); | ||
582 | } | ||
583 | } | ||
584 | if (value == NULL) { | ||
585 | /* Remove the old name. */ | ||
586 | size_t size = EXT2_XATTR_LEN(name_len); | ||
587 | last = ENTRY((char *)last - size); | ||
588 | memmove(here, (char*)here + size, | ||
589 | (char*)last - (char*)here); | ||
590 | memset(last, 0, size); | ||
591 | } | ||
592 | } | ||
593 | |||
594 | if (value != NULL) { | ||
595 | /* Insert the new value. */ | ||
596 | here->e_value_size = cpu_to_le32(value_len); | ||
597 | if (value_len) { | ||
598 | size_t size = EXT2_XATTR_SIZE(value_len); | ||
599 | char *val = (char *)header + min_offs - size; | ||
600 | here->e_value_offs = | ||
601 | cpu_to_le16((char *)val - (char *)header); | ||
602 | memset(val + size - EXT2_XATTR_PAD, 0, | ||
603 | EXT2_XATTR_PAD); /* Clear the pad bytes. */ | ||
604 | memcpy(val, value, value_len); | ||
605 | } | ||
606 | } | ||
607 | |||
608 | skip_replace: | ||
609 | if (IS_LAST_ENTRY(ENTRY(header+1))) { | ||
610 | /* This block is now empty. */ | ||
611 | if (bh && header == HDR(bh)) | ||
612 | unlock_buffer(bh); /* we were modifying in-place. */ | ||
613 | error = ext2_xattr_set2(inode, bh, NULL); | ||
614 | } else { | ||
615 | ext2_xattr_rehash(header, here); | ||
616 | if (bh && header == HDR(bh)) | ||
617 | unlock_buffer(bh); /* we were modifying in-place. */ | ||
618 | error = ext2_xattr_set2(inode, bh, header); | ||
619 | } | ||
620 | |||
621 | cleanup: | ||
622 | brelse(bh); | ||
623 | if (!(bh && header == HDR(bh))) | ||
624 | kfree(header); | ||
625 | up_write(&EXT2_I(inode)->xattr_sem); | ||
626 | |||
627 | return error; | ||
628 | } | ||
629 | |||
630 | /* | ||
631 | * Second half of ext2_xattr_set(): Update the file system. | ||
632 | */ | ||
633 | static int | ||
634 | ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, | ||
635 | struct ext2_xattr_header *header) | ||
636 | { | ||
637 | struct super_block *sb = inode->i_sb; | ||
638 | struct buffer_head *new_bh = NULL; | ||
639 | int error; | ||
640 | |||
641 | if (header) { | ||
642 | new_bh = ext2_xattr_cache_find(inode, header); | ||
643 | if (new_bh) { | ||
644 | /* We found an identical block in the cache. */ | ||
645 | if (new_bh == old_bh) { | ||
646 | ea_bdebug(new_bh, "keeping this block"); | ||
647 | } else { | ||
648 | /* The old block is released after updating | ||
649 | the inode. */ | ||
650 | ea_bdebug(new_bh, "reusing block"); | ||
651 | |||
652 | error = -EDQUOT; | ||
653 | if (DQUOT_ALLOC_BLOCK(inode, 1)) { | ||
654 | unlock_buffer(new_bh); | ||
655 | goto cleanup; | ||
656 | } | ||
657 | HDR(new_bh)->h_refcount = cpu_to_le32(1 + | ||
658 | le32_to_cpu(HDR(new_bh)->h_refcount)); | ||
659 | ea_bdebug(new_bh, "refcount now=%d", | ||
660 | le32_to_cpu(HDR(new_bh)->h_refcount)); | ||
661 | } | ||
662 | unlock_buffer(new_bh); | ||
663 | } else if (old_bh && header == HDR(old_bh)) { | ||
664 | /* Keep this block. No need to lock the block as we | ||
665 | don't need to change the reference count. */ | ||
666 | new_bh = old_bh; | ||
667 | get_bh(new_bh); | ||
668 | ext2_xattr_cache_insert(new_bh); | ||
669 | } else { | ||
670 | /* We need to allocate a new block */ | ||
671 | int goal = le32_to_cpu(EXT2_SB(sb)->s_es-> | ||
672 | s_first_data_block) + | ||
673 | EXT2_I(inode)->i_block_group * | ||
674 | EXT2_BLOCKS_PER_GROUP(sb); | ||
675 | int block = ext2_new_block(inode, goal, | ||
676 | NULL, NULL, &error); | ||
677 | if (error) | ||
678 | goto cleanup; | ||
679 | ea_idebug(inode, "creating block %d", block); | ||
680 | |||
681 | new_bh = sb_getblk(sb, block); | ||
682 | if (!new_bh) { | ||
683 | ext2_free_blocks(inode, block, 1); | ||
684 | error = -EIO; | ||
685 | goto cleanup; | ||
686 | } | ||
687 | lock_buffer(new_bh); | ||
688 | memcpy(new_bh->b_data, header, new_bh->b_size); | ||
689 | set_buffer_uptodate(new_bh); | ||
690 | unlock_buffer(new_bh); | ||
691 | ext2_xattr_cache_insert(new_bh); | ||
692 | |||
693 | ext2_xattr_update_super_block(sb); | ||
694 | } | ||
695 | mark_buffer_dirty(new_bh); | ||
696 | if (IS_SYNC(inode)) { | ||
697 | sync_dirty_buffer(new_bh); | ||
698 | error = -EIO; | ||
699 | if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) | ||
700 | goto cleanup; | ||
701 | } | ||
702 | } | ||
703 | |||
704 | /* Update the inode. */ | ||
705 | EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; | ||
706 | inode->i_ctime = CURRENT_TIME_SEC; | ||
707 | if (IS_SYNC(inode)) { | ||
708 | error = ext2_sync_inode (inode); | ||
709 | /* In case sync failed due to ENOSPC the inode was actually | ||
710 | * written (only some dirty data were not) so we just proceed | ||
711 | * as if nothing happened and cleanup the unused block */ | ||
712 | if (error && error != -ENOSPC) { | ||
713 | if (new_bh && new_bh != old_bh) | ||
714 | DQUOT_FREE_BLOCK(inode, 1); | ||
715 | goto cleanup; | ||
716 | } | ||
717 | } else | ||
718 | mark_inode_dirty(inode); | ||
719 | |||
720 | error = 0; | ||
721 | if (old_bh && old_bh != new_bh) { | ||
722 | struct mb_cache_entry *ce; | ||
723 | |||
724 | /* | ||
725 | * If there was an old block and we are no longer using it, | ||
726 | * release the old block. | ||
727 | */ | ||
728 | ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev, | ||
729 | old_bh->b_blocknr); | ||
730 | lock_buffer(old_bh); | ||
731 | if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { | ||
732 | /* Free the old block. */ | ||
733 | if (ce) | ||
734 | mb_cache_entry_free(ce); | ||
735 | ea_bdebug(old_bh, "freeing"); | ||
736 | ext2_free_blocks(inode, old_bh->b_blocknr, 1); | ||
737 | /* We let our caller release old_bh, so we | ||
738 | * need to duplicate the buffer before. */ | ||
739 | get_bh(old_bh); | ||
740 | bforget(old_bh); | ||
741 | } else { | ||
742 | /* Decrement the refcount only. */ | ||
743 | HDR(old_bh)->h_refcount = cpu_to_le32( | ||
744 | le32_to_cpu(HDR(old_bh)->h_refcount) - 1); | ||
745 | if (ce) | ||
746 | mb_cache_entry_release(ce); | ||
747 | DQUOT_FREE_BLOCK(inode, 1); | ||
748 | mark_buffer_dirty(old_bh); | ||
749 | ea_bdebug(old_bh, "refcount now=%d", | ||
750 | le32_to_cpu(HDR(old_bh)->h_refcount)); | ||
751 | } | ||
752 | unlock_buffer(old_bh); | ||
753 | } | ||
754 | |||
755 | cleanup: | ||
756 | brelse(new_bh); | ||
757 | |||
758 | return error; | ||
759 | } | ||
760 | |||
761 | /* | ||
762 | * ext2_xattr_delete_inode() | ||
763 | * | ||
764 | * Free extended attribute resources associated with this inode. This | ||
765 | * is called immediately before an inode is freed. | ||
766 | */ | ||
767 | void | ||
768 | ext2_xattr_delete_inode(struct inode *inode) | ||
769 | { | ||
770 | struct buffer_head *bh = NULL; | ||
771 | struct mb_cache_entry *ce; | ||
772 | |||
773 | down_write(&EXT2_I(inode)->xattr_sem); | ||
774 | if (!EXT2_I(inode)->i_file_acl) | ||
775 | goto cleanup; | ||
776 | bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); | ||
777 | if (!bh) { | ||
778 | ext2_error(inode->i_sb, "ext2_xattr_delete_inode", | ||
779 | "inode %ld: block %d read error", inode->i_ino, | ||
780 | EXT2_I(inode)->i_file_acl); | ||
781 | goto cleanup; | ||
782 | } | ||
783 | ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); | ||
784 | if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || | ||
785 | HDR(bh)->h_blocks != cpu_to_le32(1)) { | ||
786 | ext2_error(inode->i_sb, "ext2_xattr_delete_inode", | ||
787 | "inode %ld: bad block %d", inode->i_ino, | ||
788 | EXT2_I(inode)->i_file_acl); | ||
789 | goto cleanup; | ||
790 | } | ||
791 | ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr); | ||
792 | lock_buffer(bh); | ||
793 | if (HDR(bh)->h_refcount == cpu_to_le32(1)) { | ||
794 | if (ce) | ||
795 | mb_cache_entry_free(ce); | ||
796 | ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); | ||
797 | get_bh(bh); | ||
798 | bforget(bh); | ||
799 | } else { | ||
800 | HDR(bh)->h_refcount = cpu_to_le32( | ||
801 | le32_to_cpu(HDR(bh)->h_refcount) - 1); | ||
802 | if (ce) | ||
803 | mb_cache_entry_release(ce); | ||
804 | mark_buffer_dirty(bh); | ||
805 | if (IS_SYNC(inode)) | ||
806 | sync_dirty_buffer(bh); | ||
807 | DQUOT_FREE_BLOCK(inode, 1); | ||
808 | } | ||
809 | ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); | ||
810 | unlock_buffer(bh); | ||
811 | EXT2_I(inode)->i_file_acl = 0; | ||
812 | |||
813 | cleanup: | ||
814 | brelse(bh); | ||
815 | up_write(&EXT2_I(inode)->xattr_sem); | ||
816 | } | ||
817 | |||
818 | /* | ||
819 | * ext2_xattr_put_super() | ||
820 | * | ||
821 | * This is called when a file system is unmounted. | ||
822 | */ | ||
823 | void | ||
824 | ext2_xattr_put_super(struct super_block *sb) | ||
825 | { | ||
826 | mb_cache_shrink(ext2_xattr_cache, sb->s_bdev); | ||
827 | } | ||
828 | |||
829 | |||
830 | /* | ||
831 | * ext2_xattr_cache_insert() | ||
832 | * | ||
833 | * Create a new entry in the extended attribute cache, and insert | ||
834 | * it unless such an entry is already in the cache. | ||
835 | * | ||
836 | * Returns 0, or a negative error number on failure. | ||
837 | */ | ||
838 | static int | ||
839 | ext2_xattr_cache_insert(struct buffer_head *bh) | ||
840 | { | ||
841 | __u32 hash = le32_to_cpu(HDR(bh)->h_hash); | ||
842 | struct mb_cache_entry *ce; | ||
843 | int error; | ||
844 | |||
845 | ce = mb_cache_entry_alloc(ext2_xattr_cache); | ||
846 | if (!ce) | ||
847 | return -ENOMEM; | ||
848 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); | ||
849 | if (error) { | ||
850 | mb_cache_entry_free(ce); | ||
851 | if (error == -EBUSY) { | ||
852 | ea_bdebug(bh, "already in cache (%d cache entries)", | ||
853 | atomic_read(&ext2_xattr_cache->c_entry_count)); | ||
854 | error = 0; | ||
855 | } | ||
856 | } else { | ||
857 | ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, | ||
858 | atomic_read(&ext2_xattr_cache->c_entry_count)); | ||
859 | mb_cache_entry_release(ce); | ||
860 | } | ||
861 | return error; | ||
862 | } | ||
863 | |||
864 | /* | ||
865 | * ext2_xattr_cmp() | ||
866 | * | ||
867 | * Compare two extended attribute blocks for equality. | ||
868 | * | ||
869 | * Returns 0 if the blocks are equal, 1 if they differ, and | ||
870 | * a negative error number on errors. | ||
871 | */ | ||
872 | static int | ||
873 | ext2_xattr_cmp(struct ext2_xattr_header *header1, | ||
874 | struct ext2_xattr_header *header2) | ||
875 | { | ||
876 | struct ext2_xattr_entry *entry1, *entry2; | ||
877 | |||
878 | entry1 = ENTRY(header1+1); | ||
879 | entry2 = ENTRY(header2+1); | ||
880 | while (!IS_LAST_ENTRY(entry1)) { | ||
881 | if (IS_LAST_ENTRY(entry2)) | ||
882 | return 1; | ||
883 | if (entry1->e_hash != entry2->e_hash || | ||
884 | entry1->e_name_index != entry2->e_name_index || | ||
885 | entry1->e_name_len != entry2->e_name_len || | ||
886 | entry1->e_value_size != entry2->e_value_size || | ||
887 | memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) | ||
888 | return 1; | ||
889 | if (entry1->e_value_block != 0 || entry2->e_value_block != 0) | ||
890 | return -EIO; | ||
891 | if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), | ||
892 | (char *)header2 + le16_to_cpu(entry2->e_value_offs), | ||
893 | le32_to_cpu(entry1->e_value_size))) | ||
894 | return 1; | ||
895 | |||
896 | entry1 = EXT2_XATTR_NEXT(entry1); | ||
897 | entry2 = EXT2_XATTR_NEXT(entry2); | ||
898 | } | ||
899 | if (!IS_LAST_ENTRY(entry2)) | ||
900 | return 1; | ||
901 | return 0; | ||
902 | } | ||
903 | |||
904 | /* | ||
905 | * ext2_xattr_cache_find() | ||
906 | * | ||
907 | * Find an identical extended attribute block. | ||
908 | * | ||
909 | * Returns a locked buffer head to the block found, or NULL if such | ||
910 | * a block was not found or an error occurred. | ||
911 | */ | ||
912 | static struct buffer_head * | ||
913 | ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) | ||
914 | { | ||
915 | __u32 hash = le32_to_cpu(header->h_hash); | ||
916 | struct mb_cache_entry *ce; | ||
917 | |||
918 | if (!header->h_hash) | ||
919 | return NULL; /* never share */ | ||
920 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | ||
921 | again: | ||
922 | ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, | ||
923 | inode->i_sb->s_bdev, hash); | ||
924 | while (ce) { | ||
925 | struct buffer_head *bh; | ||
926 | |||
927 | if (IS_ERR(ce)) { | ||
928 | if (PTR_ERR(ce) == -EAGAIN) | ||
929 | goto again; | ||
930 | break; | ||
931 | } | ||
932 | |||
933 | bh = sb_bread(inode->i_sb, ce->e_block); | ||
934 | if (!bh) { | ||
935 | ext2_error(inode->i_sb, "ext2_xattr_cache_find", | ||
936 | "inode %ld: block %ld read error", | ||
937 | inode->i_ino, (unsigned long) ce->e_block); | ||
938 | } else { | ||
939 | lock_buffer(bh); | ||
940 | if (le32_to_cpu(HDR(bh)->h_refcount) > | ||
941 | EXT2_XATTR_REFCOUNT_MAX) { | ||
942 | ea_idebug(inode, "block %ld refcount %d>%d", | ||
943 | (unsigned long) ce->e_block, | ||
944 | le32_to_cpu(HDR(bh)->h_refcount), | ||
945 | EXT2_XATTR_REFCOUNT_MAX); | ||
946 | } else if (!ext2_xattr_cmp(header, HDR(bh))) { | ||
947 | ea_bdebug(bh, "b_count=%d", | ||
948 | atomic_read(&(bh->b_count))); | ||
949 | mb_cache_entry_release(ce); | ||
950 | return bh; | ||
951 | } | ||
952 | unlock_buffer(bh); | ||
953 | brelse(bh); | ||
954 | } | ||
955 | ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); | ||
956 | } | ||
957 | return NULL; | ||
958 | } | ||
959 | |||
960 | #define NAME_HASH_SHIFT 5 | ||
961 | #define VALUE_HASH_SHIFT 16 | ||
962 | |||
963 | /* | ||
964 | * ext2_xattr_hash_entry() | ||
965 | * | ||
966 | * Compute the hash of an extended attribute. | ||
967 | */ | ||
968 | static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, | ||
969 | struct ext2_xattr_entry *entry) | ||
970 | { | ||
971 | __u32 hash = 0; | ||
972 | char *name = entry->e_name; | ||
973 | int n; | ||
974 | |||
975 | for (n=0; n < entry->e_name_len; n++) { | ||
976 | hash = (hash << NAME_HASH_SHIFT) ^ | ||
977 | (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ | ||
978 | *name++; | ||
979 | } | ||
980 | |||
981 | if (entry->e_value_block == 0 && entry->e_value_size != 0) { | ||
982 | __le32 *value = (__le32 *)((char *)header + | ||
983 | le16_to_cpu(entry->e_value_offs)); | ||
984 | for (n = (le32_to_cpu(entry->e_value_size) + | ||
985 | EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { | ||
986 | hash = (hash << VALUE_HASH_SHIFT) ^ | ||
987 | (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ | ||
988 | le32_to_cpu(*value++); | ||
989 | } | ||
990 | } | ||
991 | entry->e_hash = cpu_to_le32(hash); | ||
992 | } | ||
993 | |||
994 | #undef NAME_HASH_SHIFT | ||
995 | #undef VALUE_HASH_SHIFT | ||
996 | |||
997 | #define BLOCK_HASH_SHIFT 16 | ||
998 | |||
999 | /* | ||
1000 | * ext2_xattr_rehash() | ||
1001 | * | ||
1002 | * Re-compute the extended attribute hash value after an entry has changed. | ||
1003 | */ | ||
1004 | static void ext2_xattr_rehash(struct ext2_xattr_header *header, | ||
1005 | struct ext2_xattr_entry *entry) | ||
1006 | { | ||
1007 | struct ext2_xattr_entry *here; | ||
1008 | __u32 hash = 0; | ||
1009 | |||
1010 | ext2_xattr_hash_entry(header, entry); | ||
1011 | here = ENTRY(header+1); | ||
1012 | while (!IS_LAST_ENTRY(here)) { | ||
1013 | if (!here->e_hash) { | ||
1014 | /* Block is not shared if an entry's hash value == 0 */ | ||
1015 | hash = 0; | ||
1016 | break; | ||
1017 | } | ||
1018 | hash = (hash << BLOCK_HASH_SHIFT) ^ | ||
1019 | (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ | ||
1020 | le32_to_cpu(here->e_hash); | ||
1021 | here = EXT2_XATTR_NEXT(here); | ||
1022 | } | ||
1023 | header->h_hash = cpu_to_le32(hash); | ||
1024 | } | ||
1025 | |||
1026 | #undef BLOCK_HASH_SHIFT | ||
1027 | |||
1028 | int __init | ||
1029 | init_ext2_xattr(void) | ||
1030 | { | ||
1031 | ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, | ||
1032 | sizeof(struct mb_cache_entry) + | ||
1033 | sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); | ||
1034 | if (!ext2_xattr_cache) | ||
1035 | return -ENOMEM; | ||
1036 | return 0; | ||
1037 | } | ||
1038 | |||
1039 | void | ||
1040 | exit_ext2_xattr(void) | ||
1041 | { | ||
1042 | mb_cache_destroy(ext2_xattr_cache); | ||
1043 | } | ||
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h new file mode 100644 index 000000000000..5f3bfde3b810 --- /dev/null +++ b/fs/ext2/xattr.h | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | File: linux/fs/ext2/xattr.h | ||
3 | |||
4 | On-disk format of extended attributes for the ext2 filesystem. | ||
5 | |||
6 | (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/xattr.h> | ||
12 | |||
/* Magic value in attribute blocks (stored in h_magic) */
#define EXT2_XATTR_MAGIC			0xEA020000

/* Maximum number of references to one attribute block */
#define EXT2_XATTR_REFCOUNT_MAX			1024

/* Name indexes (stored in e_name_index) */
#define EXT2_XATTR_INDEX_USER			1
#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS	2
#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT	3
#define EXT2_XATTR_INDEX_TRUSTED		4
#define EXT2_XATTR_INDEX_LUSTRE			5
#define EXT2_XATTR_INDEX_SECURITY		6
26 | |||
27 | struct ext2_xattr_header { | ||
28 | __le32 h_magic; /* magic number for identification */ | ||
29 | __le32 h_refcount; /* reference count */ | ||
30 | __le32 h_blocks; /* number of disk blocks used */ | ||
31 | __le32 h_hash; /* hash value of all attributes */ | ||
32 | __u32 h_reserved[4]; /* zero right now */ | ||
33 | }; | ||
34 | |||
35 | struct ext2_xattr_entry { | ||
36 | __u8 e_name_len; /* length of name */ | ||
37 | __u8 e_name_index; /* attribute name index */ | ||
38 | __le16 e_value_offs; /* offset in disk block of value */ | ||
39 | __le32 e_value_block; /* disk block attribute is stored on (n/i) */ | ||
40 | __le32 e_value_size; /* size of attribute value */ | ||
41 | __le32 e_hash; /* hash value of name and value */ | ||
42 | char e_name[0]; /* attribute name */ | ||
43 | }; | ||
44 | |||
/* Names and values are padded to a multiple of EXT2_XATTR_PAD bytes. */
#define EXT2_XATTR_PAD_BITS		2
#define EXT2_XATTR_PAD		(1<<EXT2_XATTR_PAD_BITS)
#define EXT2_XATTR_ROUND		(EXT2_XATTR_PAD-1)

/* Round a byte count up to the next multiple of EXT2_XATTR_PAD. */
#define EXT2_XATTR_SIZE(size) \
	(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)

/* Padded on-disk length of an entry whose name is name_len bytes long. */
#define EXT2_XATTR_LEN(name_len) \
	EXT2_XATTR_SIZE((name_len) + sizeof(struct ext2_xattr_entry))

/* Entry that follows the given entry in the block. */
#define EXT2_XATTR_NEXT(entry) \
	((struct ext2_xattr_entry *) \
	 ((char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)))
56 | |||
57 | # ifdef CONFIG_EXT2_FS_XATTR | ||
58 | |||
59 | extern struct xattr_handler ext2_xattr_user_handler; | ||
60 | extern struct xattr_handler ext2_xattr_trusted_handler; | ||
61 | extern struct xattr_handler ext2_xattr_acl_access_handler; | ||
62 | extern struct xattr_handler ext2_xattr_acl_default_handler; | ||
63 | extern struct xattr_handler ext2_xattr_security_handler; | ||
64 | |||
65 | extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); | ||
66 | |||
67 | extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); | ||
68 | extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); | ||
69 | |||
70 | extern void ext2_xattr_delete_inode(struct inode *); | ||
71 | extern void ext2_xattr_put_super(struct super_block *); | ||
72 | |||
73 | extern int init_ext2_xattr(void); | ||
74 | extern void exit_ext2_xattr(void); | ||
75 | |||
76 | extern struct xattr_handler *ext2_xattr_handlers[]; | ||
77 | |||
78 | # else /* CONFIG_EXT2_FS_XATTR */ | ||
79 | |||
/*
 * Stubs used when extended attribute support is compiled out.
 * Attribute accesses report -EOPNOTSUPP; the remaining hooks do nothing.
 */

/* Reading an attribute is not supported. */
static inline int
ext2_xattr_get(struct inode *inode, int name_index, const char *name,
	       void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

/* Setting or removing an attribute is not supported. */
static inline int
ext2_xattr_set(struct inode *inode, int name_index, const char *name,
	       const void *value, size_t size, int flags)
{
	return -EOPNOTSUPP;
}

/* Nothing to free for an inode. */
static inline void
ext2_xattr_delete_inode(struct inode *inode)
{
}

/* Nothing to release at unmount. */
static inline void
ext2_xattr_put_super(struct super_block *sb)
{
}

/* No cache to set up; always succeeds. */
static inline int
init_ext2_xattr(void)
{
	return 0;
}

/* No cache to tear down. */
static inline void
exit_ext2_xattr(void)
{
}

/* No handler table without extended attribute support. */
#define ext2_xattr_handlers	NULL
116 | |||
117 | # endif /* CONFIG_EXT2_FS_XATTR */ | ||
118 | |||
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c new file mode 100644 index 000000000000..6a6c59fbe599 --- /dev/null +++ b/fs/ext2/xattr_security.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/xattr_security.c | ||
3 | * Handler for storing security labels as extended attributes. | ||
4 | */ | ||
5 | |||
6 | #include <linux/module.h> | ||
7 | #include <linux/string.h> | ||
8 | #include <linux/fs.h> | ||
9 | #include <linux/smp_lock.h> | ||
10 | #include <linux/ext2_fs.h> | ||
11 | #include "xattr.h" | ||
12 | |||
13 | static size_t | ||
14 | ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, | ||
15 | const char *name, size_t name_len) | ||
16 | { | ||
17 | const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; | ||
18 | const size_t total_len = prefix_len + name_len + 1; | ||
19 | |||
20 | if (list && total_len <= list_size) { | ||
21 | memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); | ||
22 | memcpy(list+prefix_len, name, name_len); | ||
23 | list[prefix_len + name_len] = '\0'; | ||
24 | } | ||
25 | return total_len; | ||
26 | } | ||
27 | |||
28 | static int | ||
29 | ext2_xattr_security_get(struct inode *inode, const char *name, | ||
30 | void *buffer, size_t size) | ||
31 | { | ||
32 | if (strcmp(name, "") == 0) | ||
33 | return -EINVAL; | ||
34 | return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, | ||
35 | buffer, size); | ||
36 | } | ||
37 | |||
38 | static int | ||
39 | ext2_xattr_security_set(struct inode *inode, const char *name, | ||
40 | const void *value, size_t size, int flags) | ||
41 | { | ||
42 | if (strcmp(name, "") == 0) | ||
43 | return -EINVAL; | ||
44 | return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, | ||
45 | value, size, flags); | ||
46 | } | ||
47 | |||
48 | struct xattr_handler ext2_xattr_security_handler = { | ||
49 | .prefix = XATTR_SECURITY_PREFIX, | ||
50 | .list = ext2_xattr_security_list, | ||
51 | .get = ext2_xattr_security_get, | ||
52 | .set = ext2_xattr_security_set, | ||
53 | }; | ||
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c new file mode 100644 index 000000000000..52b30ee6a25f --- /dev/null +++ b/fs/ext2/xattr_trusted.c | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/xattr_trusted.c | ||
3 | * Handler for trusted extended attributes. | ||
4 | * | ||
5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | ||
6 | */ | ||
7 | |||
8 | #include <linux/module.h> | ||
9 | #include <linux/string.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/smp_lock.h> | ||
12 | #include <linux/ext2_fs.h> | ||
13 | #include "xattr.h" | ||
14 | |||
15 | #define XATTR_TRUSTED_PREFIX "trusted." | ||
16 | |||
17 | static size_t | ||
18 | ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, | ||
19 | const char *name, size_t name_len) | ||
20 | { | ||
21 | const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; | ||
22 | const size_t total_len = prefix_len + name_len + 1; | ||
23 | |||
24 | if (!capable(CAP_SYS_ADMIN)) | ||
25 | return 0; | ||
26 | |||
27 | if (list && total_len <= list_size) { | ||
28 | memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); | ||
29 | memcpy(list+prefix_len, name, name_len); | ||
30 | list[prefix_len + name_len] = '\0'; | ||
31 | } | ||
32 | return total_len; | ||
33 | } | ||
34 | |||
35 | static int | ||
36 | ext2_xattr_trusted_get(struct inode *inode, const char *name, | ||
37 | void *buffer, size_t size) | ||
38 | { | ||
39 | if (strcmp(name, "") == 0) | ||
40 | return -EINVAL; | ||
41 | if (!capable(CAP_SYS_ADMIN)) | ||
42 | return -EPERM; | ||
43 | return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, | ||
44 | buffer, size); | ||
45 | } | ||
46 | |||
47 | static int | ||
48 | ext2_xattr_trusted_set(struct inode *inode, const char *name, | ||
49 | const void *value, size_t size, int flags) | ||
50 | { | ||
51 | if (strcmp(name, "") == 0) | ||
52 | return -EINVAL; | ||
53 | if (!capable(CAP_SYS_ADMIN)) | ||
54 | return -EPERM; | ||
55 | return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, | ||
56 | value, size, flags); | ||
57 | } | ||
58 | |||
59 | struct xattr_handler ext2_xattr_trusted_handler = { | ||
60 | .prefix = XATTR_TRUSTED_PREFIX, | ||
61 | .list = ext2_xattr_trusted_list, | ||
62 | .get = ext2_xattr_trusted_get, | ||
63 | .set = ext2_xattr_trusted_set, | ||
64 | }; | ||
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c new file mode 100644 index 000000000000..0c03ea131a94 --- /dev/null +++ b/fs/ext2/xattr_user.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/xattr_user.c | ||
3 | * Handler for extended user attributes. | ||
4 | * | ||
5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | ||
6 | */ | ||
7 | |||
8 | #include <linux/init.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/string.h> | ||
11 | #include "ext2.h" | ||
12 | #include "xattr.h" | ||
13 | |||
14 | #define XATTR_USER_PREFIX "user." | ||
15 | |||
16 | static size_t | ||
17 | ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, | ||
18 | const char *name, size_t name_len) | ||
19 | { | ||
20 | const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; | ||
21 | const size_t total_len = prefix_len + name_len + 1; | ||
22 | |||
23 | if (!test_opt(inode->i_sb, XATTR_USER)) | ||
24 | return 0; | ||
25 | |||
26 | if (list && total_len <= list_size) { | ||
27 | memcpy(list, XATTR_USER_PREFIX, prefix_len); | ||
28 | memcpy(list+prefix_len, name, name_len); | ||
29 | list[prefix_len + name_len] = '\0'; | ||
30 | } | ||
31 | return total_len; | ||
32 | } | ||
33 | |||
34 | static int | ||
35 | ext2_xattr_user_get(struct inode *inode, const char *name, | ||
36 | void *buffer, size_t size) | ||
37 | { | ||
38 | int error; | ||
39 | |||
40 | if (strcmp(name, "") == 0) | ||
41 | return -EINVAL; | ||
42 | if (!test_opt(inode->i_sb, XATTR_USER)) | ||
43 | return -EOPNOTSUPP; | ||
44 | error = permission(inode, MAY_READ, NULL); | ||
45 | if (error) | ||
46 | return error; | ||
47 | |||
48 | return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); | ||
49 | } | ||
50 | |||
51 | static int | ||
52 | ext2_xattr_user_set(struct inode *inode, const char *name, | ||
53 | const void *value, size_t size, int flags) | ||
54 | { | ||
55 | int error; | ||
56 | |||
57 | if (strcmp(name, "") == 0) | ||
58 | return -EINVAL; | ||
59 | if (!test_opt(inode->i_sb, XATTR_USER)) | ||
60 | return -EOPNOTSUPP; | ||
61 | if ( !S_ISREG(inode->i_mode) && | ||
62 | (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) | ||
63 | return -EPERM; | ||
64 | error = permission(inode, MAY_WRITE, NULL); | ||
65 | if (error) | ||
66 | return error; | ||
67 | |||
68 | return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, | ||
69 | value, size, flags); | ||
70 | } | ||
71 | |||
72 | struct xattr_handler ext2_xattr_user_handler = { | ||
73 | .prefix = XATTR_USER_PREFIX, | ||
74 | .list = ext2_xattr_user_list, | ||
75 | .get = ext2_xattr_user_get, | ||
76 | .set = ext2_xattr_user_set, | ||
77 | }; | ||