diff options
Diffstat (limited to 'fs')
275 files changed, 22122 insertions, 4432 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index cef8b18ceaa3..86b203fc3c56 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -66,6 +66,13 @@ config GENERIC_ACL | |||
66 | bool | 66 | bool |
67 | select FS_POSIX_ACL | 67 | select FS_POSIX_ACL |
68 | 68 | ||
69 | menu "Caches" | ||
70 | |||
71 | source "fs/fscache/Kconfig" | ||
72 | source "fs/cachefiles/Kconfig" | ||
73 | |||
74 | endmenu | ||
75 | |||
69 | if BLOCK | 76 | if BLOCK |
70 | menu "CD-ROM/DVD Filesystems" | 77 | menu "CD-ROM/DVD Filesystems" |
71 | 78 | ||
@@ -169,6 +176,8 @@ source "fs/romfs/Kconfig" | |||
169 | source "fs/sysv/Kconfig" | 176 | source "fs/sysv/Kconfig" |
170 | source "fs/ufs/Kconfig" | 177 | source "fs/ufs/Kconfig" |
171 | 178 | ||
179 | source "fs/exofs/Kconfig" | ||
180 | |||
172 | endif # MISC_FILESYSTEMS | 181 | endif # MISC_FILESYSTEMS |
173 | 182 | ||
174 | menuconfig NETWORK_FILESYSTEMS | 183 | menuconfig NETWORK_FILESYSTEMS |
diff --git a/fs/Makefile b/fs/Makefile index 6e82a307bcd4..70b2aed87133 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ | 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ |
14 | stack.o | 14 | stack.o fs_struct.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_BLOCK),y) | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o | 17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o |
@@ -63,6 +63,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o | |||
63 | obj-$(CONFIG_DLM) += dlm/ | 63 | obj-$(CONFIG_DLM) += dlm/ |
64 | 64 | ||
65 | # Do not add any filesystems before this line | 65 | # Do not add any filesystems before this line |
66 | obj-$(CONFIG_FSCACHE) += fscache/ | ||
66 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 67 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
67 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 68 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
68 | obj-$(CONFIG_EXT2_FS) += ext2/ | 69 | obj-$(CONFIG_EXT2_FS) += ext2/ |
@@ -116,7 +117,9 @@ obj-$(CONFIG_AFS_FS) += afs/ | |||
116 | obj-$(CONFIG_BEFS_FS) += befs/ | 117 | obj-$(CONFIG_BEFS_FS) += befs/ |
117 | obj-$(CONFIG_HOSTFS) += hostfs/ | 118 | obj-$(CONFIG_HOSTFS) += hostfs/ |
118 | obj-$(CONFIG_HPPFS) += hppfs/ | 119 | obj-$(CONFIG_HPPFS) += hppfs/ |
120 | obj-$(CONFIG_CACHEFILES) += cachefiles/ | ||
119 | obj-$(CONFIG_DEBUG_FS) += debugfs/ | 121 | obj-$(CONFIG_DEBUG_FS) += debugfs/ |
120 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ | 122 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ |
121 | obj-$(CONFIG_BTRFS_FS) += btrfs/ | 123 | obj-$(CONFIG_BTRFS_FS) += btrfs/ |
122 | obj-$(CONFIG_GFS2_FS) += gfs2/ | 124 | obj-$(CONFIG_GFS2_FS) += gfs2/ |
125 | obj-$(CONFIG_EXOFS_FS) += exofs/ | ||
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 7f83a46f2b7e..dd9becca4241 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -219,16 +219,20 @@ static int adfs_remount(struct super_block *sb, int *flags, char *data) | |||
219 | 219 | ||
220 | static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 220 | static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
221 | { | 221 | { |
222 | struct adfs_sb_info *asb = ADFS_SB(dentry->d_sb); | 222 | struct super_block *sb = dentry->d_sb; |
223 | struct adfs_sb_info *sbi = ADFS_SB(sb); | ||
224 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
223 | 225 | ||
224 | buf->f_type = ADFS_SUPER_MAGIC; | 226 | buf->f_type = ADFS_SUPER_MAGIC; |
225 | buf->f_namelen = asb->s_namelen; | 227 | buf->f_namelen = sbi->s_namelen; |
226 | buf->f_bsize = dentry->d_sb->s_blocksize; | 228 | buf->f_bsize = sb->s_blocksize; |
227 | buf->f_blocks = asb->s_size; | 229 | buf->f_blocks = sbi->s_size; |
228 | buf->f_files = asb->s_ids_per_zone * asb->s_map_size; | 230 | buf->f_files = sbi->s_ids_per_zone * sbi->s_map_size; |
229 | buf->f_bavail = | 231 | buf->f_bavail = |
230 | buf->f_bfree = adfs_map_free(dentry->d_sb); | 232 | buf->f_bfree = adfs_map_free(sb); |
231 | buf->f_ffree = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks; | 233 | buf->f_ffree = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks; |
234 | buf->f_fsid.val[0] = (u32)id; | ||
235 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
232 | 236 | ||
233 | return 0; | 237 | return 0; |
234 | } | 238 | } |
diff --git a/fs/affs/super.c b/fs/affs/super.c index a19d64b582aa..5ce695e707fe 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -533,6 +533,7 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
533 | { | 533 | { |
534 | struct super_block *sb = dentry->d_sb; | 534 | struct super_block *sb = dentry->d_sb; |
535 | int free; | 535 | int free; |
536 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
536 | 537 | ||
537 | pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size, | 538 | pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size, |
538 | AFFS_SB(sb)->s_reserved); | 539 | AFFS_SB(sb)->s_reserved); |
@@ -543,6 +544,9 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
543 | buf->f_blocks = AFFS_SB(sb)->s_partition_size - AFFS_SB(sb)->s_reserved; | 544 | buf->f_blocks = AFFS_SB(sb)->s_partition_size - AFFS_SB(sb)->s_reserved; |
544 | buf->f_bfree = free; | 545 | buf->f_bfree = free; |
545 | buf->f_bavail = free; | 546 | buf->f_bavail = free; |
547 | buf->f_fsid.val[0] = (u32)id; | ||
548 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
549 | buf->f_namelen = 30; | ||
546 | return 0; | 550 | return 0; |
547 | } | 551 | } |
548 | 552 | ||
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index e7b522fe15e1..5c4e61d3c772 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig | |||
@@ -19,3 +19,11 @@ config AFS_DEBUG | |||
19 | See <file:Documentation/filesystems/afs.txt> for more information. | 19 | See <file:Documentation/filesystems/afs.txt> for more information. |
20 | 20 | ||
21 | If unsure, say N. | 21 | If unsure, say N. |
22 | |||
23 | config AFS_FSCACHE | ||
24 | bool "Provide AFS client caching support (EXPERIMENTAL)" | ||
25 | depends on EXPERIMENTAL | ||
26 | depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y | ||
27 | help | ||
28 | Say Y here if you want AFS data to be cached locally on disk through | ||
29 | the generic filesystem cache manager | ||
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index a66671082cfb..4f64b95d57bd 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile | |||
@@ -2,7 +2,10 @@ | |||
2 | # Makefile for Red Hat Linux AFS client. | 2 | # Makefile for Red Hat Linux AFS client. |
3 | # | 3 | # |
4 | 4 | ||
5 | afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o | ||
6 | |||
5 | kafs-objs := \ | 7 | kafs-objs := \ |
8 | $(afs-cache-y) \ | ||
6 | callback.o \ | 9 | callback.o \ |
7 | cell.o \ | 10 | cell.o \ |
8 | cmservice.o \ | 11 | cmservice.o \ |
diff --git a/fs/afs/cache.c b/fs/afs/cache.c index de0d7de69edc..e2b1d3f16519 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS caching stuff | 1 | /* AFS caching stuff |
2 | * | 2 | * |
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -9,248 +9,395 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifdef AFS_CACHING_SUPPORT | 12 | #include <linux/slab.h> |
13 | static cachefs_match_val_t afs_cell_cache_match(void *target, | 13 | #include <linux/sched.h> |
14 | const void *entry); | 14 | #include "internal.h" |
15 | static void afs_cell_cache_update(void *source, void *entry); | 15 | |
16 | 16 | static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, | |
17 | struct cachefs_index_def afs_cache_cell_index_def = { | 17 | void *buffer, uint16_t buflen); |
18 | .name = "cell_ix", | 18 | static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, |
19 | .data_size = sizeof(struct afs_cache_cell), | 19 | void *buffer, uint16_t buflen); |
20 | .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, | 20 | static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, |
21 | .match = afs_cell_cache_match, | 21 | const void *buffer, |
22 | .update = afs_cell_cache_update, | 22 | uint16_t buflen); |
23 | |||
24 | static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, | ||
25 | void *buffer, uint16_t buflen); | ||
26 | static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, | ||
27 | void *buffer, uint16_t buflen); | ||
28 | static enum fscache_checkaux afs_vlocation_cache_check_aux( | ||
29 | void *cookie_netfs_data, const void *buffer, uint16_t buflen); | ||
30 | |||
31 | static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, | ||
32 | void *buffer, uint16_t buflen); | ||
33 | |||
34 | static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, | ||
35 | void *buffer, uint16_t buflen); | ||
36 | static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, | ||
37 | uint64_t *size); | ||
38 | static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, | ||
39 | void *buffer, uint16_t buflen); | ||
40 | static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, | ||
41 | const void *buffer, | ||
42 | uint16_t buflen); | ||
43 | static void afs_vnode_cache_now_uncached(void *cookie_netfs_data); | ||
44 | |||
45 | struct fscache_netfs afs_cache_netfs = { | ||
46 | .name = "afs", | ||
47 | .version = 0, | ||
48 | }; | ||
49 | |||
50 | struct fscache_cookie_def afs_cell_cache_index_def = { | ||
51 | .name = "AFS.cell", | ||
52 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
53 | .get_key = afs_cell_cache_get_key, | ||
54 | .get_aux = afs_cell_cache_get_aux, | ||
55 | .check_aux = afs_cell_cache_check_aux, | ||
56 | }; | ||
57 | |||
58 | struct fscache_cookie_def afs_vlocation_cache_index_def = { | ||
59 | .name = "AFS.vldb", | ||
60 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
61 | .get_key = afs_vlocation_cache_get_key, | ||
62 | .get_aux = afs_vlocation_cache_get_aux, | ||
63 | .check_aux = afs_vlocation_cache_check_aux, | ||
64 | }; | ||
65 | |||
66 | struct fscache_cookie_def afs_volume_cache_index_def = { | ||
67 | .name = "AFS.volume", | ||
68 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
69 | .get_key = afs_volume_cache_get_key, | ||
70 | }; | ||
71 | |||
72 | struct fscache_cookie_def afs_vnode_cache_index_def = { | ||
73 | .name = "AFS.vnode", | ||
74 | .type = FSCACHE_COOKIE_TYPE_DATAFILE, | ||
75 | .get_key = afs_vnode_cache_get_key, | ||
76 | .get_attr = afs_vnode_cache_get_attr, | ||
77 | .get_aux = afs_vnode_cache_get_aux, | ||
78 | .check_aux = afs_vnode_cache_check_aux, | ||
79 | .now_uncached = afs_vnode_cache_now_uncached, | ||
23 | }; | 80 | }; |
24 | #endif | ||
25 | 81 | ||
26 | /* | 82 | /* |
27 | * match a cell record obtained from the cache | 83 | * set the key for the index entry |
28 | */ | 84 | */ |
29 | #ifdef AFS_CACHING_SUPPORT | 85 | static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, |
30 | static cachefs_match_val_t afs_cell_cache_match(void *target, | 86 | void *buffer, uint16_t bufmax) |
31 | const void *entry) | ||
32 | { | 87 | { |
33 | const struct afs_cache_cell *ccell = entry; | 88 | const struct afs_cell *cell = cookie_netfs_data; |
34 | struct afs_cell *cell = target; | 89 | uint16_t klen; |
35 | 90 | ||
36 | _enter("{%s},{%s}", ccell->name, cell->name); | 91 | _enter("%p,%p,%u", cell, buffer, bufmax); |
37 | 92 | ||
38 | if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { | 93 | klen = strlen(cell->name); |
39 | _leave(" = SUCCESS"); | 94 | if (klen > bufmax) |
40 | return CACHEFS_MATCH_SUCCESS; | 95 | return 0; |
41 | } | ||
42 | 96 | ||
43 | _leave(" = FAILED"); | 97 | memcpy(buffer, cell->name, klen); |
44 | return CACHEFS_MATCH_FAILED; | 98 | return klen; |
45 | } | 99 | } |
46 | #endif | ||
47 | 100 | ||
48 | /* | 101 | /* |
49 | * update a cell record in the cache | 102 | * provide new auxilliary cache data |
50 | */ | 103 | */ |
51 | #ifdef AFS_CACHING_SUPPORT | 104 | static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, |
52 | static void afs_cell_cache_update(void *source, void *entry) | 105 | void *buffer, uint16_t bufmax) |
53 | { | 106 | { |
54 | struct afs_cache_cell *ccell = entry; | 107 | const struct afs_cell *cell = cookie_netfs_data; |
55 | struct afs_cell *cell = source; | 108 | uint16_t dlen; |
56 | 109 | ||
57 | _enter("%p,%p", source, entry); | 110 | _enter("%p,%p,%u", cell, buffer, bufmax); |
58 | 111 | ||
59 | strncpy(ccell->name, cell->name, sizeof(ccell->name)); | 112 | dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); |
113 | dlen = min(dlen, bufmax); | ||
114 | dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); | ||
60 | 115 | ||
61 | memcpy(ccell->vl_servers, | 116 | memcpy(buffer, cell->vl_addrs, dlen); |
62 | cell->vl_addrs, | 117 | return dlen; |
63 | min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); | 118 | } |
64 | 119 | ||
120 | /* | ||
121 | * check that the auxilliary data indicates that the entry is still valid | ||
122 | */ | ||
123 | static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, | ||
124 | const void *buffer, | ||
125 | uint16_t buflen) | ||
126 | { | ||
127 | _leave(" = OKAY"); | ||
128 | return FSCACHE_CHECKAUX_OKAY; | ||
65 | } | 129 | } |
66 | #endif | ||
67 | |||
68 | #ifdef AFS_CACHING_SUPPORT | ||
69 | static cachefs_match_val_t afs_vlocation_cache_match(void *target, | ||
70 | const void *entry); | ||
71 | static void afs_vlocation_cache_update(void *source, void *entry); | ||
72 | |||
73 | struct cachefs_index_def afs_vlocation_cache_index_def = { | ||
74 | .name = "vldb", | ||
75 | .data_size = sizeof(struct afs_cache_vlocation), | ||
76 | .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, | ||
77 | .match = afs_vlocation_cache_match, | ||
78 | .update = afs_vlocation_cache_update, | ||
79 | }; | ||
80 | #endif | ||
81 | 130 | ||
131 | /*****************************************************************************/ | ||
82 | /* | 132 | /* |
83 | * match a VLDB record stored in the cache | 133 | * set the key for the index entry |
84 | * - may also load target from entry | ||
85 | */ | 134 | */ |
86 | #ifdef AFS_CACHING_SUPPORT | 135 | static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, |
87 | static cachefs_match_val_t afs_vlocation_cache_match(void *target, | 136 | void *buffer, uint16_t bufmax) |
88 | const void *entry) | ||
89 | { | 137 | { |
90 | const struct afs_cache_vlocation *vldb = entry; | 138 | const struct afs_vlocation *vlocation = cookie_netfs_data; |
91 | struct afs_vlocation *vlocation = target; | 139 | uint16_t klen; |
140 | |||
141 | _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); | ||
142 | |||
143 | klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); | ||
144 | if (klen > bufmax) | ||
145 | return 0; | ||
92 | 146 | ||
93 | _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); | 147 | memcpy(buffer, vlocation->vldb.name, klen); |
94 | 148 | ||
95 | if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 | 149 | _leave(" = %u", klen); |
96 | ) { | 150 | return klen; |
97 | if (!vlocation->valid || | 151 | } |
98 | vlocation->vldb.rtime == vldb->rtime | 152 | |
153 | /* | ||
154 | * provide new auxilliary cache data | ||
155 | */ | ||
156 | static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, | ||
157 | void *buffer, uint16_t bufmax) | ||
158 | { | ||
159 | const struct afs_vlocation *vlocation = cookie_netfs_data; | ||
160 | uint16_t dlen; | ||
161 | |||
162 | _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); | ||
163 | |||
164 | dlen = sizeof(struct afs_cache_vlocation); | ||
165 | dlen -= offsetof(struct afs_cache_vlocation, nservers); | ||
166 | if (dlen > bufmax) | ||
167 | return 0; | ||
168 | |||
169 | memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); | ||
170 | |||
171 | _leave(" = %u", dlen); | ||
172 | return dlen; | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * check that the auxilliary data indicates that the entry is still valid | ||
177 | */ | ||
178 | static | ||
179 | enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, | ||
180 | const void *buffer, | ||
181 | uint16_t buflen) | ||
182 | { | ||
183 | const struct afs_cache_vlocation *cvldb; | ||
184 | struct afs_vlocation *vlocation = cookie_netfs_data; | ||
185 | uint16_t dlen; | ||
186 | |||
187 | _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); | ||
188 | |||
189 | /* check the size of the data is what we're expecting */ | ||
190 | dlen = sizeof(struct afs_cache_vlocation); | ||
191 | dlen -= offsetof(struct afs_cache_vlocation, nservers); | ||
192 | if (dlen != buflen) | ||
193 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
194 | |||
195 | cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); | ||
196 | |||
197 | /* if what's on disk is more valid than what's in memory, then use the | ||
198 | * VL record from the cache */ | ||
199 | if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { | ||
200 | memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); | ||
201 | vlocation->valid = 1; | ||
202 | _leave(" = SUCCESS [c->m]"); | ||
203 | return FSCACHE_CHECKAUX_OKAY; | ||
204 | } | ||
205 | |||
206 | /* need to update the cache if the cached info differs */ | ||
207 | if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { | ||
208 | /* delete if the volume IDs for this name differ */ | ||
209 | if (memcmp(&vlocation->vldb.vid, &cvldb->vid, | ||
210 | sizeof(cvldb->vid)) != 0 | ||
99 | ) { | 211 | ) { |
100 | vlocation->vldb = *vldb; | 212 | _leave(" = OBSOLETE"); |
101 | vlocation->valid = 1; | 213 | return FSCACHE_CHECKAUX_OBSOLETE; |
102 | _leave(" = SUCCESS [c->m]"); | ||
103 | return CACHEFS_MATCH_SUCCESS; | ||
104 | } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { | ||
105 | /* delete if VIDs for this name differ */ | ||
106 | if (memcmp(&vlocation->vldb.vid, | ||
107 | &vldb->vid, | ||
108 | sizeof(vldb->vid)) != 0) { | ||
109 | _leave(" = DELETE"); | ||
110 | return CACHEFS_MATCH_SUCCESS_DELETE; | ||
111 | } | ||
112 | |||
113 | _leave(" = UPDATE"); | ||
114 | return CACHEFS_MATCH_SUCCESS_UPDATE; | ||
115 | } else { | ||
116 | _leave(" = SUCCESS"); | ||
117 | return CACHEFS_MATCH_SUCCESS; | ||
118 | } | 214 | } |
215 | |||
216 | _leave(" = UPDATE"); | ||
217 | return FSCACHE_CHECKAUX_NEEDS_UPDATE; | ||
119 | } | 218 | } |
120 | 219 | ||
121 | _leave(" = FAILED"); | 220 | _leave(" = OKAY"); |
122 | return CACHEFS_MATCH_FAILED; | 221 | return FSCACHE_CHECKAUX_OKAY; |
123 | } | 222 | } |
124 | #endif | ||
125 | 223 | ||
224 | /*****************************************************************************/ | ||
126 | /* | 225 | /* |
127 | * update a VLDB record stored in the cache | 226 | * set the key for the volume index entry |
128 | */ | 227 | */ |
129 | #ifdef AFS_CACHING_SUPPORT | 228 | static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, |
130 | static void afs_vlocation_cache_update(void *source, void *entry) | 229 | void *buffer, uint16_t bufmax) |
131 | { | 230 | { |
132 | struct afs_cache_vlocation *vldb = entry; | 231 | const struct afs_volume *volume = cookie_netfs_data; |
133 | struct afs_vlocation *vlocation = source; | 232 | uint16_t klen; |
233 | |||
234 | _enter("{%u},%p,%u", volume->type, buffer, bufmax); | ||
235 | |||
236 | klen = sizeof(volume->type); | ||
237 | if (klen > bufmax) | ||
238 | return 0; | ||
134 | 239 | ||
135 | _enter(""); | 240 | memcpy(buffer, &volume->type, sizeof(volume->type)); |
241 | |||
242 | _leave(" = %u", klen); | ||
243 | return klen; | ||
136 | 244 | ||
137 | *vldb = vlocation->vldb; | ||
138 | } | 245 | } |
139 | #endif | ||
140 | |||
141 | #ifdef AFS_CACHING_SUPPORT | ||
142 | static cachefs_match_val_t afs_volume_cache_match(void *target, | ||
143 | const void *entry); | ||
144 | static void afs_volume_cache_update(void *source, void *entry); | ||
145 | |||
146 | struct cachefs_index_def afs_volume_cache_index_def = { | ||
147 | .name = "volume", | ||
148 | .data_size = sizeof(struct afs_cache_vhash), | ||
149 | .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 }, | ||
150 | .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 }, | ||
151 | .match = afs_volume_cache_match, | ||
152 | .update = afs_volume_cache_update, | ||
153 | }; | ||
154 | #endif | ||
155 | 246 | ||
247 | /*****************************************************************************/ | ||
156 | /* | 248 | /* |
157 | * match a volume hash record stored in the cache | 249 | * set the key for the index entry |
158 | */ | 250 | */ |
159 | #ifdef AFS_CACHING_SUPPORT | 251 | static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, |
160 | static cachefs_match_val_t afs_volume_cache_match(void *target, | 252 | void *buffer, uint16_t bufmax) |
161 | const void *entry) | ||
162 | { | 253 | { |
163 | const struct afs_cache_vhash *vhash = entry; | 254 | const struct afs_vnode *vnode = cookie_netfs_data; |
164 | struct afs_volume *volume = target; | 255 | uint16_t klen; |
165 | 256 | ||
166 | _enter("{%u},{%u}", volume->type, vhash->vtype); | 257 | _enter("{%x,%x,%llx},%p,%u", |
258 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, | ||
259 | buffer, bufmax); | ||
167 | 260 | ||
168 | if (volume->type == vhash->vtype) { | 261 | klen = sizeof(vnode->fid.vnode); |
169 | _leave(" = SUCCESS"); | 262 | if (klen > bufmax) |
170 | return CACHEFS_MATCH_SUCCESS; | 263 | return 0; |
171 | } | 264 | |
265 | memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); | ||
172 | 266 | ||
173 | _leave(" = FAILED"); | 267 | _leave(" = %u", klen); |
174 | return CACHEFS_MATCH_FAILED; | 268 | return klen; |
175 | } | 269 | } |
176 | #endif | ||
177 | 270 | ||
178 | /* | 271 | /* |
179 | * update a volume hash record stored in the cache | 272 | * provide updated file attributes |
180 | */ | 273 | */ |
181 | #ifdef AFS_CACHING_SUPPORT | 274 | static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, |
182 | static void afs_volume_cache_update(void *source, void *entry) | 275 | uint64_t *size) |
183 | { | 276 | { |
184 | struct afs_cache_vhash *vhash = entry; | 277 | const struct afs_vnode *vnode = cookie_netfs_data; |
185 | struct afs_volume *volume = source; | ||
186 | 278 | ||
187 | _enter(""); | 279 | _enter("{%x,%x,%llx},", |
280 | vnode->fid.vnode, vnode->fid.unique, | ||
281 | vnode->status.data_version); | ||
188 | 282 | ||
189 | vhash->vtype = volume->type; | 283 | *size = vnode->status.size; |
190 | } | 284 | } |
191 | #endif | ||
192 | |||
193 | #ifdef AFS_CACHING_SUPPORT | ||
194 | static cachefs_match_val_t afs_vnode_cache_match(void *target, | ||
195 | const void *entry); | ||
196 | static void afs_vnode_cache_update(void *source, void *entry); | ||
197 | |||
198 | struct cachefs_index_def afs_vnode_cache_index_def = { | ||
199 | .name = "vnode", | ||
200 | .data_size = sizeof(struct afs_cache_vnode), | ||
201 | .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, | ||
202 | .match = afs_vnode_cache_match, | ||
203 | .update = afs_vnode_cache_update, | ||
204 | }; | ||
205 | #endif | ||
206 | 285 | ||
207 | /* | 286 | /* |
208 | * match a vnode record stored in the cache | 287 | * provide new auxilliary cache data |
288 | */ | ||
289 | static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, | ||
290 | void *buffer, uint16_t bufmax) | ||
291 | { | ||
292 | const struct afs_vnode *vnode = cookie_netfs_data; | ||
293 | uint16_t dlen; | ||
294 | |||
295 | _enter("{%x,%x,%Lx},%p,%u", | ||
296 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, | ||
297 | buffer, bufmax); | ||
298 | |||
299 | dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); | ||
300 | if (dlen > bufmax) | ||
301 | return 0; | ||
302 | |||
303 | memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); | ||
304 | buffer += sizeof(vnode->fid.unique); | ||
305 | memcpy(buffer, &vnode->status.data_version, | ||
306 | sizeof(vnode->status.data_version)); | ||
307 | |||
308 | _leave(" = %u", dlen); | ||
309 | return dlen; | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * check that the auxilliary data indicates that the entry is still valid | ||
209 | */ | 314 | */ |
210 | #ifdef AFS_CACHING_SUPPORT | 315 | static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, |
211 | static cachefs_match_val_t afs_vnode_cache_match(void *target, | 316 | const void *buffer, |
212 | const void *entry) | 317 | uint16_t buflen) |
213 | { | 318 | { |
214 | const struct afs_cache_vnode *cvnode = entry; | 319 | struct afs_vnode *vnode = cookie_netfs_data; |
215 | struct afs_vnode *vnode = target; | 320 | uint16_t dlen; |
216 | 321 | ||
217 | _enter("{%x,%x,%Lx},{%x,%x,%Lx}", | 322 | _enter("{%x,%x,%llx},%p,%u", |
218 | vnode->fid.vnode, | 323 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, |
219 | vnode->fid.unique, | 324 | buffer, buflen); |
220 | vnode->status.version, | 325 | |
221 | cvnode->vnode_id, | 326 | /* check the size of the data is what we're expecting */ |
222 | cvnode->vnode_unique, | 327 | dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); |
223 | cvnode->data_version); | 328 | if (dlen != buflen) { |
224 | 329 | _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); | |
225 | if (vnode->fid.vnode != cvnode->vnode_id) { | 330 | return FSCACHE_CHECKAUX_OBSOLETE; |
226 | _leave(" = FAILED"); | ||
227 | return CACHEFS_MATCH_FAILED; | ||
228 | } | 331 | } |
229 | 332 | ||
230 | if (vnode->fid.unique != cvnode->vnode_unique || | 333 | if (memcmp(buffer, |
231 | vnode->status.version != cvnode->data_version) { | 334 | &vnode->fid.unique, |
232 | _leave(" = DELETE"); | 335 | sizeof(vnode->fid.unique) |
233 | return CACHEFS_MATCH_SUCCESS_DELETE; | 336 | ) != 0) { |
337 | unsigned unique; | ||
338 | |||
339 | memcpy(&unique, buffer, sizeof(unique)); | ||
340 | |||
341 | _leave(" = OBSOLETE [uniq %x != %x]", | ||
342 | unique, vnode->fid.unique); | ||
343 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
344 | } | ||
345 | |||
346 | if (memcmp(buffer + sizeof(vnode->fid.unique), | ||
347 | &vnode->status.data_version, | ||
348 | sizeof(vnode->status.data_version) | ||
349 | ) != 0) { | ||
350 | afs_dataversion_t version; | ||
351 | |||
352 | memcpy(&version, buffer + sizeof(vnode->fid.unique), | ||
353 | sizeof(version)); | ||
354 | |||
355 | _leave(" = OBSOLETE [vers %llx != %llx]", | ||
356 | version, vnode->status.data_version); | ||
357 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
234 | } | 358 | } |
235 | 359 | ||
236 | _leave(" = SUCCESS"); | 360 | _leave(" = SUCCESS"); |
237 | return CACHEFS_MATCH_SUCCESS; | 361 | return FSCACHE_CHECKAUX_OKAY; |
238 | } | 362 | } |
239 | #endif | ||
240 | 363 | ||
241 | /* | 364 | /* |
242 | * update a vnode record stored in the cache | 365 | * indication the cookie is no longer uncached |
366 | * - this function is called when the backing store currently caching a cookie | ||
367 | * is removed | ||
368 | * - the netfs should use this to clean up any markers indicating cached pages | ||
369 | * - this is mandatory for any object that may have data | ||
243 | */ | 370 | */ |
244 | #ifdef AFS_CACHING_SUPPORT | 371 | static void afs_vnode_cache_now_uncached(void *cookie_netfs_data) |
245 | static void afs_vnode_cache_update(void *source, void *entry) | ||
246 | { | 372 | { |
247 | struct afs_cache_vnode *cvnode = entry; | 373 | struct afs_vnode *vnode = cookie_netfs_data; |
248 | struct afs_vnode *vnode = source; | 374 | struct pagevec pvec; |
375 | pgoff_t first; | ||
376 | int loop, nr_pages; | ||
377 | |||
378 | _enter("{%x,%x,%Lx}", | ||
379 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version); | ||
380 | |||
381 | pagevec_init(&pvec, 0); | ||
382 | first = 0; | ||
383 | |||
384 | for (;;) { | ||
385 | /* grab a bunch of pages to clean */ | ||
386 | nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping, | ||
387 | first, | ||
388 | PAGEVEC_SIZE - pagevec_count(&pvec)); | ||
389 | if (!nr_pages) | ||
390 | break; | ||
249 | 391 | ||
250 | _enter(""); | 392 | for (loop = 0; loop < nr_pages; loop++) |
393 | ClearPageFsCache(pvec.pages[loop]); | ||
394 | |||
395 | first = pvec.pages[nr_pages - 1]->index + 1; | ||
396 | |||
397 | pvec.nr = nr_pages; | ||
398 | pagevec_release(&pvec); | ||
399 | cond_resched(); | ||
400 | } | ||
251 | 401 | ||
252 | cvnode->vnode_id = vnode->fid.vnode; | 402 | _leave(""); |
253 | cvnode->vnode_unique = vnode->fid.unique; | ||
254 | cvnode->data_version = vnode->status.version; | ||
255 | } | 403 | } |
256 | #endif | ||
diff --git a/fs/afs/cache.h b/fs/afs/cache.h index 36a3642cf90e..5c4f6b499e90 100644 --- a/fs/afs/cache.h +++ b/fs/afs/cache.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS local cache management interface | 1 | /* AFS local cache management interface |
2 | * | 2 | * |
3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -9,15 +9,4 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifndef AFS_CACHE_H | 12 | #include <linux/fscache.h> |
13 | #define AFS_CACHE_H | ||
14 | |||
15 | #undef AFS_CACHING_SUPPORT | ||
16 | |||
17 | #include <linux/mm.h> | ||
18 | #ifdef AFS_CACHING_SUPPORT | ||
19 | #include <linux/cachefs.h> | ||
20 | #endif | ||
21 | #include "types.h" | ||
22 | |||
23 | #endif /* AFS_CACHE_H */ | ||
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 5e1df14e16b1..e19c13f059ed 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
@@ -147,12 +147,11 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist) | |||
147 | if (ret < 0) | 147 | if (ret < 0) |
148 | goto error; | 148 | goto error; |
149 | 149 | ||
150 | #ifdef AFS_CACHING_SUPPORT | 150 | #ifdef CONFIG_AFS_FSCACHE |
151 | /* put it up for caching */ | 151 | /* put it up for caching (this never returns an error) */ |
152 | cachefs_acquire_cookie(afs_cache_netfs.primary_index, | 152 | cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, |
153 | &afs_vlocation_cache_index_def, | 153 | &afs_cell_cache_index_def, |
154 | cell, | 154 | cell); |
155 | &cell->cache); | ||
156 | #endif | 155 | #endif |
157 | 156 | ||
158 | /* add to the cell lists */ | 157 | /* add to the cell lists */ |
@@ -362,10 +361,9 @@ static void afs_cell_destroy(struct afs_cell *cell) | |||
362 | list_del_init(&cell->proc_link); | 361 | list_del_init(&cell->proc_link); |
363 | up_write(&afs_proc_cells_sem); | 362 | up_write(&afs_proc_cells_sem); |
364 | 363 | ||
365 | #ifdef AFS_CACHING_SUPPORT | 364 | #ifdef CONFIG_AFS_FSCACHE |
366 | cachefs_relinquish_cookie(cell->cache, 0); | 365 | fscache_relinquish_cookie(cell->cache, 0); |
367 | #endif | 366 | #endif |
368 | |||
369 | key_put(cell->anonymous_key); | 367 | key_put(cell->anonymous_key); |
370 | kfree(cell); | 368 | kfree(cell); |
371 | 369 | ||
diff --git a/fs/afs/file.c b/fs/afs/file.c index a3901769a96c..7a1d942ef68d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -23,6 +23,9 @@ static void afs_invalidatepage(struct page *page, unsigned long offset); | |||
23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); | 23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); |
24 | static int afs_launder_page(struct page *page); | 24 | static int afs_launder_page(struct page *page); |
25 | 25 | ||
26 | static int afs_readpages(struct file *filp, struct address_space *mapping, | ||
27 | struct list_head *pages, unsigned nr_pages); | ||
28 | |||
26 | const struct file_operations afs_file_operations = { | 29 | const struct file_operations afs_file_operations = { |
27 | .open = afs_open, | 30 | .open = afs_open, |
28 | .release = afs_release, | 31 | .release = afs_release, |
@@ -46,6 +49,7 @@ const struct inode_operations afs_file_inode_operations = { | |||
46 | 49 | ||
47 | const struct address_space_operations afs_fs_aops = { | 50 | const struct address_space_operations afs_fs_aops = { |
48 | .readpage = afs_readpage, | 51 | .readpage = afs_readpage, |
52 | .readpages = afs_readpages, | ||
49 | .set_page_dirty = afs_set_page_dirty, | 53 | .set_page_dirty = afs_set_page_dirty, |
50 | .launder_page = afs_launder_page, | 54 | .launder_page = afs_launder_page, |
51 | .releasepage = afs_releasepage, | 55 | .releasepage = afs_releasepage, |
@@ -101,37 +105,18 @@ int afs_release(struct inode *inode, struct file *file) | |||
101 | /* | 105 | /* |
102 | * deal with notification that a page was read from the cache | 106 | * deal with notification that a page was read from the cache |
103 | */ | 107 | */ |
104 | #ifdef AFS_CACHING_SUPPORT | 108 | static void afs_file_readpage_read_complete(struct page *page, |
105 | static void afs_readpage_read_complete(void *cookie_data, | 109 | void *data, |
106 | struct page *page, | 110 | int error) |
107 | void *data, | ||
108 | int error) | ||
109 | { | 111 | { |
110 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); | 112 | _enter("%p,%p,%d", page, data, error); |
111 | 113 | ||
112 | if (error) | 114 | /* if the read completes with an error, we just unlock the page and let |
113 | SetPageError(page); | 115 | * the VM reissue the readpage */ |
114 | else | 116 | if (!error) |
115 | SetPageUptodate(page); | 117 | SetPageUptodate(page); |
116 | unlock_page(page); | 118 | unlock_page(page); |
117 | |||
118 | } | 119 | } |
119 | #endif | ||
120 | |||
121 | /* | ||
122 | * deal with notification that a page was written to the cache | ||
123 | */ | ||
124 | #ifdef AFS_CACHING_SUPPORT | ||
125 | static void afs_readpage_write_complete(void *cookie_data, | ||
126 | struct page *page, | ||
127 | void *data, | ||
128 | int error) | ||
129 | { | ||
130 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); | ||
131 | |||
132 | unlock_page(page); | ||
133 | } | ||
134 | #endif | ||
135 | 120 | ||
136 | /* | 121 | /* |
137 | * AFS read page from file, directory or symlink | 122 | * AFS read page from file, directory or symlink |
@@ -161,9 +146,9 @@ static int afs_readpage(struct file *file, struct page *page) | |||
161 | if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) | 146 | if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) |
162 | goto error; | 147 | goto error; |
163 | 148 | ||
164 | #ifdef AFS_CACHING_SUPPORT | ||
165 | /* is it cached? */ | 149 | /* is it cached? */ |
166 | ret = cachefs_read_or_alloc_page(vnode->cache, | 150 | #ifdef CONFIG_AFS_FSCACHE |
151 | ret = fscache_read_or_alloc_page(vnode->cache, | ||
167 | page, | 152 | page, |
168 | afs_file_readpage_read_complete, | 153 | afs_file_readpage_read_complete, |
169 | NULL, | 154 | NULL, |
@@ -171,20 +156,21 @@ static int afs_readpage(struct file *file, struct page *page) | |||
171 | #else | 156 | #else |
172 | ret = -ENOBUFS; | 157 | ret = -ENOBUFS; |
173 | #endif | 158 | #endif |
174 | |||
175 | switch (ret) { | 159 | switch (ret) { |
176 | /* read BIO submitted and wb-journal entry found */ | ||
177 | case 1: | ||
178 | BUG(); // TODO - handle wb-journal match | ||
179 | |||
180 | /* read BIO submitted (page in cache) */ | 160 | /* read BIO submitted (page in cache) */ |
181 | case 0: | 161 | case 0: |
182 | break; | 162 | break; |
183 | 163 | ||
184 | /* no page available in cache */ | 164 | /* page not yet cached */ |
185 | case -ENOBUFS: | ||
186 | case -ENODATA: | 165 | case -ENODATA: |
166 | _debug("cache said ENODATA"); | ||
167 | goto go_on; | ||
168 | |||
169 | /* page will not be cached */ | ||
170 | case -ENOBUFS: | ||
171 | _debug("cache said ENOBUFS"); | ||
187 | default: | 172 | default: |
173 | go_on: | ||
188 | offset = page->index << PAGE_CACHE_SHIFT; | 174 | offset = page->index << PAGE_CACHE_SHIFT; |
189 | len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); | 175 | len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); |
190 | 176 | ||
@@ -198,27 +184,25 @@ static int afs_readpage(struct file *file, struct page *page) | |||
198 | set_bit(AFS_VNODE_DELETED, &vnode->flags); | 184 | set_bit(AFS_VNODE_DELETED, &vnode->flags); |
199 | ret = -ESTALE; | 185 | ret = -ESTALE; |
200 | } | 186 | } |
201 | #ifdef AFS_CACHING_SUPPORT | 187 | |
202 | cachefs_uncache_page(vnode->cache, page); | 188 | #ifdef CONFIG_AFS_FSCACHE |
189 | fscache_uncache_page(vnode->cache, page); | ||
203 | #endif | 190 | #endif |
191 | BUG_ON(PageFsCache(page)); | ||
204 | goto error; | 192 | goto error; |
205 | } | 193 | } |
206 | 194 | ||
207 | SetPageUptodate(page); | 195 | SetPageUptodate(page); |
208 | 196 | ||
209 | #ifdef AFS_CACHING_SUPPORT | 197 | /* send the page to the cache */ |
210 | if (cachefs_write_page(vnode->cache, | 198 | #ifdef CONFIG_AFS_FSCACHE |
211 | page, | 199 | if (PageFsCache(page) && |
212 | afs_file_readpage_write_complete, | 200 | fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) { |
213 | NULL, | 201 | fscache_uncache_page(vnode->cache, page); |
214 | GFP_KERNEL) != 0 | 202 | BUG_ON(PageFsCache(page)); |
215 | ) { | ||
216 | cachefs_uncache_page(vnode->cache, page); | ||
217 | unlock_page(page); | ||
218 | } | 203 | } |
219 | #else | ||
220 | unlock_page(page); | ||
221 | #endif | 204 | #endif |
205 | unlock_page(page); | ||
222 | } | 206 | } |
223 | 207 | ||
224 | _leave(" = 0"); | 208 | _leave(" = 0"); |
@@ -232,34 +216,59 @@ error: | |||
232 | } | 216 | } |
233 | 217 | ||
234 | /* | 218 | /* |
235 | * invalidate part or all of a page | 219 | * read a set of pages |
236 | */ | 220 | */ |
237 | static void afs_invalidatepage(struct page *page, unsigned long offset) | 221 | static int afs_readpages(struct file *file, struct address_space *mapping, |
222 | struct list_head *pages, unsigned nr_pages) | ||
238 | { | 223 | { |
239 | int ret = 1; | 224 | struct afs_vnode *vnode; |
225 | int ret = 0; | ||
240 | 226 | ||
241 | _enter("{%lu},%lu", page->index, offset); | 227 | _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); |
242 | 228 | ||
243 | BUG_ON(!PageLocked(page)); | 229 | vnode = AFS_FS_I(mapping->host); |
230 | if (vnode->flags & AFS_VNODE_DELETED) { | ||
231 | _leave(" = -ESTALE"); | ||
232 | return -ESTALE; | ||
233 | } | ||
244 | 234 | ||
245 | if (PagePrivate(page)) { | 235 | /* attempt to read as many of the pages as possible */ |
246 | /* We release buffers only if the entire page is being | 236 | #ifdef CONFIG_AFS_FSCACHE |
247 | * invalidated. | 237 | ret = fscache_read_or_alloc_pages(vnode->cache, |
248 | * The get_block cached value has been unconditionally | 238 | mapping, |
249 | * invalidated, so real IO is not possible anymore. | 239 | pages, |
250 | */ | 240 | &nr_pages, |
251 | if (offset == 0) { | 241 | afs_file_readpage_read_complete, |
252 | BUG_ON(!PageLocked(page)); | 242 | NULL, |
253 | 243 | mapping_gfp_mask(mapping)); | |
254 | ret = 0; | 244 | #else |
255 | if (!PageWriteback(page)) | 245 | ret = -ENOBUFS; |
256 | ret = page->mapping->a_ops->releasepage(page, | 246 | #endif |
257 | 0); | 247 | |
258 | /* possibly should BUG_ON(!ret); - neilb */ | 248 | switch (ret) { |
259 | } | 249 | /* all pages are being read from the cache */ |
250 | case 0: | ||
251 | BUG_ON(!list_empty(pages)); | ||
252 | BUG_ON(nr_pages != 0); | ||
253 | _leave(" = 0 [reading all]"); | ||
254 | return 0; | ||
255 | |||
256 | /* there were pages that couldn't be read from the cache */ | ||
257 | case -ENODATA: | ||
258 | case -ENOBUFS: | ||
259 | break; | ||
260 | |||
261 | /* other error */ | ||
262 | default: | ||
263 | _leave(" = %d", ret); | ||
264 | return ret; | ||
260 | } | 265 | } |
261 | 266 | ||
262 | _leave(" = %d", ret); | 267 | /* load the missing pages from the network */ |
268 | ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file); | ||
269 | |||
270 | _leave(" = %d [netting]", ret); | ||
271 | return ret; | ||
263 | } | 272 | } |
264 | 273 | ||
265 | /* | 274 | /* |
@@ -273,25 +282,82 @@ static int afs_launder_page(struct page *page) | |||
273 | } | 282 | } |
274 | 283 | ||
275 | /* | 284 | /* |
276 | * release a page and cleanup its private data | 285 | * invalidate part or all of a page |
286 | * - release a page and clean up its private data if offset is 0 (indicating | ||
287 | * the entire page) | ||
288 | */ | ||
289 | static void afs_invalidatepage(struct page *page, unsigned long offset) | ||
290 | { | ||
291 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); | ||
292 | |||
293 | _enter("{%lu},%lu", page->index, offset); | ||
294 | |||
295 | BUG_ON(!PageLocked(page)); | ||
296 | |||
297 | /* we clean up only if the entire page is being invalidated */ | ||
298 | if (offset == 0) { | ||
299 | #ifdef CONFIG_AFS_FSCACHE | ||
300 | if (PageFsCache(page)) { | ||
301 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | ||
302 | fscache_wait_on_page_write(vnode->cache, page); | ||
303 | fscache_uncache_page(vnode->cache, page); | ||
304 | ClearPageFsCache(page); | ||
305 | } | ||
306 | #endif | ||
307 | |||
308 | if (PagePrivate(page)) { | ||
309 | if (wb && !PageWriteback(page)) { | ||
310 | set_page_private(page, 0); | ||
311 | afs_put_writeback(wb); | ||
312 | } | ||
313 | |||
314 | if (!page_private(page)) | ||
315 | ClearPagePrivate(page); | ||
316 | } | ||
317 | } | ||
318 | |||
319 | _leave(""); | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * release a page and clean up its private state if it's not busy | ||
324 | * - return true if the page can now be released, false if not | ||
277 | */ | 325 | */ |
278 | static int afs_releasepage(struct page *page, gfp_t gfp_flags) | 326 | static int afs_releasepage(struct page *page, gfp_t gfp_flags) |
279 | { | 327 | { |
328 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); | ||
280 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | 329 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); |
281 | struct afs_writeback *wb; | ||
282 | 330 | ||
283 | _enter("{{%x:%u}[%lu],%lx},%x", | 331 | _enter("{{%x:%u}[%lu],%lx},%x", |
284 | vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, | 332 | vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, |
285 | gfp_flags); | 333 | gfp_flags); |
286 | 334 | ||
335 | /* deny if page is being written to the cache and the caller hasn't | ||
336 | * elected to wait */ | ||
337 | #ifdef CONFIG_AFS_FSCACHE | ||
338 | if (PageFsCache(page)) { | ||
339 | if (fscache_check_page_write(vnode->cache, page)) { | ||
340 | if (!(gfp_flags & __GFP_WAIT)) { | ||
341 | _leave(" = F [cache busy]"); | ||
342 | return 0; | ||
343 | } | ||
344 | fscache_wait_on_page_write(vnode->cache, page); | ||
345 | } | ||
346 | |||
347 | fscache_uncache_page(vnode->cache, page); | ||
348 | ClearPageFsCache(page); | ||
349 | } | ||
350 | #endif | ||
351 | |||
287 | if (PagePrivate(page)) { | 352 | if (PagePrivate(page)) { |
288 | wb = (struct afs_writeback *) page_private(page); | 353 | if (wb) { |
289 | ASSERT(wb != NULL); | 354 | set_page_private(page, 0); |
290 | set_page_private(page, 0); | 355 | afs_put_writeback(wb); |
356 | } | ||
291 | ClearPagePrivate(page); | 357 | ClearPagePrivate(page); |
292 | afs_put_writeback(wb); | ||
293 | } | 358 | } |
294 | 359 | ||
295 | _leave(" = 0"); | 360 | /* indicate that the page can be released */ |
296 | return 0; | 361 | _leave(" = T"); |
362 | return 1; | ||
297 | } | 363 | } |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index bb47217f6a18..c048f0658751 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -61,6 +61,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) | |||
61 | return -EBADMSG; | 61 | return -EBADMSG; |
62 | } | 62 | } |
63 | 63 | ||
64 | #ifdef CONFIG_AFS_FSCACHE | ||
65 | if (vnode->status.size != inode->i_size) | ||
66 | fscache_attr_changed(vnode->cache); | ||
67 | #endif | ||
68 | |||
64 | inode->i_nlink = vnode->status.nlink; | 69 | inode->i_nlink = vnode->status.nlink; |
65 | inode->i_uid = vnode->status.owner; | 70 | inode->i_uid = vnode->status.owner; |
66 | inode->i_gid = 0; | 71 | inode->i_gid = 0; |
@@ -149,15 +154,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
149 | return inode; | 154 | return inode; |
150 | } | 155 | } |
151 | 156 | ||
152 | #ifdef AFS_CACHING_SUPPORT | ||
153 | /* set up caching before reading the status, as fetch-status reads the | ||
154 | * first page of symlinks to see if they're really mntpts */ | ||
155 | cachefs_acquire_cookie(vnode->volume->cache, | ||
156 | NULL, | ||
157 | vnode, | ||
158 | &vnode->cache); | ||
159 | #endif | ||
160 | |||
161 | if (!status) { | 157 | if (!status) { |
162 | /* it's a remotely extant inode */ | 158 | /* it's a remotely extant inode */ |
163 | set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); | 159 | set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); |
@@ -183,6 +179,15 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
183 | } | 179 | } |
184 | } | 180 | } |
185 | 181 | ||
182 | /* set up caching before mapping the status, as map-status reads the | ||
183 | * first page of symlinks to see if they're really mountpoints */ | ||
184 | inode->i_size = vnode->status.size; | ||
185 | #ifdef CONFIG_AFS_FSCACHE | ||
186 | vnode->cache = fscache_acquire_cookie(vnode->volume->cache, | ||
187 | &afs_vnode_cache_index_def, | ||
188 | vnode); | ||
189 | #endif | ||
190 | |||
186 | ret = afs_inode_map_status(vnode, key); | 191 | ret = afs_inode_map_status(vnode, key); |
187 | if (ret < 0) | 192 | if (ret < 0) |
188 | goto bad_inode; | 193 | goto bad_inode; |
@@ -196,6 +201,10 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
196 | 201 | ||
197 | /* failure */ | 202 | /* failure */ |
198 | bad_inode: | 203 | bad_inode: |
204 | #ifdef CONFIG_AFS_FSCACHE | ||
205 | fscache_relinquish_cookie(vnode->cache, 0); | ||
206 | vnode->cache = NULL; | ||
207 | #endif | ||
199 | iget_failed(inode); | 208 | iget_failed(inode); |
200 | _leave(" = %d [bad]", ret); | 209 | _leave(" = %d [bad]", ret); |
201 | return ERR_PTR(ret); | 210 | return ERR_PTR(ret); |
@@ -340,8 +349,8 @@ void afs_clear_inode(struct inode *inode) | |||
340 | ASSERT(list_empty(&vnode->writebacks)); | 349 | ASSERT(list_empty(&vnode->writebacks)); |
341 | ASSERT(!vnode->cb_promised); | 350 | ASSERT(!vnode->cb_promised); |
342 | 351 | ||
343 | #ifdef AFS_CACHING_SUPPORT | 352 | #ifdef CONFIG_AFS_FSCACHE |
344 | cachefs_relinquish_cookie(vnode->cache, 0); | 353 | fscache_relinquish_cookie(vnode->cache, 0); |
345 | vnode->cache = NULL; | 354 | vnode->cache = NULL; |
346 | #endif | 355 | #endif |
347 | 356 | ||
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 67f259d99cd6..106be66dafd2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include "afs.h" | 22 | #include "afs.h" |
23 | #include "afs_vl.h" | 23 | #include "afs_vl.h" |
24 | #include "cache.h" | ||
24 | 25 | ||
25 | #define AFS_CELL_MAX_ADDRS 15 | 26 | #define AFS_CELL_MAX_ADDRS 15 |
26 | 27 | ||
@@ -193,8 +194,8 @@ struct afs_cell { | |||
193 | struct key *anonymous_key; /* anonymous user key for this cell */ | 194 | struct key *anonymous_key; /* anonymous user key for this cell */ |
194 | struct list_head proc_link; /* /proc cell list link */ | 195 | struct list_head proc_link; /* /proc cell list link */ |
195 | struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ | 196 | struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ |
196 | #ifdef AFS_CACHING_SUPPORT | 197 | #ifdef CONFIG_AFS_FSCACHE |
197 | struct cachefs_cookie *cache; /* caching cookie */ | 198 | struct fscache_cookie *cache; /* caching cookie */ |
198 | #endif | 199 | #endif |
199 | 200 | ||
200 | /* server record management */ | 201 | /* server record management */ |
@@ -249,8 +250,8 @@ struct afs_vlocation { | |||
249 | struct list_head grave; /* link in master graveyard list */ | 250 | struct list_head grave; /* link in master graveyard list */ |
250 | struct list_head update; /* link in master update list */ | 251 | struct list_head update; /* link in master update list */ |
251 | struct afs_cell *cell; /* cell to which volume belongs */ | 252 | struct afs_cell *cell; /* cell to which volume belongs */ |
252 | #ifdef AFS_CACHING_SUPPORT | 253 | #ifdef CONFIG_AFS_FSCACHE |
253 | struct cachefs_cookie *cache; /* caching cookie */ | 254 | struct fscache_cookie *cache; /* caching cookie */ |
254 | #endif | 255 | #endif |
255 | struct afs_cache_vlocation vldb; /* volume information DB record */ | 256 | struct afs_cache_vlocation vldb; /* volume information DB record */ |
256 | struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ | 257 | struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ |
@@ -302,8 +303,8 @@ struct afs_volume { | |||
302 | atomic_t usage; | 303 | atomic_t usage; |
303 | struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ | 304 | struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ |
304 | struct afs_vlocation *vlocation; /* volume location */ | 305 | struct afs_vlocation *vlocation; /* volume location */ |
305 | #ifdef AFS_CACHING_SUPPORT | 306 | #ifdef CONFIG_AFS_FSCACHE |
306 | struct cachefs_cookie *cache; /* caching cookie */ | 307 | struct fscache_cookie *cache; /* caching cookie */ |
307 | #endif | 308 | #endif |
308 | afs_volid_t vid; /* volume ID */ | 309 | afs_volid_t vid; /* volume ID */ |
309 | afs_voltype_t type; /* type of volume */ | 310 | afs_voltype_t type; /* type of volume */ |
@@ -333,8 +334,8 @@ struct afs_vnode { | |||
333 | struct afs_server *server; /* server currently supplying this file */ | 334 | struct afs_server *server; /* server currently supplying this file */ |
334 | struct afs_fid fid; /* the file identifier for this inode */ | 335 | struct afs_fid fid; /* the file identifier for this inode */ |
335 | struct afs_file_status status; /* AFS status info for this file */ | 336 | struct afs_file_status status; /* AFS status info for this file */ |
336 | #ifdef AFS_CACHING_SUPPORT | 337 | #ifdef CONFIG_AFS_FSCACHE |
337 | struct cachefs_cookie *cache; /* caching cookie */ | 338 | struct fscache_cookie *cache; /* caching cookie */ |
338 | #endif | 339 | #endif |
339 | struct afs_permits *permits; /* cache of permits so far obtained */ | 340 | struct afs_permits *permits; /* cache of permits so far obtained */ |
340 | struct mutex permits_lock; /* lock for altering permits list */ | 341 | struct mutex permits_lock; /* lock for altering permits list */ |
@@ -428,6 +429,22 @@ struct afs_uuid { | |||
428 | 429 | ||
429 | /*****************************************************************************/ | 430 | /*****************************************************************************/ |
430 | /* | 431 | /* |
432 | * cache.c | ||
433 | */ | ||
434 | #ifdef CONFIG_AFS_FSCACHE | ||
435 | extern struct fscache_netfs afs_cache_netfs; | ||
436 | extern struct fscache_cookie_def afs_cell_cache_index_def; | ||
437 | extern struct fscache_cookie_def afs_vlocation_cache_index_def; | ||
438 | extern struct fscache_cookie_def afs_volume_cache_index_def; | ||
439 | extern struct fscache_cookie_def afs_vnode_cache_index_def; | ||
440 | #else | ||
441 | #define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
442 | #define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
443 | #define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
444 | #define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
445 | #endif | ||
446 | |||
447 | /* | ||
431 | * callback.c | 448 | * callback.c |
432 | */ | 449 | */ |
433 | extern void afs_init_callback_state(struct afs_server *); | 450 | extern void afs_init_callback_state(struct afs_server *); |
@@ -446,9 +463,6 @@ extern void afs_callback_update_kill(void); | |||
446 | */ | 463 | */ |
447 | extern struct rw_semaphore afs_proc_cells_sem; | 464 | extern struct rw_semaphore afs_proc_cells_sem; |
448 | extern struct list_head afs_proc_cells; | 465 | extern struct list_head afs_proc_cells; |
449 | #ifdef AFS_CACHING_SUPPORT | ||
450 | extern struct cachefs_index_def afs_cache_cell_index_def; | ||
451 | #endif | ||
452 | 466 | ||
453 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) | 467 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) |
454 | extern int afs_cell_init(char *); | 468 | extern int afs_cell_init(char *); |
@@ -554,9 +568,6 @@ extern void afs_clear_inode(struct inode *); | |||
554 | * main.c | 568 | * main.c |
555 | */ | 569 | */ |
556 | extern struct afs_uuid afs_uuid; | 570 | extern struct afs_uuid afs_uuid; |
557 | #ifdef AFS_CACHING_SUPPORT | ||
558 | extern struct cachefs_netfs afs_cache_netfs; | ||
559 | #endif | ||
560 | 571 | ||
561 | /* | 572 | /* |
562 | * misc.c | 573 | * misc.c |
@@ -637,10 +648,6 @@ extern int afs_get_MAC_address(u8 *, size_t); | |||
637 | /* | 648 | /* |
638 | * vlclient.c | 649 | * vlclient.c |
639 | */ | 650 | */ |
640 | #ifdef AFS_CACHING_SUPPORT | ||
641 | extern struct cachefs_index_def afs_vlocation_cache_index_def; | ||
642 | #endif | ||
643 | |||
644 | extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, | 651 | extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, |
645 | const char *, struct afs_cache_vlocation *, | 652 | const char *, struct afs_cache_vlocation *, |
646 | const struct afs_wait_mode *); | 653 | const struct afs_wait_mode *); |
@@ -664,12 +671,6 @@ extern void afs_vlocation_purge(void); | |||
664 | /* | 671 | /* |
665 | * vnode.c | 672 | * vnode.c |
666 | */ | 673 | */ |
667 | #ifdef AFS_CACHING_SUPPORT | ||
668 | extern struct cachefs_index_def afs_vnode_cache_index_def; | ||
669 | #endif | ||
670 | |||
671 | extern struct afs_timer_ops afs_vnode_cb_timed_out_ops; | ||
672 | |||
673 | static inline struct afs_vnode *AFS_FS_I(struct inode *inode) | 674 | static inline struct afs_vnode *AFS_FS_I(struct inode *inode) |
674 | { | 675 | { |
675 | return container_of(inode, struct afs_vnode, vfs_inode); | 676 | return container_of(inode, struct afs_vnode, vfs_inode); |
@@ -711,10 +712,6 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); | |||
711 | /* | 712 | /* |
712 | * volume.c | 713 | * volume.c |
713 | */ | 714 | */ |
714 | #ifdef AFS_CACHING_SUPPORT | ||
715 | extern struct cachefs_index_def afs_volume_cache_index_def; | ||
716 | #endif | ||
717 | |||
718 | #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) | 715 | #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) |
719 | 716 | ||
720 | extern void afs_put_volume(struct afs_volume *); | 717 | extern void afs_put_volume(struct afs_volume *); |
diff --git a/fs/afs/main.c b/fs/afs/main.c index 2d3e5d4fb9f7..66d54d348c55 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS client file system | 1 | /* AFS client file system |
2 | * | 2 | * |
3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -29,18 +29,6 @@ static char *rootcell; | |||
29 | module_param(rootcell, charp, 0); | 29 | module_param(rootcell, charp, 0); |
30 | MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); | 30 | MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); |
31 | 31 | ||
32 | #ifdef AFS_CACHING_SUPPORT | ||
33 | static struct cachefs_netfs_operations afs_cache_ops = { | ||
34 | .get_page_cookie = afs_cache_get_page_cookie, | ||
35 | }; | ||
36 | |||
37 | struct cachefs_netfs afs_cache_netfs = { | ||
38 | .name = "afs", | ||
39 | .version = 0, | ||
40 | .ops = &afs_cache_ops, | ||
41 | }; | ||
42 | #endif | ||
43 | |||
44 | struct afs_uuid afs_uuid; | 32 | struct afs_uuid afs_uuid; |
45 | 33 | ||
46 | /* | 34 | /* |
@@ -104,10 +92,9 @@ static int __init afs_init(void) | |||
104 | if (ret < 0) | 92 | if (ret < 0) |
105 | return ret; | 93 | return ret; |
106 | 94 | ||
107 | #ifdef AFS_CACHING_SUPPORT | 95 | #ifdef CONFIG_AFS_FSCACHE |
108 | /* we want to be able to cache */ | 96 | /* we want to be able to cache */ |
109 | ret = cachefs_register_netfs(&afs_cache_netfs, | 97 | ret = fscache_register_netfs(&afs_cache_netfs); |
110 | &afs_cache_cell_index_def); | ||
111 | if (ret < 0) | 98 | if (ret < 0) |
112 | goto error_cache; | 99 | goto error_cache; |
113 | #endif | 100 | #endif |
@@ -142,8 +129,8 @@ error_fs: | |||
142 | error_open_socket: | 129 | error_open_socket: |
143 | error_vl_update_init: | 130 | error_vl_update_init: |
144 | error_cell_init: | 131 | error_cell_init: |
145 | #ifdef AFS_CACHING_SUPPORT | 132 | #ifdef CONFIG_AFS_FSCACHE |
146 | cachefs_unregister_netfs(&afs_cache_netfs); | 133 | fscache_unregister_netfs(&afs_cache_netfs); |
147 | error_cache: | 134 | error_cache: |
148 | #endif | 135 | #endif |
149 | afs_callback_update_kill(); | 136 | afs_callback_update_kill(); |
@@ -175,8 +162,8 @@ static void __exit afs_exit(void) | |||
175 | afs_vlocation_purge(); | 162 | afs_vlocation_purge(); |
176 | flush_scheduled_work(); | 163 | flush_scheduled_work(); |
177 | afs_cell_purge(); | 164 | afs_cell_purge(); |
178 | #ifdef AFS_CACHING_SUPPORT | 165 | #ifdef CONFIG_AFS_FSCACHE |
179 | cachefs_unregister_netfs(&afs_cache_netfs); | 166 | fscache_unregister_netfs(&afs_cache_netfs); |
180 | #endif | 167 | #endif |
181 | afs_proc_cleanup(); | 168 | afs_proc_cleanup(); |
182 | rcu_barrier(); | 169 | rcu_barrier(); |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 78db4953a800..2b9e2d03a390 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -173,9 +173,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
173 | if (PageError(page)) | 173 | if (PageError(page)) |
174 | goto error; | 174 | goto error; |
175 | 175 | ||
176 | buf = kmap(page); | 176 | buf = kmap_atomic(page, KM_USER0); |
177 | memcpy(devname, buf, size); | 177 | memcpy(devname, buf, size); |
178 | kunmap(page); | 178 | kunmap_atomic(buf, KM_USER0); |
179 | page_cache_release(page); | 179 | page_cache_release(page); |
180 | page = NULL; | 180 | page = NULL; |
181 | 181 | ||
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 849fc3160cb5..ec2a7431e458 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c | |||
@@ -281,9 +281,8 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl, | |||
281 | 281 | ||
282 | vl->vldb = *vldb; | 282 | vl->vldb = *vldb; |
283 | 283 | ||
284 | #ifdef AFS_CACHING_SUPPORT | 284 | #ifdef CONFIG_AFS_FSCACHE |
285 | /* update volume entry in local cache */ | 285 | fscache_update_cookie(vl->cache); |
286 | cachefs_update_cookie(vl->cache); | ||
287 | #endif | 286 | #endif |
288 | } | 287 | } |
289 | 288 | ||
@@ -304,11 +303,9 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, | |||
304 | memset(&vldb, 0, sizeof(vldb)); | 303 | memset(&vldb, 0, sizeof(vldb)); |
305 | 304 | ||
306 | /* see if we have an in-cache copy (will set vl->valid if there is) */ | 305 | /* see if we have an in-cache copy (will set vl->valid if there is) */ |
307 | #ifdef AFS_CACHING_SUPPORT | 306 | #ifdef CONFIG_AFS_FSCACHE |
308 | cachefs_acquire_cookie(cell->cache, | 307 | vl->cache = fscache_acquire_cookie(vl->cell->cache, |
309 | &afs_volume_cache_index_def, | 308 | &afs_vlocation_cache_index_def, vl); |
310 | vlocation, | ||
311 | &vl->cache); | ||
312 | #endif | 309 | #endif |
313 | 310 | ||
314 | if (vl->valid) { | 311 | if (vl->valid) { |
@@ -420,6 +417,11 @@ fill_in_record: | |||
420 | spin_unlock(&vl->lock); | 417 | spin_unlock(&vl->lock); |
421 | wake_up(&vl->waitq); | 418 | wake_up(&vl->waitq); |
422 | 419 | ||
420 | /* update volume entry in local cache */ | ||
421 | #ifdef CONFIG_AFS_FSCACHE | ||
422 | fscache_update_cookie(vl->cache); | ||
423 | #endif | ||
424 | |||
423 | /* schedule for regular updates */ | 425 | /* schedule for regular updates */ |
424 | afs_vlocation_queue_for_updates(vl); | 426 | afs_vlocation_queue_for_updates(vl); |
425 | goto success; | 427 | goto success; |
@@ -465,7 +467,7 @@ found_in_memory: | |||
465 | spin_unlock(&vl->lock); | 467 | spin_unlock(&vl->lock); |
466 | 468 | ||
467 | success: | 469 | success: |
468 | _leave(" = %p",vl); | 470 | _leave(" = %p", vl); |
469 | return vl; | 471 | return vl; |
470 | 472 | ||
471 | error_abandon: | 473 | error_abandon: |
@@ -523,10 +525,9 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl) | |||
523 | { | 525 | { |
524 | _enter("%p", vl); | 526 | _enter("%p", vl); |
525 | 527 | ||
526 | #ifdef AFS_CACHING_SUPPORT | 528 | #ifdef CONFIG_AFS_FSCACHE |
527 | cachefs_relinquish_cookie(vl->cache, 0); | 529 | fscache_relinquish_cookie(vl->cache, 0); |
528 | #endif | 530 | #endif |
529 | |||
530 | afs_put_cell(vl->cell); | 531 | afs_put_cell(vl->cell); |
531 | kfree(vl); | 532 | kfree(vl); |
532 | } | 533 | } |
diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 8bab0e3437f9..a353e69e2391 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c | |||
@@ -124,13 +124,11 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) | |||
124 | } | 124 | } |
125 | 125 | ||
126 | /* attach the cache and volume location */ | 126 | /* attach the cache and volume location */ |
127 | #ifdef AFS_CACHING_SUPPORT | 127 | #ifdef CONFIG_AFS_FSCACHE |
128 | cachefs_acquire_cookie(vlocation->cache, | 128 | volume->cache = fscache_acquire_cookie(vlocation->cache, |
129 | &afs_vnode_cache_index_def, | 129 | &afs_volume_cache_index_def, |
130 | volume, | 130 | volume); |
131 | &volume->cache); | ||
132 | #endif | 131 | #endif |
133 | |||
134 | afs_get_vlocation(vlocation); | 132 | afs_get_vlocation(vlocation); |
135 | volume->vlocation = vlocation; | 133 | volume->vlocation = vlocation; |
136 | 134 | ||
@@ -194,8 +192,8 @@ void afs_put_volume(struct afs_volume *volume) | |||
194 | up_write(&vlocation->cell->vl_sem); | 192 | up_write(&vlocation->cell->vl_sem); |
195 | 193 | ||
196 | /* finish cleaning up the volume */ | 194 | /* finish cleaning up the volume */ |
197 | #ifdef AFS_CACHING_SUPPORT | 195 | #ifdef CONFIG_AFS_FSCACHE |
198 | cachefs_relinquish_cookie(volume->cache, 0); | 196 | fscache_relinquish_cookie(volume->cache, 0); |
199 | #endif | 197 | #endif |
200 | afs_put_vlocation(vlocation); | 198 | afs_put_vlocation(vlocation); |
201 | 199 | ||
diff --git a/fs/afs/write.c b/fs/afs/write.c index 3fb36d433621..c2e7a7ff0080 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -780,3 +780,24 @@ int afs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
780 | _leave(" = %d", ret); | 780 | _leave(" = %d", ret); |
781 | return ret; | 781 | return ret; |
782 | } | 782 | } |
783 | |||
784 | /* | ||
785 | * notification that a previously read-only page is about to become writable | ||
786 | * - if it returns an error, the caller will deliver a bus error signal | ||
787 | */ | ||
788 | int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
789 | { | ||
790 | struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); | ||
791 | |||
792 | _enter("{{%x:%u}},{%lx}", | ||
793 | vnode->fid.vid, vnode->fid.vnode, page->index); | ||
794 | |||
795 | /* wait for the page to be written to the cache before we allow it to | ||
796 | * be modified */ | ||
797 | #ifdef CONFIG_AFS_FSCACHE | ||
798 | fscache_wait_on_page_write(vnode->cache, page); | ||
799 | #endif | ||
800 | |||
801 | _leave(" = 0"); | ||
802 | return 0; | ||
803 | } | ||
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index d06cb023ad02..76afd0d6b86c 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -900,6 +900,7 @@ static int | |||
900 | befs_statfs(struct dentry *dentry, struct kstatfs *buf) | 900 | befs_statfs(struct dentry *dentry, struct kstatfs *buf) |
901 | { | 901 | { |
902 | struct super_block *sb = dentry->d_sb; | 902 | struct super_block *sb = dentry->d_sb; |
903 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
903 | 904 | ||
904 | befs_debug(sb, "---> befs_statfs()"); | 905 | befs_debug(sb, "---> befs_statfs()"); |
905 | 906 | ||
@@ -910,6 +911,8 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
910 | buf->f_bavail = buf->f_bfree; | 911 | buf->f_bavail = buf->f_bfree; |
911 | buf->f_files = 0; /* UNKNOWN */ | 912 | buf->f_files = 0; /* UNKNOWN */ |
912 | buf->f_ffree = 0; /* UNKNOWN */ | 913 | buf->f_ffree = 0; /* UNKNOWN */ |
914 | buf->f_fsid.val[0] = (u32)id; | ||
915 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
913 | buf->f_namelen = BEFS_NAME_LEN; | 916 | buf->f_namelen = BEFS_NAME_LEN; |
914 | 917 | ||
915 | befs_debug(sb, "<--- befs_statfs()"); | 918 | befs_debug(sb, "<--- befs_statfs()"); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 33b7235f853b..40381df34869 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -12,8 +12,6 @@ | |||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/stat.h> | ||
16 | #include <linux/time.h> | ||
17 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
18 | #include <linux/mman.h> | 16 | #include <linux/mman.h> |
19 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
@@ -21,20 +19,15 @@ | |||
21 | #include <linux/binfmts.h> | 19 | #include <linux/binfmts.h> |
22 | #include <linux/string.h> | 20 | #include <linux/string.h> |
23 | #include <linux/file.h> | 21 | #include <linux/file.h> |
24 | #include <linux/fcntl.h> | ||
25 | #include <linux/ptrace.h> | ||
26 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
27 | #include <linux/shm.h> | ||
28 | #include <linux/personality.h> | 23 | #include <linux/personality.h> |
29 | #include <linux/elfcore.h> | 24 | #include <linux/elfcore.h> |
30 | #include <linux/init.h> | 25 | #include <linux/init.h> |
31 | #include <linux/highuid.h> | 26 | #include <linux/highuid.h> |
32 | #include <linux/smp.h> | ||
33 | #include <linux/compiler.h> | 27 | #include <linux/compiler.h> |
34 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
35 | #include <linux/pagemap.h> | 29 | #include <linux/pagemap.h> |
36 | #include <linux/security.h> | 30 | #include <linux/security.h> |
37 | #include <linux/syscalls.h> | ||
38 | #include <linux/random.h> | 31 | #include <linux/random.h> |
39 | #include <linux/elf.h> | 32 | #include <linux/elf.h> |
40 | #include <linux/utsname.h> | 33 | #include <linux/utsname.h> |
@@ -576,7 +569,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
576 | unsigned long error; | 569 | unsigned long error; |
577 | struct elf_phdr *elf_ppnt, *elf_phdata; | 570 | struct elf_phdr *elf_ppnt, *elf_phdata; |
578 | unsigned long elf_bss, elf_brk; | 571 | unsigned long elf_bss, elf_brk; |
579 | int elf_exec_fileno; | ||
580 | int retval, i; | 572 | int retval, i; |
581 | unsigned int size; | 573 | unsigned int size; |
582 | unsigned long elf_entry; | 574 | unsigned long elf_entry; |
@@ -631,12 +623,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
631 | goto out_free_ph; | 623 | goto out_free_ph; |
632 | } | 624 | } |
633 | 625 | ||
634 | retval = get_unused_fd(); | ||
635 | if (retval < 0) | ||
636 | goto out_free_ph; | ||
637 | get_file(bprm->file); | ||
638 | fd_install(elf_exec_fileno = retval, bprm->file); | ||
639 | |||
640 | elf_ppnt = elf_phdata; | 626 | elf_ppnt = elf_phdata; |
641 | elf_bss = 0; | 627 | elf_bss = 0; |
642 | elf_brk = 0; | 628 | elf_brk = 0; |
@@ -655,13 +641,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
655 | retval = -ENOEXEC; | 641 | retval = -ENOEXEC; |
656 | if (elf_ppnt->p_filesz > PATH_MAX || | 642 | if (elf_ppnt->p_filesz > PATH_MAX || |
657 | elf_ppnt->p_filesz < 2) | 643 | elf_ppnt->p_filesz < 2) |
658 | goto out_free_file; | 644 | goto out_free_ph; |
659 | 645 | ||
660 | retval = -ENOMEM; | 646 | retval = -ENOMEM; |
661 | elf_interpreter = kmalloc(elf_ppnt->p_filesz, | 647 | elf_interpreter = kmalloc(elf_ppnt->p_filesz, |
662 | GFP_KERNEL); | 648 | GFP_KERNEL); |
663 | if (!elf_interpreter) | 649 | if (!elf_interpreter) |
664 | goto out_free_file; | 650 | goto out_free_ph; |
665 | 651 | ||
666 | retval = kernel_read(bprm->file, elf_ppnt->p_offset, | 652 | retval = kernel_read(bprm->file, elf_ppnt->p_offset, |
667 | elf_interpreter, | 653 | elf_interpreter, |
@@ -956,8 +942,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
956 | 942 | ||
957 | kfree(elf_phdata); | 943 | kfree(elf_phdata); |
958 | 944 | ||
959 | sys_close(elf_exec_fileno); | ||
960 | |||
961 | set_binfmt(&elf_format); | 945 | set_binfmt(&elf_format); |
962 | 946 | ||
963 | #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES | 947 | #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES |
@@ -1028,8 +1012,6 @@ out_free_dentry: | |||
1028 | fput(interpreter); | 1012 | fput(interpreter); |
1029 | out_free_interp: | 1013 | out_free_interp: |
1030 | kfree(elf_interpreter); | 1014 | kfree(elf_interpreter); |
1031 | out_free_file: | ||
1032 | sys_close(elf_exec_fileno); | ||
1033 | out_free_ph: | 1015 | out_free_ph: |
1034 | kfree(elf_phdata); | 1016 | kfree(elf_phdata); |
1035 | goto out; | 1017 | goto out; |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f3e72c5c19f5..70cfc4b84ae0 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -972,9 +972,12 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( | |||
972 | params->elfhdr_addr = seg->addr; | 972 | params->elfhdr_addr = seg->addr; |
973 | 973 | ||
974 | /* clear any space allocated but not loaded */ | 974 | /* clear any space allocated but not loaded */ |
975 | if (phdr->p_filesz < phdr->p_memsz) | 975 | if (phdr->p_filesz < phdr->p_memsz) { |
976 | clear_user((void *) (seg->addr + phdr->p_filesz), | 976 | ret = clear_user((void *) (seg->addr + phdr->p_filesz), |
977 | phdr->p_memsz - phdr->p_filesz); | 977 | phdr->p_memsz - phdr->p_filesz); |
978 | if (ret) | ||
979 | return ret; | ||
980 | } | ||
978 | 981 | ||
979 | if (mm) { | 982 | if (mm) { |
980 | if (phdr->p_flags & PF_X) { | 983 | if (phdr->p_flags & PF_X) { |
@@ -1014,7 +1017,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1014 | struct elf32_fdpic_loadseg *seg; | 1017 | struct elf32_fdpic_loadseg *seg; |
1015 | struct elf32_phdr *phdr; | 1018 | struct elf32_phdr *phdr; |
1016 | unsigned long load_addr, delta_vaddr; | 1019 | unsigned long load_addr, delta_vaddr; |
1017 | int loop, dvset; | 1020 | int loop, dvset, ret; |
1018 | 1021 | ||
1019 | load_addr = params->load_addr; | 1022 | load_addr = params->load_addr; |
1020 | delta_vaddr = 0; | 1023 | delta_vaddr = 0; |
@@ -1114,7 +1117,9 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1114 | * PT_LOAD */ | 1117 | * PT_LOAD */ |
1115 | if (prot & PROT_WRITE && disp > 0) { | 1118 | if (prot & PROT_WRITE && disp > 0) { |
1116 | kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); | 1119 | kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); |
1117 | clear_user((void __user *) maddr, disp); | 1120 | ret = clear_user((void __user *) maddr, disp); |
1121 | if (ret) | ||
1122 | return ret; | ||
1118 | maddr += disp; | 1123 | maddr += disp; |
1119 | } | 1124 | } |
1120 | 1125 | ||
@@ -1149,15 +1154,19 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1149 | if (prot & PROT_WRITE && excess1 > 0) { | 1154 | if (prot & PROT_WRITE && excess1 > 0) { |
1150 | kdebug("clear[%d] ad=%lx sz=%lx", | 1155 | kdebug("clear[%d] ad=%lx sz=%lx", |
1151 | loop, maddr + phdr->p_filesz, excess1); | 1156 | loop, maddr + phdr->p_filesz, excess1); |
1152 | clear_user((void __user *) maddr + phdr->p_filesz, | 1157 | ret = clear_user((void __user *) maddr + phdr->p_filesz, |
1153 | excess1); | 1158 | excess1); |
1159 | if (ret) | ||
1160 | return ret; | ||
1154 | } | 1161 | } |
1155 | 1162 | ||
1156 | #else | 1163 | #else |
1157 | if (excess > 0) { | 1164 | if (excess > 0) { |
1158 | kdebug("clear[%d] ad=%lx sz=%lx", | 1165 | kdebug("clear[%d] ad=%lx sz=%lx", |
1159 | loop, maddr + phdr->p_filesz, excess); | 1166 | loop, maddr + phdr->p_filesz, excess); |
1160 | clear_user((void *) maddr + phdr->p_filesz, excess); | 1167 | ret = clear_user((void *) maddr + phdr->p_filesz, excess); |
1168 | if (ret) | ||
1169 | return ret; | ||
1161 | } | 1170 | } |
1162 | #endif | 1171 | #endif |
1163 | 1172 | ||
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 08644a61616e..eff74b9c9e77 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c | |||
@@ -188,7 +188,6 @@ out: | |||
188 | static int | 188 | static int |
189 | load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) | 189 | load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) |
190 | { | 190 | { |
191 | int som_exec_fileno; | ||
192 | int retval; | 191 | int retval; |
193 | unsigned int size; | 192 | unsigned int size; |
194 | unsigned long som_entry; | 193 | unsigned long som_entry; |
@@ -220,12 +219,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
220 | goto out_free; | 219 | goto out_free; |
221 | } | 220 | } |
222 | 221 | ||
223 | retval = get_unused_fd(); | ||
224 | if (retval < 0) | ||
225 | goto out_free; | ||
226 | get_file(bprm->file); | ||
227 | fd_install(som_exec_fileno = retval, bprm->file); | ||
228 | |||
229 | /* Flush all traces of the currently running executable */ | 222 | /* Flush all traces of the currently running executable */ |
230 | retval = flush_old_exec(bprm); | 223 | retval = flush_old_exec(bprm); |
231 | if (retval) | 224 | if (retval) |
@@ -1420,8 +1420,7 @@ static void bio_pair_end_2(struct bio *bi, int err) | |||
1420 | } | 1420 | } |
1421 | 1421 | ||
1422 | /* | 1422 | /* |
1423 | * split a bio - only worry about a bio with a single page | 1423 | * split a bio - only worry about a bio with a single page in its iovec |
1424 | * in it's iovec | ||
1425 | */ | 1424 | */ |
1426 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) | 1425 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) |
1427 | { | 1426 | { |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 8c3c6899ccf3..f45dbc18dd17 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -204,6 +204,7 @@ int fsync_bdev(struct block_device *bdev) | |||
204 | } | 204 | } |
205 | return sync_blockdev(bdev); | 205 | return sync_blockdev(bdev); |
206 | } | 206 | } |
207 | EXPORT_SYMBOL(fsync_bdev); | ||
207 | 208 | ||
208 | /** | 209 | /** |
209 | * freeze_bdev -- lock a filesystem and force it into a consistent state | 210 | * freeze_bdev -- lock a filesystem and force it into a consistent state |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 1d53b62dbba5..7fdd184a528d 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -256,7 +256,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) | |||
256 | } | 256 | } |
257 | 257 | ||
258 | if (!acl) | 258 | if (!acl) |
259 | inode->i_mode &= ~current->fs->umask; | 259 | inode->i_mode &= ~current_umask(); |
260 | } | 260 | } |
261 | 261 | ||
262 | if (IS_POSIXACL(dir) && acl) { | 262 | if (IS_POSIXACL(dir) && acl) { |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c84ca1f5259a..51bfdfc8fcda 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/spinlock.h> | 21 | #include <linux/spinlock.h> |
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/ftrace.h> | ||
24 | #include "async-thread.h" | 23 | #include "async-thread.h" |
25 | 24 | ||
26 | #define WORK_QUEUED_BIT 0 | 25 | #define WORK_QUEUED_BIT 0 |
@@ -195,6 +194,9 @@ again_locked: | |||
195 | if (!list_empty(&worker->pending)) | 194 | if (!list_empty(&worker->pending)) |
196 | continue; | 195 | continue; |
197 | 196 | ||
197 | if (kthread_should_stop()) | ||
198 | break; | ||
199 | |||
198 | /* still no more work?, sleep for real */ | 200 | /* still no more work?, sleep for real */ |
199 | spin_lock_irq(&worker->lock); | 201 | spin_lock_irq(&worker->lock); |
200 | set_current_state(TASK_INTERRUPTIBLE); | 202 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -208,7 +210,8 @@ again_locked: | |||
208 | worker->working = 0; | 210 | worker->working = 0; |
209 | spin_unlock_irq(&worker->lock); | 211 | spin_unlock_irq(&worker->lock); |
210 | 212 | ||
211 | schedule(); | 213 | if (!kthread_should_stop()) |
214 | schedule(); | ||
212 | } | 215 | } |
213 | __set_current_state(TASK_RUNNING); | 216 | __set_current_state(TASK_RUNNING); |
214 | } | 217 | } |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dbb724124633..e5b2533b691a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1244,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1244 | * readahead one full node of leaves, finding things that are close | 1244 | * readahead one full node of leaves, finding things that are close |
1245 | * to the block in 'slot', and triggering ra on them. | 1245 | * to the block in 'slot', and triggering ra on them. |
1246 | */ | 1246 | */ |
1247 | static noinline void reada_for_search(struct btrfs_root *root, | 1247 | static void reada_for_search(struct btrfs_root *root, |
1248 | struct btrfs_path *path, | 1248 | struct btrfs_path *path, |
1249 | int level, int slot, u64 objectid) | 1249 | int level, int slot, u64 objectid) |
1250 | { | 1250 | { |
1251 | struct extent_buffer *node; | 1251 | struct extent_buffer *node; |
1252 | struct btrfs_disk_key disk_key; | 1252 | struct btrfs_disk_key disk_key; |
@@ -1447,6 +1447,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |||
1447 | } | 1447 | } |
1448 | 1448 | ||
1449 | /* | 1449 | /* |
1450 | * helper function for btrfs_search_slot. The goal is to find a block | ||
1451 | * in cache without setting the path to blocking. If we find the block | ||
1452 | * we return zero and the path is unchanged. | ||
1453 | * | ||
1454 | * If we can't find the block, we set the path blocking and do some | ||
1455 | * reada. -EAGAIN is returned and the search must be repeated. | ||
1456 | */ | ||
1457 | static int | ||
1458 | read_block_for_search(struct btrfs_trans_handle *trans, | ||
1459 | struct btrfs_root *root, struct btrfs_path *p, | ||
1460 | struct extent_buffer **eb_ret, int level, int slot, | ||
1461 | struct btrfs_key *key) | ||
1462 | { | ||
1463 | u64 blocknr; | ||
1464 | u64 gen; | ||
1465 | u32 blocksize; | ||
1466 | struct extent_buffer *b = *eb_ret; | ||
1467 | struct extent_buffer *tmp; | ||
1468 | |||
1469 | blocknr = btrfs_node_blockptr(b, slot); | ||
1470 | gen = btrfs_node_ptr_generation(b, slot); | ||
1471 | blocksize = btrfs_level_size(root, level - 1); | ||
1472 | |||
1473 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | ||
1474 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
1475 | *eb_ret = tmp; | ||
1476 | return 0; | ||
1477 | } | ||
1478 | |||
1479 | /* | ||
1480 | * reduce lock contention at high levels | ||
1481 | * of the btree by dropping locks before | ||
1482 | * we read. | ||
1483 | */ | ||
1484 | btrfs_release_path(NULL, p); | ||
1485 | if (tmp) | ||
1486 | free_extent_buffer(tmp); | ||
1487 | if (p->reada) | ||
1488 | reada_for_search(root, p, level, slot, key->objectid); | ||
1489 | |||
1490 | tmp = read_tree_block(root, blocknr, blocksize, gen); | ||
1491 | if (tmp) | ||
1492 | free_extent_buffer(tmp); | ||
1493 | return -EAGAIN; | ||
1494 | } | ||
1495 | |||
1496 | /* | ||
1497 | * helper function for btrfs_search_slot. This does all of the checks | ||
1498 | * for node-level blocks and does any balancing required based on | ||
1499 | * the ins_len. | ||
1500 | * | ||
1501 | * If no extra work was required, zero is returned. If we had to | ||
1502 | * drop the path, -EAGAIN is returned and btrfs_search_slot must | ||
1503 | * start over | ||
1504 | */ | ||
1505 | static int | ||
1506 | setup_nodes_for_search(struct btrfs_trans_handle *trans, | ||
1507 | struct btrfs_root *root, struct btrfs_path *p, | ||
1508 | struct extent_buffer *b, int level, int ins_len) | ||
1509 | { | ||
1510 | int ret; | ||
1511 | if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= | ||
1512 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | ||
1513 | int sret; | ||
1514 | |||
1515 | sret = reada_for_balance(root, p, level); | ||
1516 | if (sret) | ||
1517 | goto again; | ||
1518 | |||
1519 | btrfs_set_path_blocking(p); | ||
1520 | sret = split_node(trans, root, p, level); | ||
1521 | btrfs_clear_path_blocking(p, NULL); | ||
1522 | |||
1523 | BUG_ON(sret > 0); | ||
1524 | if (sret) { | ||
1525 | ret = sret; | ||
1526 | goto done; | ||
1527 | } | ||
1528 | b = p->nodes[level]; | ||
1529 | } else if (ins_len < 0 && btrfs_header_nritems(b) < | ||
1530 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { | ||
1531 | int sret; | ||
1532 | |||
1533 | sret = reada_for_balance(root, p, level); | ||
1534 | if (sret) | ||
1535 | goto again; | ||
1536 | |||
1537 | btrfs_set_path_blocking(p); | ||
1538 | sret = balance_level(trans, root, p, level); | ||
1539 | btrfs_clear_path_blocking(p, NULL); | ||
1540 | |||
1541 | if (sret) { | ||
1542 | ret = sret; | ||
1543 | goto done; | ||
1544 | } | ||
1545 | b = p->nodes[level]; | ||
1546 | if (!b) { | ||
1547 | btrfs_release_path(NULL, p); | ||
1548 | goto again; | ||
1549 | } | ||
1550 | BUG_ON(btrfs_header_nritems(b) == 1); | ||
1551 | } | ||
1552 | return 0; | ||
1553 | |||
1554 | again: | ||
1555 | ret = -EAGAIN; | ||
1556 | done: | ||
1557 | return ret; | ||
1558 | } | ||
1559 | |||
1560 | /* | ||
1450 | * look for key in the tree. path is filled in with nodes along the way | 1561 | * look for key in the tree. path is filled in with nodes along the way |
1451 | * if key is found, we return zero and you can find the item in the leaf | 1562 | * if key is found, we return zero and you can find the item in the leaf |
1452 | * level of the path (level 0) | 1563 | * level of the path (level 0) |
@@ -1464,16 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1464 | ins_len, int cow) | 1575 | ins_len, int cow) |
1465 | { | 1576 | { |
1466 | struct extent_buffer *b; | 1577 | struct extent_buffer *b; |
1467 | struct extent_buffer *tmp; | ||
1468 | int slot; | 1578 | int slot; |
1469 | int ret; | 1579 | int ret; |
1470 | int level; | 1580 | int level; |
1471 | int should_reada = p->reada; | ||
1472 | int lowest_unlock = 1; | 1581 | int lowest_unlock = 1; |
1473 | int blocksize; | ||
1474 | u8 lowest_level = 0; | 1582 | u8 lowest_level = 0; |
1475 | u64 blocknr; | ||
1476 | u64 gen; | ||
1477 | 1583 | ||
1478 | lowest_level = p->lowest_level; | 1584 | lowest_level = p->lowest_level; |
1479 | WARN_ON(lowest_level && ins_len > 0); | 1585 | WARN_ON(lowest_level && ins_len > 0); |
@@ -1502,7 +1608,11 @@ again: | |||
1502 | if (cow) { | 1608 | if (cow) { |
1503 | int wret; | 1609 | int wret; |
1504 | 1610 | ||
1505 | /* is a cow on this block not required */ | 1611 | /* |
1612 | * if we don't really need to cow this block | ||
1613 | * then we don't want to set the path blocking, | ||
1614 | * so we test it here | ||
1615 | */ | ||
1506 | if (btrfs_header_generation(b) == trans->transid && | 1616 | if (btrfs_header_generation(b) == trans->transid && |
1507 | btrfs_header_owner(b) == root->root_key.objectid && | 1617 | btrfs_header_owner(b) == root->root_key.objectid && |
1508 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | 1618 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { |
@@ -1557,51 +1667,15 @@ cow_done: | |||
1557 | if (ret && slot > 0) | 1667 | if (ret && slot > 0) |
1558 | slot -= 1; | 1668 | slot -= 1; |
1559 | p->slots[level] = slot; | 1669 | p->slots[level] = slot; |
1560 | if ((p->search_for_split || ins_len > 0) && | 1670 | ret = setup_nodes_for_search(trans, root, p, b, level, |
1561 | btrfs_header_nritems(b) >= | 1671 | ins_len); |
1562 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | 1672 | if (ret == -EAGAIN) |
1563 | int sret; | 1673 | goto again; |
1564 | 1674 | else if (ret) | |
1565 | sret = reada_for_balance(root, p, level); | 1675 | goto done; |
1566 | if (sret) | 1676 | b = p->nodes[level]; |
1567 | goto again; | 1677 | slot = p->slots[level]; |
1568 | |||
1569 | btrfs_set_path_blocking(p); | ||
1570 | sret = split_node(trans, root, p, level); | ||
1571 | btrfs_clear_path_blocking(p, NULL); | ||
1572 | |||
1573 | BUG_ON(sret > 0); | ||
1574 | if (sret) { | ||
1575 | ret = sret; | ||
1576 | goto done; | ||
1577 | } | ||
1578 | b = p->nodes[level]; | ||
1579 | slot = p->slots[level]; | ||
1580 | } else if (ins_len < 0 && | ||
1581 | btrfs_header_nritems(b) < | ||
1582 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { | ||
1583 | int sret; | ||
1584 | |||
1585 | sret = reada_for_balance(root, p, level); | ||
1586 | if (sret) | ||
1587 | goto again; | ||
1588 | |||
1589 | btrfs_set_path_blocking(p); | ||
1590 | sret = balance_level(trans, root, p, level); | ||
1591 | btrfs_clear_path_blocking(p, NULL); | ||
1592 | 1678 | ||
1593 | if (sret) { | ||
1594 | ret = sret; | ||
1595 | goto done; | ||
1596 | } | ||
1597 | b = p->nodes[level]; | ||
1598 | if (!b) { | ||
1599 | btrfs_release_path(NULL, p); | ||
1600 | goto again; | ||
1601 | } | ||
1602 | slot = p->slots[level]; | ||
1603 | BUG_ON(btrfs_header_nritems(b) == 1); | ||
1604 | } | ||
1605 | unlock_up(p, level, lowest_unlock); | 1679 | unlock_up(p, level, lowest_unlock); |
1606 | 1680 | ||
1607 | /* this is only true while dropping a snapshot */ | 1681 | /* this is only true while dropping a snapshot */ |
@@ -1610,44 +1684,11 @@ cow_done: | |||
1610 | goto done; | 1684 | goto done; |
1611 | } | 1685 | } |
1612 | 1686 | ||
1613 | blocknr = btrfs_node_blockptr(b, slot); | 1687 | ret = read_block_for_search(trans, root, p, |
1614 | gen = btrfs_node_ptr_generation(b, slot); | 1688 | &b, level, slot, key); |
1615 | blocksize = btrfs_level_size(root, level - 1); | 1689 | if (ret == -EAGAIN) |
1690 | goto again; | ||
1616 | 1691 | ||
1617 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | ||
1618 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
1619 | b = tmp; | ||
1620 | } else { | ||
1621 | /* | ||
1622 | * reduce lock contention at high levels | ||
1623 | * of the btree by dropping locks before | ||
1624 | * we read. | ||
1625 | */ | ||
1626 | if (level > 0) { | ||
1627 | btrfs_release_path(NULL, p); | ||
1628 | if (tmp) | ||
1629 | free_extent_buffer(tmp); | ||
1630 | if (should_reada) | ||
1631 | reada_for_search(root, p, | ||
1632 | level, slot, | ||
1633 | key->objectid); | ||
1634 | |||
1635 | tmp = read_tree_block(root, blocknr, | ||
1636 | blocksize, gen); | ||
1637 | if (tmp) | ||
1638 | free_extent_buffer(tmp); | ||
1639 | goto again; | ||
1640 | } else { | ||
1641 | btrfs_set_path_blocking(p); | ||
1642 | if (tmp) | ||
1643 | free_extent_buffer(tmp); | ||
1644 | if (should_reada) | ||
1645 | reada_for_search(root, p, | ||
1646 | level, slot, | ||
1647 | key->objectid); | ||
1648 | b = read_node_slot(root, b, slot); | ||
1649 | } | ||
1650 | } | ||
1651 | if (!p->skip_locking) { | 1692 | if (!p->skip_locking) { |
1652 | int lret; | 1693 | int lret; |
1653 | 1694 | ||
@@ -2116,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2116 | BUG_ON(!path->nodes[level]); | 2157 | BUG_ON(!path->nodes[level]); |
2117 | lower = path->nodes[level]; | 2158 | lower = path->nodes[level]; |
2118 | nritems = btrfs_header_nritems(lower); | 2159 | nritems = btrfs_header_nritems(lower); |
2119 | if (slot > nritems) | 2160 | BUG_ON(slot > nritems); |
2120 | BUG(); | ||
2121 | if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) | 2161 | if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) |
2122 | BUG(); | 2162 | BUG(); |
2123 | if (slot != nritems) { | 2163 | if (slot != nritems) { |
@@ -4086,28 +4126,44 @@ next: | |||
4086 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | 4126 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) |
4087 | { | 4127 | { |
4088 | int slot; | 4128 | int slot; |
4089 | int level = 1; | 4129 | int level; |
4090 | struct extent_buffer *c; | 4130 | struct extent_buffer *c; |
4091 | struct extent_buffer *next = NULL; | 4131 | struct extent_buffer *next; |
4092 | struct btrfs_key key; | 4132 | struct btrfs_key key; |
4093 | u32 nritems; | 4133 | u32 nritems; |
4094 | int ret; | 4134 | int ret; |
4135 | int old_spinning = path->leave_spinning; | ||
4136 | int force_blocking = 0; | ||
4095 | 4137 | ||
4096 | nritems = btrfs_header_nritems(path->nodes[0]); | 4138 | nritems = btrfs_header_nritems(path->nodes[0]); |
4097 | if (nritems == 0) | 4139 | if (nritems == 0) |
4098 | return 1; | 4140 | return 1; |
4099 | 4141 | ||
4100 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | 4142 | /* |
4143 | * we take the blocks in an order that upsets lockdep. Using | ||
4144 | * blocking mode is the only way around it. | ||
4145 | */ | ||
4146 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
4147 | force_blocking = 1; | ||
4148 | #endif | ||
4101 | 4149 | ||
4150 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
4151 | again: | ||
4152 | level = 1; | ||
4153 | next = NULL; | ||
4102 | btrfs_release_path(root, path); | 4154 | btrfs_release_path(root, path); |
4155 | |||
4103 | path->keep_locks = 1; | 4156 | path->keep_locks = 1; |
4157 | |||
4158 | if (!force_blocking) | ||
4159 | path->leave_spinning = 1; | ||
4160 | |||
4104 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4161 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4105 | path->keep_locks = 0; | 4162 | path->keep_locks = 0; |
4106 | 4163 | ||
4107 | if (ret < 0) | 4164 | if (ret < 0) |
4108 | return ret; | 4165 | return ret; |
4109 | 4166 | ||
4110 | btrfs_set_path_blocking(path); | ||
4111 | nritems = btrfs_header_nritems(path->nodes[0]); | 4167 | nritems = btrfs_header_nritems(path->nodes[0]); |
4112 | /* | 4168 | /* |
4113 | * by releasing the path above we dropped all our locks. A balance | 4169 | * by releasing the path above we dropped all our locks. A balance |
@@ -4117,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4117 | */ | 4173 | */ |
4118 | if (nritems > 0 && path->slots[0] < nritems - 1) { | 4174 | if (nritems > 0 && path->slots[0] < nritems - 1) { |
4119 | path->slots[0]++; | 4175 | path->slots[0]++; |
4176 | ret = 0; | ||
4120 | goto done; | 4177 | goto done; |
4121 | } | 4178 | } |
4122 | 4179 | ||
4123 | while (level < BTRFS_MAX_LEVEL) { | 4180 | while (level < BTRFS_MAX_LEVEL) { |
4124 | if (!path->nodes[level]) | 4181 | if (!path->nodes[level]) { |
4125 | return 1; | 4182 | ret = 1; |
4183 | goto done; | ||
4184 | } | ||
4126 | 4185 | ||
4127 | slot = path->slots[level] + 1; | 4186 | slot = path->slots[level] + 1; |
4128 | c = path->nodes[level]; | 4187 | c = path->nodes[level]; |
4129 | if (slot >= btrfs_header_nritems(c)) { | 4188 | if (slot >= btrfs_header_nritems(c)) { |
4130 | level++; | 4189 | level++; |
4131 | if (level == BTRFS_MAX_LEVEL) | 4190 | if (level == BTRFS_MAX_LEVEL) { |
4132 | return 1; | 4191 | ret = 1; |
4192 | goto done; | ||
4193 | } | ||
4133 | continue; | 4194 | continue; |
4134 | } | 4195 | } |
4135 | 4196 | ||
@@ -4138,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4138 | free_extent_buffer(next); | 4199 | free_extent_buffer(next); |
4139 | } | 4200 | } |
4140 | 4201 | ||
4141 | /* the path was set to blocking above */ | 4202 | next = c; |
4142 | if (level == 1 && (path->locks[1] || path->skip_locking) && | 4203 | ret = read_block_for_search(NULL, root, path, &next, level, |
4143 | path->reada) | 4204 | slot, &key); |
4144 | reada_for_search(root, path, level, slot, 0); | 4205 | if (ret == -EAGAIN) |
4206 | goto again; | ||
4145 | 4207 | ||
4146 | next = read_node_slot(root, c, slot); | ||
4147 | if (!path->skip_locking) { | 4208 | if (!path->skip_locking) { |
4148 | btrfs_assert_tree_locked(c); | 4209 | ret = btrfs_try_spin_lock(next); |
4149 | btrfs_tree_lock(next); | 4210 | if (!ret) { |
4150 | btrfs_set_lock_blocking(next); | 4211 | btrfs_set_path_blocking(path); |
4212 | btrfs_tree_lock(next); | ||
4213 | if (!force_blocking) | ||
4214 | btrfs_clear_path_blocking(path, next); | ||
4215 | } | ||
4216 | if (force_blocking) | ||
4217 | btrfs_set_lock_blocking(next); | ||
4151 | } | 4218 | } |
4152 | break; | 4219 | break; |
4153 | } | 4220 | } |
@@ -4157,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4157 | c = path->nodes[level]; | 4224 | c = path->nodes[level]; |
4158 | if (path->locks[level]) | 4225 | if (path->locks[level]) |
4159 | btrfs_tree_unlock(c); | 4226 | btrfs_tree_unlock(c); |
4227 | |||
4160 | free_extent_buffer(c); | 4228 | free_extent_buffer(c); |
4161 | path->nodes[level] = next; | 4229 | path->nodes[level] = next; |
4162 | path->slots[level] = 0; | 4230 | path->slots[level] = 0; |
4163 | if (!path->skip_locking) | 4231 | if (!path->skip_locking) |
4164 | path->locks[level] = 1; | 4232 | path->locks[level] = 1; |
4233 | |||
4165 | if (!level) | 4234 | if (!level) |
4166 | break; | 4235 | break; |
4167 | 4236 | ||
4168 | btrfs_set_path_blocking(path); | 4237 | ret = read_block_for_search(NULL, root, path, &next, level, |
4169 | if (level == 1 && path->locks[1] && path->reada) | 4238 | 0, &key); |
4170 | reada_for_search(root, path, level, slot, 0); | 4239 | if (ret == -EAGAIN) |
4171 | next = read_node_slot(root, next, 0); | 4240 | goto again; |
4241 | |||
4172 | if (!path->skip_locking) { | 4242 | if (!path->skip_locking) { |
4173 | btrfs_assert_tree_locked(path->nodes[level]); | 4243 | btrfs_assert_tree_locked(path->nodes[level]); |
4174 | btrfs_tree_lock(next); | 4244 | ret = btrfs_try_spin_lock(next); |
4175 | btrfs_set_lock_blocking(next); | 4245 | if (!ret) { |
4246 | btrfs_set_path_blocking(path); | ||
4247 | btrfs_tree_lock(next); | ||
4248 | if (!force_blocking) | ||
4249 | btrfs_clear_path_blocking(path, next); | ||
4250 | } | ||
4251 | if (force_blocking) | ||
4252 | btrfs_set_lock_blocking(next); | ||
4176 | } | 4253 | } |
4177 | } | 4254 | } |
4255 | ret = 0; | ||
4178 | done: | 4256 | done: |
4179 | unlock_up(path, 0, 1); | 4257 | unlock_up(path, 0, 1); |
4180 | return 0; | 4258 | path->leave_spinning = old_spinning; |
4259 | if (!old_spinning) | ||
4260 | btrfs_set_path_blocking(path); | ||
4261 | |||
4262 | return ret; | ||
4181 | } | 4263 | } |
4182 | 4264 | ||
4183 | /* | 4265 | /* |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9417713542a2..ad96495dedc5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -143,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
143 | #define BTRFS_FT_MAX 9 | 143 | #define BTRFS_FT_MAX 9 |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * the key defines the order in the tree, and so it also defines (optimal) | 146 | * The key defines the order in the tree, and so it also defines (optimal) |
147 | * block layout. objectid corresonds to the inode number. The flags | 147 | * block layout. |
148 | * tells us things about the object, and is a kind of stream selector. | 148 | * |
149 | * so for a given inode, keys with flags of 1 might refer to the inode | 149 | * objectid corresponds to the inode number. |
150 | * data, flags of 2 may point to file data in the btree and flags == 3 | 150 | * |
151 | * may point to extents. | 151 | * type tells us things about the object, and is a kind of stream selector. |
152 | * so for a given inode, keys with type of 1 might refer to the inode data, | ||
153 | * type of 2 may point to file data in the btree and type == 3 may point to | ||
154 | * extents. | ||
152 | * | 155 | * |
153 | * offset is the starting byte offset for this key in the stream. | 156 | * offset is the starting byte offset for this key in the stream. |
154 | * | 157 | * |
@@ -200,7 +203,7 @@ struct btrfs_dev_item { | |||
200 | 203 | ||
201 | /* | 204 | /* |
202 | * starting byte of this partition on the device, | 205 | * starting byte of this partition on the device, |
203 | * to allowr for stripe alignment in the future | 206 | * to allow for stripe alignment in the future |
204 | */ | 207 | */ |
205 | __le64 start_offset; | 208 | __le64 start_offset; |
206 | 209 | ||
@@ -633,18 +636,35 @@ struct btrfs_space_info { | |||
633 | struct rw_semaphore groups_sem; | 636 | struct rw_semaphore groups_sem; |
634 | }; | 637 | }; |
635 | 638 | ||
636 | struct btrfs_free_space { | 639 | /* |
637 | struct rb_node bytes_index; | 640 | * free clusters are used to claim free space in relatively large chunks, |
638 | struct rb_node offset_index; | 641 | * allowing us to do less seeky writes. They are used for all metadata |
639 | u64 offset; | 642 | * allocations and data allocations in ssd mode. |
640 | u64 bytes; | 643 | */ |
644 | struct btrfs_free_cluster { | ||
645 | spinlock_t lock; | ||
646 | spinlock_t refill_lock; | ||
647 | struct rb_root root; | ||
648 | |||
649 | /* largest extent in this cluster */ | ||
650 | u64 max_size; | ||
651 | |||
652 | /* first extent starting offset */ | ||
653 | u64 window_start; | ||
654 | |||
655 | struct btrfs_block_group_cache *block_group; | ||
656 | /* | ||
657 | * when a cluster is allocated from a block group, we put the | ||
658 | * cluster onto a list in the block group so that it can | ||
659 | * be freed before the block group is freed. | ||
660 | */ | ||
661 | struct list_head block_group_list; | ||
641 | }; | 662 | }; |
642 | 663 | ||
643 | struct btrfs_block_group_cache { | 664 | struct btrfs_block_group_cache { |
644 | struct btrfs_key key; | 665 | struct btrfs_key key; |
645 | struct btrfs_block_group_item item; | 666 | struct btrfs_block_group_item item; |
646 | spinlock_t lock; | 667 | spinlock_t lock; |
647 | struct mutex alloc_mutex; | ||
648 | struct mutex cache_mutex; | 668 | struct mutex cache_mutex; |
649 | u64 pinned; | 669 | u64 pinned; |
650 | u64 reserved; | 670 | u64 reserved; |
@@ -656,6 +676,7 @@ struct btrfs_block_group_cache { | |||
656 | struct btrfs_space_info *space_info; | 676 | struct btrfs_space_info *space_info; |
657 | 677 | ||
658 | /* free space cache stuff */ | 678 | /* free space cache stuff */ |
679 | spinlock_t tree_lock; | ||
659 | struct rb_root free_space_bytes; | 680 | struct rb_root free_space_bytes; |
660 | struct rb_root free_space_offset; | 681 | struct rb_root free_space_offset; |
661 | 682 | ||
@@ -667,6 +688,11 @@ struct btrfs_block_group_cache { | |||
667 | 688 | ||
668 | /* usage count */ | 689 | /* usage count */ |
669 | atomic_t count; | 690 | atomic_t count; |
691 | |||
692 | /* List of struct btrfs_free_clusters for this block group. | ||
693 | * Today it will only have one thing on it, but that may change | ||
694 | */ | ||
695 | struct list_head cluster_list; | ||
670 | }; | 696 | }; |
671 | 697 | ||
672 | struct btrfs_leaf_ref_tree { | 698 | struct btrfs_leaf_ref_tree { |
@@ -728,7 +754,6 @@ struct btrfs_fs_info { | |||
728 | struct mutex tree_log_mutex; | 754 | struct mutex tree_log_mutex; |
729 | struct mutex transaction_kthread_mutex; | 755 | struct mutex transaction_kthread_mutex; |
730 | struct mutex cleaner_mutex; | 756 | struct mutex cleaner_mutex; |
731 | struct mutex pinned_mutex; | ||
732 | struct mutex chunk_mutex; | 757 | struct mutex chunk_mutex; |
733 | struct mutex drop_mutex; | 758 | struct mutex drop_mutex; |
734 | struct mutex volume_mutex; | 759 | struct mutex volume_mutex; |
@@ -839,8 +864,12 @@ struct btrfs_fs_info { | |||
839 | spinlock_t delalloc_lock; | 864 | spinlock_t delalloc_lock; |
840 | spinlock_t new_trans_lock; | 865 | spinlock_t new_trans_lock; |
841 | u64 delalloc_bytes; | 866 | u64 delalloc_bytes; |
842 | u64 last_alloc; | 867 | |
843 | u64 last_data_alloc; | 868 | /* data_alloc_cluster is only used in ssd mode */ |
869 | struct btrfs_free_cluster data_alloc_cluster; | ||
870 | |||
871 | /* all metadata allocations go through this cluster */ | ||
872 | struct btrfs_free_cluster meta_alloc_cluster; | ||
844 | 873 | ||
845 | spinlock_t ref_cache_lock; | 874 | spinlock_t ref_cache_lock; |
846 | u64 total_ref_cache_size; | 875 | u64 total_ref_cache_size; |
@@ -932,7 +961,6 @@ struct btrfs_root { | |||
932 | }; | 961 | }; |
933 | 962 | ||
934 | /* | 963 | /* |
935 | |||
936 | * inode items have the data typically returned from stat and store other | 964 | * inode items have the data typically returned from stat and store other |
937 | * info about object characteristics. There is one for every file and dir in | 965 | * info about object characteristics. There is one for every file and dir in |
938 | * the FS | 966 | * the FS |
@@ -963,7 +991,7 @@ struct btrfs_root { | |||
963 | #define BTRFS_EXTENT_CSUM_KEY 128 | 991 | #define BTRFS_EXTENT_CSUM_KEY 128 |
964 | 992 | ||
965 | /* | 993 | /* |
966 | * root items point to tree roots. There are typically in the root | 994 | * root items point to tree roots. They are typically in the root |
967 | * tree used by the super block to find all the other trees | 995 | * tree used by the super block to find all the other trees |
968 | */ | 996 | */ |
969 | #define BTRFS_ROOT_ITEM_KEY 132 | 997 | #define BTRFS_ROOT_ITEM_KEY 132 |
@@ -1010,6 +1038,8 @@ struct btrfs_root { | |||
1010 | #define BTRFS_MOUNT_SSD (1 << 3) | 1038 | #define BTRFS_MOUNT_SSD (1 << 3) |
1011 | #define BTRFS_MOUNT_DEGRADED (1 << 4) | 1039 | #define BTRFS_MOUNT_DEGRADED (1 << 4) |
1012 | #define BTRFS_MOUNT_COMPRESS (1 << 5) | 1040 | #define BTRFS_MOUNT_COMPRESS (1 << 5) |
1041 | #define BTRFS_MOUNT_NOTREELOG (1 << 6) | ||
1042 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) | ||
1013 | 1043 | ||
1014 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1044 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1015 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1045 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1748,6 +1778,7 @@ static inline struct dentry *fdentry(struct file *file) | |||
1748 | } | 1778 | } |
1749 | 1779 | ||
1750 | /* extent-tree.c */ | 1780 | /* extent-tree.c */ |
1781 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | ||
1751 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1782 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1752 | struct btrfs_root *root, unsigned long count); | 1783 | struct btrfs_root *root, unsigned long count); |
1753 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1784 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
@@ -2174,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask); | |||
2174 | int btrfs_init_acl(struct inode *inode, struct inode *dir); | 2205 | int btrfs_init_acl(struct inode *inode, struct inode *dir); |
2175 | int btrfs_acl_chmod(struct inode *inode); | 2206 | int btrfs_acl_chmod(struct inode *inode); |
2176 | 2207 | ||
2177 | /* free-space-cache.c */ | ||
2178 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
2179 | u64 bytenr, u64 size); | ||
2180 | int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
2181 | u64 offset, u64 bytes); | ||
2182 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
2183 | u64 bytenr, u64 size); | ||
2184 | int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
2185 | u64 offset, u64 bytes); | ||
2186 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
2187 | *block_group); | ||
2188 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | ||
2189 | *block_group, u64 offset, | ||
2190 | u64 bytes); | ||
2191 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
2192 | u64 bytes); | ||
2193 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
2194 | #endif | 2208 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index cbf7dc8ae3ec..d6c01c096a40 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -18,7 +18,6 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/sort.h> | 20 | #include <linux/sort.h> |
21 | #include <linux/ftrace.h> | ||
22 | #include "ctree.h" | 21 | #include "ctree.h" |
23 | #include "delayed-ref.h" | 22 | #include "delayed-ref.h" |
24 | #include "transaction.h" | 23 | #include "transaction.h" |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92d73929d381..92caa8035f36 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "locking.h" | 38 | #include "locking.h" |
39 | #include "ref-cache.h" | 39 | #include "ref-cache.h" |
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | ||
41 | 42 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 43 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
@@ -1412,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1412 | 1413 | ||
1413 | ret = extent_range_uptodate(io_tree, start + length, | 1414 | ret = extent_range_uptodate(io_tree, start + length, |
1414 | start + buf_len - 1); | 1415 | start + buf_len - 1); |
1415 | if (ret == 1) | ||
1416 | return ret; | ||
1417 | return ret; | 1416 | return ret; |
1418 | } | 1417 | } |
1419 | 1418 | ||
@@ -1647,12 +1646,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1647 | mutex_init(&fs_info->ordered_operations_mutex); | 1646 | mutex_init(&fs_info->ordered_operations_mutex); |
1648 | mutex_init(&fs_info->tree_log_mutex); | 1647 | mutex_init(&fs_info->tree_log_mutex); |
1649 | mutex_init(&fs_info->drop_mutex); | 1648 | mutex_init(&fs_info->drop_mutex); |
1650 | mutex_init(&fs_info->pinned_mutex); | ||
1651 | mutex_init(&fs_info->chunk_mutex); | 1649 | mutex_init(&fs_info->chunk_mutex); |
1652 | mutex_init(&fs_info->transaction_kthread_mutex); | 1650 | mutex_init(&fs_info->transaction_kthread_mutex); |
1653 | mutex_init(&fs_info->cleaner_mutex); | 1651 | mutex_init(&fs_info->cleaner_mutex); |
1654 | mutex_init(&fs_info->volume_mutex); | 1652 | mutex_init(&fs_info->volume_mutex); |
1655 | mutex_init(&fs_info->tree_reloc_mutex); | 1653 | mutex_init(&fs_info->tree_reloc_mutex); |
1654 | |||
1655 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | ||
1656 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | ||
1657 | |||
1656 | init_waitqueue_head(&fs_info->transaction_throttle); | 1658 | init_waitqueue_head(&fs_info->transaction_throttle); |
1657 | init_waitqueue_head(&fs_info->transaction_wait); | 1659 | init_waitqueue_head(&fs_info->transaction_wait); |
1658 | init_waitqueue_head(&fs_info->async_submit_wait); | 1660 | init_waitqueue_head(&fs_info->async_submit_wait); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f5e7cae63d80..178df4c67de4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "volumes.h" | 31 | #include "volumes.h" |
32 | #include "locking.h" | 32 | #include "locking.h" |
33 | #include "ref-cache.h" | 33 | #include "ref-cache.h" |
34 | #include "free-space-cache.h" | ||
34 | 35 | ||
35 | #define PENDING_EXTENT_INSERT 0 | 36 | #define PENDING_EXTENT_INSERT 0 |
36 | #define PENDING_EXTENT_DELETE 1 | 37 | #define PENDING_EXTENT_DELETE 1 |
@@ -166,7 +167,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
166 | u64 extent_start, extent_end, size; | 167 | u64 extent_start, extent_end, size; |
167 | int ret; | 168 | int ret; |
168 | 169 | ||
169 | mutex_lock(&info->pinned_mutex); | ||
170 | while (start < end) { | 170 | while (start < end) { |
171 | ret = find_first_extent_bit(&info->pinned_extents, start, | 171 | ret = find_first_extent_bit(&info->pinned_extents, start, |
172 | &extent_start, &extent_end, | 172 | &extent_start, &extent_end, |
@@ -192,7 +192,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
192 | ret = btrfs_add_free_space(block_group, start, size); | 192 | ret = btrfs_add_free_space(block_group, start, size); |
193 | BUG_ON(ret); | 193 | BUG_ON(ret); |
194 | } | 194 | } |
195 | mutex_unlock(&info->pinned_mutex); | ||
196 | 195 | ||
197 | return 0; | 196 | return 0; |
198 | } | 197 | } |
@@ -291,8 +290,8 @@ next: | |||
291 | block_group->key.objectid + | 290 | block_group->key.objectid + |
292 | block_group->key.offset); | 291 | block_group->key.offset); |
293 | 292 | ||
294 | remove_sb_from_cache(root, block_group); | ||
295 | block_group->cached = 1; | 293 | block_group->cached = 1; |
294 | remove_sb_from_cache(root, block_group); | ||
296 | ret = 0; | 295 | ret = 0; |
297 | err: | 296 | err: |
298 | btrfs_free_path(path); | 297 | btrfs_free_path(path); |
@@ -326,7 +325,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( | |||
326 | return cache; | 325 | return cache; |
327 | } | 326 | } |
328 | 327 | ||
329 | static inline void put_block_group(struct btrfs_block_group_cache *cache) | 328 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
330 | { | 329 | { |
331 | if (atomic_dec_and_test(&cache->count)) | 330 | if (atomic_dec_and_test(&cache->count)) |
332 | kfree(cache); | 331 | kfree(cache); |
@@ -399,12 +398,12 @@ again: | |||
399 | div_factor(cache->key.offset, factor)) { | 398 | div_factor(cache->key.offset, factor)) { |
400 | group_start = cache->key.objectid; | 399 | group_start = cache->key.objectid; |
401 | spin_unlock(&cache->lock); | 400 | spin_unlock(&cache->lock); |
402 | put_block_group(cache); | 401 | btrfs_put_block_group(cache); |
403 | goto found; | 402 | goto found; |
404 | } | 403 | } |
405 | } | 404 | } |
406 | spin_unlock(&cache->lock); | 405 | spin_unlock(&cache->lock); |
407 | put_block_group(cache); | 406 | btrfs_put_block_group(cache); |
408 | cond_resched(); | 407 | cond_resched(); |
409 | } | 408 | } |
410 | if (!wrapped) { | 409 | if (!wrapped) { |
@@ -1594,7 +1593,7 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | |||
1594 | if (!block_group || block_group->ro) | 1593 | if (!block_group || block_group->ro) |
1595 | readonly = 1; | 1594 | readonly = 1; |
1596 | if (block_group) | 1595 | if (block_group) |
1597 | put_block_group(block_group); | 1596 | btrfs_put_block_group(block_group); |
1598 | return readonly; | 1597 | return readonly; |
1599 | } | 1598 | } |
1600 | 1599 | ||
@@ -2018,7 +2017,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
2018 | WARN_ON(ret); | 2017 | WARN_ON(ret); |
2019 | } | 2018 | } |
2020 | } | 2019 | } |
2021 | put_block_group(cache); | 2020 | btrfs_put_block_group(cache); |
2022 | total -= num_bytes; | 2021 | total -= num_bytes; |
2023 | bytenr += num_bytes; | 2022 | bytenr += num_bytes; |
2024 | } | 2023 | } |
@@ -2035,7 +2034,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
2035 | return 0; | 2034 | return 0; |
2036 | 2035 | ||
2037 | bytenr = cache->key.objectid; | 2036 | bytenr = cache->key.objectid; |
2038 | put_block_group(cache); | 2037 | btrfs_put_block_group(cache); |
2039 | 2038 | ||
2040 | return bytenr; | 2039 | return bytenr; |
2041 | } | 2040 | } |
@@ -2047,7 +2046,6 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2047 | struct btrfs_block_group_cache *cache; | 2046 | struct btrfs_block_group_cache *cache; |
2048 | struct btrfs_fs_info *fs_info = root->fs_info; | 2047 | struct btrfs_fs_info *fs_info = root->fs_info; |
2049 | 2048 | ||
2050 | WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); | ||
2051 | if (pin) { | 2049 | if (pin) { |
2052 | set_extent_dirty(&fs_info->pinned_extents, | 2050 | set_extent_dirty(&fs_info->pinned_extents, |
2053 | bytenr, bytenr + num - 1, GFP_NOFS); | 2051 | bytenr, bytenr + num - 1, GFP_NOFS); |
@@ -2055,7 +2053,6 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2055 | clear_extent_dirty(&fs_info->pinned_extents, | 2053 | clear_extent_dirty(&fs_info->pinned_extents, |
2056 | bytenr, bytenr + num - 1, GFP_NOFS); | 2054 | bytenr, bytenr + num - 1, GFP_NOFS); |
2057 | } | 2055 | } |
2058 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2059 | 2056 | ||
2060 | while (num > 0) { | 2057 | while (num > 0) { |
2061 | cache = btrfs_lookup_block_group(fs_info, bytenr); | 2058 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
@@ -2081,7 +2078,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2081 | if (cache->cached) | 2078 | if (cache->cached) |
2082 | btrfs_add_free_space(cache, bytenr, len); | 2079 | btrfs_add_free_space(cache, bytenr, len); |
2083 | } | 2080 | } |
2084 | put_block_group(cache); | 2081 | btrfs_put_block_group(cache); |
2085 | bytenr += len; | 2082 | bytenr += len; |
2086 | num -= len; | 2083 | num -= len; |
2087 | } | 2084 | } |
@@ -2112,7 +2109,7 @@ static int update_reserved_extents(struct btrfs_root *root, | |||
2112 | } | 2109 | } |
2113 | spin_unlock(&cache->lock); | 2110 | spin_unlock(&cache->lock); |
2114 | spin_unlock(&cache->space_info->lock); | 2111 | spin_unlock(&cache->space_info->lock); |
2115 | put_block_group(cache); | 2112 | btrfs_put_block_group(cache); |
2116 | bytenr += len; | 2113 | bytenr += len; |
2117 | num -= len; | 2114 | num -= len; |
2118 | } | 2115 | } |
@@ -2127,7 +2124,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
2127 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | 2124 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; |
2128 | int ret; | 2125 | int ret; |
2129 | 2126 | ||
2130 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2131 | while (1) { | 2127 | while (1) { |
2132 | ret = find_first_extent_bit(pinned_extents, last, | 2128 | ret = find_first_extent_bit(pinned_extents, last, |
2133 | &start, &end, EXTENT_DIRTY); | 2129 | &start, &end, EXTENT_DIRTY); |
@@ -2136,7 +2132,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
2136 | set_extent_dirty(copy, start, end, GFP_NOFS); | 2132 | set_extent_dirty(copy, start, end, GFP_NOFS); |
2137 | last = end + 1; | 2133 | last = end + 1; |
2138 | } | 2134 | } |
2139 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2140 | return 0; | 2135 | return 0; |
2141 | } | 2136 | } |
2142 | 2137 | ||
@@ -2149,7 +2144,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2149 | int ret; | 2144 | int ret; |
2150 | 2145 | ||
2151 | while (1) { | 2146 | while (1) { |
2152 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2153 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 2147 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
2154 | EXTENT_DIRTY); | 2148 | EXTENT_DIRTY); |
2155 | if (ret) | 2149 | if (ret) |
@@ -2163,7 +2157,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2163 | 2157 | ||
2164 | cond_resched(); | 2158 | cond_resched(); |
2165 | } | 2159 | } |
2166 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2167 | return ret; | 2160 | return ret; |
2168 | } | 2161 | } |
2169 | 2162 | ||
@@ -2205,7 +2198,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
2205 | free_extent_buffer(buf); | 2198 | free_extent_buffer(buf); |
2206 | pinit: | 2199 | pinit: |
2207 | btrfs_set_path_blocking(path); | 2200 | btrfs_set_path_blocking(path); |
2208 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2209 | /* unlocks the pinned mutex */ | 2201 | /* unlocks the pinned mutex */ |
2210 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 2202 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
2211 | 2203 | ||
@@ -2511,8 +2503,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2511 | */ | 2503 | */ |
2512 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && | 2504 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && |
2513 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | 2505 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { |
2514 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2515 | |||
2516 | /* unlocks the pinned mutex */ | 2506 | /* unlocks the pinned mutex */ |
2517 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 2507 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
2518 | update_reserved_extents(root, bytenr, num_bytes, 0); | 2508 | update_reserved_extents(root, bytenr, num_bytes, 0); |
@@ -2554,228 +2544,237 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
2554 | { | 2544 | { |
2555 | int ret = 0; | 2545 | int ret = 0; |
2556 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 2546 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
2557 | u64 total_needed = num_bytes; | 2547 | struct btrfs_free_cluster *last_ptr = NULL; |
2558 | u64 *last_ptr = NULL; | ||
2559 | u64 last_wanted = 0; | ||
2560 | struct btrfs_block_group_cache *block_group = NULL; | 2548 | struct btrfs_block_group_cache *block_group = NULL; |
2561 | int chunk_alloc_done = 0; | ||
2562 | int empty_cluster = 2 * 1024 * 1024; | 2549 | int empty_cluster = 2 * 1024 * 1024; |
2563 | int allowed_chunk_alloc = 0; | 2550 | int allowed_chunk_alloc = 0; |
2564 | struct list_head *head = NULL, *cur = NULL; | ||
2565 | int loop = 0; | ||
2566 | int extra_loop = 0; | ||
2567 | struct btrfs_space_info *space_info; | 2551 | struct btrfs_space_info *space_info; |
2552 | int last_ptr_loop = 0; | ||
2553 | int loop = 0; | ||
2568 | 2554 | ||
2569 | WARN_ON(num_bytes < root->sectorsize); | 2555 | WARN_ON(num_bytes < root->sectorsize); |
2570 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 2556 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
2571 | ins->objectid = 0; | 2557 | ins->objectid = 0; |
2572 | ins->offset = 0; | 2558 | ins->offset = 0; |
2573 | 2559 | ||
2560 | space_info = __find_space_info(root->fs_info, data); | ||
2561 | |||
2574 | if (orig_root->ref_cows || empty_size) | 2562 | if (orig_root->ref_cows || empty_size) |
2575 | allowed_chunk_alloc = 1; | 2563 | allowed_chunk_alloc = 1; |
2576 | 2564 | ||
2577 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 2565 | if (data & BTRFS_BLOCK_GROUP_METADATA) { |
2578 | last_ptr = &root->fs_info->last_alloc; | 2566 | last_ptr = &root->fs_info->meta_alloc_cluster; |
2579 | if (!btrfs_test_opt(root, SSD)) | 2567 | if (!btrfs_test_opt(root, SSD)) |
2580 | empty_cluster = 64 * 1024; | 2568 | empty_cluster = 64 * 1024; |
2581 | } | 2569 | } |
2582 | 2570 | ||
2583 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) | 2571 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { |
2584 | last_ptr = &root->fs_info->last_data_alloc; | 2572 | last_ptr = &root->fs_info->data_alloc_cluster; |
2573 | } | ||
2585 | 2574 | ||
2586 | if (last_ptr) { | 2575 | if (last_ptr) { |
2587 | if (*last_ptr) { | 2576 | spin_lock(&last_ptr->lock); |
2588 | hint_byte = *last_ptr; | 2577 | if (last_ptr->block_group) |
2589 | last_wanted = *last_ptr; | 2578 | hint_byte = last_ptr->window_start; |
2590 | } else | 2579 | spin_unlock(&last_ptr->lock); |
2591 | empty_size += empty_cluster; | ||
2592 | } else { | ||
2593 | empty_cluster = 0; | ||
2594 | } | 2580 | } |
2581 | |||
2595 | search_start = max(search_start, first_logical_byte(root, 0)); | 2582 | search_start = max(search_start, first_logical_byte(root, 0)); |
2596 | search_start = max(search_start, hint_byte); | 2583 | search_start = max(search_start, hint_byte); |
2597 | 2584 | ||
2598 | if (last_wanted && search_start != last_wanted) { | 2585 | if (!last_ptr) { |
2599 | last_wanted = 0; | 2586 | empty_cluster = 0; |
2600 | empty_size += empty_cluster; | 2587 | loop = 1; |
2601 | } | 2588 | } |
2602 | 2589 | ||
2603 | total_needed += empty_size; | 2590 | if (search_start == hint_byte) { |
2604 | block_group = btrfs_lookup_block_group(root->fs_info, search_start); | 2591 | block_group = btrfs_lookup_block_group(root->fs_info, |
2605 | if (!block_group) | 2592 | search_start); |
2606 | block_group = btrfs_lookup_first_block_group(root->fs_info, | 2593 | if (block_group && block_group_bits(block_group, data)) { |
2607 | search_start); | 2594 | down_read(&space_info->groups_sem); |
2608 | space_info = __find_space_info(root->fs_info, data); | 2595 | goto have_block_group; |
2596 | } else if (block_group) { | ||
2597 | btrfs_put_block_group(block_group); | ||
2598 | } | ||
2599 | } | ||
2609 | 2600 | ||
2601 | search: | ||
2610 | down_read(&space_info->groups_sem); | 2602 | down_read(&space_info->groups_sem); |
2611 | while (1) { | 2603 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
2612 | struct btrfs_free_space *free_space; | 2604 | u64 offset; |
2613 | /* | ||
2614 | * the only way this happens if our hint points to a block | ||
2615 | * group thats not of the proper type, while looping this | ||
2616 | * should never happen | ||
2617 | */ | ||
2618 | if (empty_size) | ||
2619 | extra_loop = 1; | ||
2620 | 2605 | ||
2621 | if (!block_group) | 2606 | atomic_inc(&block_group->count); |
2622 | goto new_group_no_lock; | 2607 | search_start = block_group->key.objectid; |
2623 | 2608 | ||
2609 | have_block_group: | ||
2624 | if (unlikely(!block_group->cached)) { | 2610 | if (unlikely(!block_group->cached)) { |
2625 | mutex_lock(&block_group->cache_mutex); | 2611 | mutex_lock(&block_group->cache_mutex); |
2626 | ret = cache_block_group(root, block_group); | 2612 | ret = cache_block_group(root, block_group); |
2627 | mutex_unlock(&block_group->cache_mutex); | 2613 | mutex_unlock(&block_group->cache_mutex); |
2628 | if (ret) | 2614 | if (ret) { |
2615 | btrfs_put_block_group(block_group); | ||
2629 | break; | 2616 | break; |
2617 | } | ||
2630 | } | 2618 | } |
2631 | 2619 | ||
2632 | mutex_lock(&block_group->alloc_mutex); | ||
2633 | if (unlikely(!block_group_bits(block_group, data))) | ||
2634 | goto new_group; | ||
2635 | |||
2636 | if (unlikely(block_group->ro)) | 2620 | if (unlikely(block_group->ro)) |
2637 | goto new_group; | 2621 | goto loop; |
2638 | 2622 | ||
2639 | free_space = btrfs_find_free_space(block_group, search_start, | 2623 | if (last_ptr) { |
2640 | total_needed); | 2624 | /* |
2641 | if (free_space) { | 2625 | * the refill lock keeps out other |
2642 | u64 start = block_group->key.objectid; | 2626 | * people trying to start a new cluster |
2643 | u64 end = block_group->key.objectid + | 2627 | */ |
2644 | block_group->key.offset; | 2628 | spin_lock(&last_ptr->refill_lock); |
2629 | offset = btrfs_alloc_from_cluster(block_group, last_ptr, | ||
2630 | num_bytes, search_start); | ||
2631 | if (offset) { | ||
2632 | /* we have a block, we're done */ | ||
2633 | spin_unlock(&last_ptr->refill_lock); | ||
2634 | goto checks; | ||
2635 | } | ||
2645 | 2636 | ||
2646 | search_start = stripe_align(root, free_space->offset); | 2637 | spin_lock(&last_ptr->lock); |
2638 | /* | ||
2639 | * whoops, this cluster doesn't actually point to | ||
2640 | * this block group. Get a ref on the block | ||
2641 | * group is does point to and try again | ||
2642 | */ | ||
2643 | if (!last_ptr_loop && last_ptr->block_group && | ||
2644 | last_ptr->block_group != block_group) { | ||
2645 | |||
2646 | btrfs_put_block_group(block_group); | ||
2647 | block_group = last_ptr->block_group; | ||
2648 | atomic_inc(&block_group->count); | ||
2649 | spin_unlock(&last_ptr->lock); | ||
2650 | spin_unlock(&last_ptr->refill_lock); | ||
2651 | |||
2652 | last_ptr_loop = 1; | ||
2653 | search_start = block_group->key.objectid; | ||
2654 | goto have_block_group; | ||
2655 | } | ||
2656 | spin_unlock(&last_ptr->lock); | ||
2647 | 2657 | ||
2648 | /* move on to the next group */ | 2658 | /* |
2649 | if (search_start + num_bytes >= search_end) | 2659 | * this cluster didn't work out, free it and |
2650 | goto new_group; | 2660 | * start over |
2661 | */ | ||
2662 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | ||
2651 | 2663 | ||
2652 | /* move on to the next group */ | 2664 | last_ptr_loop = 0; |
2653 | if (search_start + num_bytes > end) | ||
2654 | goto new_group; | ||
2655 | 2665 | ||
2656 | if (last_wanted && search_start != last_wanted) { | 2666 | /* allocate a cluster in this block group */ |
2657 | total_needed += empty_cluster; | 2667 | ret = btrfs_find_space_cluster(trans, |
2658 | empty_size += empty_cluster; | 2668 | block_group, last_ptr, |
2659 | last_wanted = 0; | 2669 | offset, num_bytes, |
2670 | empty_cluster + empty_size); | ||
2671 | if (ret == 0) { | ||
2660 | /* | 2672 | /* |
2661 | * if search_start is still in this block group | 2673 | * now pull our allocation out of this |
2662 | * then we just re-search this block group | 2674 | * cluster |
2663 | */ | 2675 | */ |
2664 | if (search_start >= start && | 2676 | offset = btrfs_alloc_from_cluster(block_group, |
2665 | search_start < end) { | 2677 | last_ptr, num_bytes, |
2666 | mutex_unlock(&block_group->alloc_mutex); | 2678 | search_start); |
2667 | continue; | 2679 | if (offset) { |
2680 | /* we found one, proceed */ | ||
2681 | spin_unlock(&last_ptr->refill_lock); | ||
2682 | goto checks; | ||
2668 | } | 2683 | } |
2669 | |||
2670 | /* else we go to the next block group */ | ||
2671 | goto new_group; | ||
2672 | } | 2684 | } |
2673 | 2685 | /* | |
2674 | if (exclude_nr > 0 && | 2686 | * at this point we either didn't find a cluster |
2675 | (search_start + num_bytes > exclude_start && | 2687 | * or we weren't able to allocate a block from our |
2676 | search_start < exclude_start + exclude_nr)) { | 2688 | * cluster. Free the cluster we've been trying |
2677 | search_start = exclude_start + exclude_nr; | 2689 | * to use, and go to the next block group |
2678 | /* | 2690 | */ |
2679 | * if search_start is still in this block group | 2691 | if (loop < 2) { |
2680 | * then we just re-search this block group | 2692 | btrfs_return_cluster_to_free_space(NULL, |
2681 | */ | 2693 | last_ptr); |
2682 | if (search_start >= start && | 2694 | spin_unlock(&last_ptr->refill_lock); |
2683 | search_start < end) { | 2695 | goto loop; |
2684 | mutex_unlock(&block_group->alloc_mutex); | ||
2685 | last_wanted = 0; | ||
2686 | continue; | ||
2687 | } | ||
2688 | |||
2689 | /* else we go to the next block group */ | ||
2690 | goto new_group; | ||
2691 | } | 2696 | } |
2697 | spin_unlock(&last_ptr->refill_lock); | ||
2698 | } | ||
2692 | 2699 | ||
2693 | ins->objectid = search_start; | 2700 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
2694 | ins->offset = num_bytes; | 2701 | num_bytes, empty_size); |
2702 | if (!offset) | ||
2703 | goto loop; | ||
2704 | checks: | ||
2705 | search_start = stripe_align(root, offset); | ||
2706 | |||
2707 | /* move on to the next group */ | ||
2708 | if (search_start + num_bytes >= search_end) { | ||
2709 | btrfs_add_free_space(block_group, offset, num_bytes); | ||
2710 | goto loop; | ||
2711 | } | ||
2695 | 2712 | ||
2696 | btrfs_remove_free_space_lock(block_group, search_start, | 2713 | /* move on to the next group */ |
2697 | num_bytes); | 2714 | if (search_start + num_bytes > |
2698 | /* we are all good, lets return */ | 2715 | block_group->key.objectid + block_group->key.offset) { |
2699 | mutex_unlock(&block_group->alloc_mutex); | 2716 | btrfs_add_free_space(block_group, offset, num_bytes); |
2700 | break; | 2717 | goto loop; |
2701 | } | 2718 | } |
2702 | new_group: | ||
2703 | mutex_unlock(&block_group->alloc_mutex); | ||
2704 | put_block_group(block_group); | ||
2705 | block_group = NULL; | ||
2706 | new_group_no_lock: | ||
2707 | /* don't try to compare new allocations against the | ||
2708 | * last allocation any more | ||
2709 | */ | ||
2710 | last_wanted = 0; | ||
2711 | 2719 | ||
2712 | /* | 2720 | if (exclude_nr > 0 && |
2713 | * Here's how this works. | 2721 | (search_start + num_bytes > exclude_start && |
2714 | * loop == 0: we were searching a block group via a hint | 2722 | search_start < exclude_start + exclude_nr)) { |
2715 | * and didn't find anything, so we start at | 2723 | search_start = exclude_start + exclude_nr; |
2716 | * the head of the block groups and keep searching | 2724 | |
2717 | * loop == 1: we're searching through all of the block groups | 2725 | btrfs_add_free_space(block_group, offset, num_bytes); |
2718 | * if we hit the head again we have searched | 2726 | /* |
2719 | * all of the block groups for this space and we | 2727 | * if search_start is still in this block group |
2720 | * need to try and allocate, if we cant error out. | 2728 | * then we just re-search this block group |
2721 | * loop == 2: we allocated more space and are looping through | ||
2722 | * all of the block groups again. | ||
2723 | */ | ||
2724 | if (loop == 0) { | ||
2725 | head = &space_info->block_groups; | ||
2726 | cur = head->next; | ||
2727 | loop++; | ||
2728 | } else if (loop == 1 && cur == head) { | ||
2729 | int keep_going; | ||
2730 | |||
2731 | /* at this point we give up on the empty_size | ||
2732 | * allocations and just try to allocate the min | ||
2733 | * space. | ||
2734 | * | ||
2735 | * The extra_loop field was set if an empty_size | ||
2736 | * allocation was attempted above, and if this | ||
2737 | * is try we need to try the loop again without | ||
2738 | * the additional empty_size. | ||
2739 | */ | 2729 | */ |
2740 | total_needed -= empty_size; | 2730 | if (search_start >= block_group->key.objectid && |
2741 | empty_size = 0; | 2731 | search_start < (block_group->key.objectid + |
2742 | keep_going = extra_loop; | 2732 | block_group->key.offset)) |
2743 | loop++; | 2733 | goto have_block_group; |
2734 | goto loop; | ||
2735 | } | ||
2744 | 2736 | ||
2745 | if (allowed_chunk_alloc && !chunk_alloc_done) { | 2737 | ins->objectid = search_start; |
2746 | up_read(&space_info->groups_sem); | 2738 | ins->offset = num_bytes; |
2747 | ret = do_chunk_alloc(trans, root, num_bytes + | 2739 | |
2748 | 2 * 1024 * 1024, data, 1); | 2740 | if (offset < search_start) |
2749 | down_read(&space_info->groups_sem); | 2741 | btrfs_add_free_space(block_group, offset, |
2750 | if (ret < 0) | 2742 | search_start - offset); |
2751 | goto loop_check; | 2743 | BUG_ON(offset > search_start); |
2752 | head = &space_info->block_groups; | 2744 | |
2753 | /* | 2745 | /* we are all good, lets return */ |
2754 | * we've allocated a new chunk, keep | 2746 | break; |
2755 | * trying | 2747 | loop: |
2756 | */ | 2748 | btrfs_put_block_group(block_group); |
2757 | keep_going = 1; | 2749 | } |
2758 | chunk_alloc_done = 1; | 2750 | up_read(&space_info->groups_sem); |
2759 | } else if (!allowed_chunk_alloc) { | 2751 | |
2760 | space_info->force_alloc = 1; | 2752 | /* loop == 0, try to find a clustered alloc in every block group |
2761 | } | 2753 | * loop == 1, try again after forcing a chunk allocation |
2762 | loop_check: | 2754 | * loop == 2, set empty_size and empty_cluster to 0 and try again |
2763 | if (keep_going) { | 2755 | */ |
2764 | cur = head->next; | 2756 | if (!ins->objectid && loop < 3 && |
2765 | extra_loop = 0; | 2757 | (empty_size || empty_cluster || allowed_chunk_alloc)) { |
2766 | } else { | 2758 | if (loop >= 2) { |
2767 | break; | 2759 | empty_size = 0; |
2768 | } | 2760 | empty_cluster = 0; |
2769 | } else if (cur == head) { | ||
2770 | break; | ||
2771 | } | 2761 | } |
2772 | 2762 | ||
2773 | block_group = list_entry(cur, struct btrfs_block_group_cache, | 2763 | if (allowed_chunk_alloc) { |
2774 | list); | 2764 | ret = do_chunk_alloc(trans, root, num_bytes + |
2775 | atomic_inc(&block_group->count); | 2765 | 2 * 1024 * 1024, data, 1); |
2766 | allowed_chunk_alloc = 0; | ||
2767 | } else { | ||
2768 | space_info->force_alloc = 1; | ||
2769 | } | ||
2776 | 2770 | ||
2777 | search_start = block_group->key.objectid; | 2771 | if (loop < 3) { |
2778 | cur = cur->next; | 2772 | loop++; |
2773 | goto search; | ||
2774 | } | ||
2775 | ret = -ENOSPC; | ||
2776 | } else if (!ins->objectid) { | ||
2777 | ret = -ENOSPC; | ||
2779 | } | 2778 | } |
2780 | 2779 | ||
2781 | /* we found what we needed */ | 2780 | /* we found what we needed */ |
@@ -2783,21 +2782,10 @@ loop_check: | |||
2783 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | 2782 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) |
2784 | trans->block_group = block_group->key.objectid; | 2783 | trans->block_group = block_group->key.objectid; |
2785 | 2784 | ||
2786 | if (last_ptr) | 2785 | btrfs_put_block_group(block_group); |
2787 | *last_ptr = ins->objectid + ins->offset; | ||
2788 | ret = 0; | 2786 | ret = 0; |
2789 | } else if (!ret) { | ||
2790 | printk(KERN_ERR "btrfs searching for %llu bytes, " | ||
2791 | "num_bytes %llu, loop %d, allowed_alloc %d\n", | ||
2792 | (unsigned long long)total_needed, | ||
2793 | (unsigned long long)num_bytes, | ||
2794 | loop, allowed_chunk_alloc); | ||
2795 | ret = -ENOSPC; | ||
2796 | } | 2787 | } |
2797 | if (block_group) | ||
2798 | put_block_group(block_group); | ||
2799 | 2788 | ||
2800 | up_read(&space_info->groups_sem); | ||
2801 | return ret; | 2789 | return ret; |
2802 | } | 2790 | } |
2803 | 2791 | ||
@@ -2902,7 +2890,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
2902 | ret = btrfs_discard_extent(root, start, len); | 2890 | ret = btrfs_discard_extent(root, start, len); |
2903 | 2891 | ||
2904 | btrfs_add_free_space(cache, start, len); | 2892 | btrfs_add_free_space(cache, start, len); |
2905 | put_block_group(cache); | 2893 | btrfs_put_block_group(cache); |
2906 | update_reserved_extents(root, start, len, 0); | 2894 | update_reserved_extents(root, start, len, 0); |
2907 | 2895 | ||
2908 | return ret; | 2896 | return ret; |
@@ -3040,7 +3028,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | |||
3040 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 3028 | ret = btrfs_remove_free_space(block_group, ins->objectid, |
3041 | ins->offset); | 3029 | ins->offset); |
3042 | BUG_ON(ret); | 3030 | BUG_ON(ret); |
3043 | put_block_group(block_group); | 3031 | btrfs_put_block_group(block_group); |
3044 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3032 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, |
3045 | ref_generation, owner, ins, 1); | 3033 | ref_generation, owner, ins, 1); |
3046 | return ret; | 3034 | return ret; |
@@ -5729,7 +5717,7 @@ next: | |||
5729 | WARN_ON(block_group->reserved > 0); | 5717 | WARN_ON(block_group->reserved > 0); |
5730 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | 5718 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); |
5731 | spin_unlock(&block_group->lock); | 5719 | spin_unlock(&block_group->lock); |
5732 | put_block_group(block_group); | 5720 | btrfs_put_block_group(block_group); |
5733 | ret = 0; | 5721 | ret = 0; |
5734 | out: | 5722 | out: |
5735 | btrfs_free_path(path); | 5723 | btrfs_free_path(path); |
@@ -5856,9 +5844,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
5856 | 5844 | ||
5857 | atomic_set(&cache->count, 1); | 5845 | atomic_set(&cache->count, 1); |
5858 | spin_lock_init(&cache->lock); | 5846 | spin_lock_init(&cache->lock); |
5859 | mutex_init(&cache->alloc_mutex); | 5847 | spin_lock_init(&cache->tree_lock); |
5860 | mutex_init(&cache->cache_mutex); | 5848 | mutex_init(&cache->cache_mutex); |
5861 | INIT_LIST_HEAD(&cache->list); | 5849 | INIT_LIST_HEAD(&cache->list); |
5850 | INIT_LIST_HEAD(&cache->cluster_list); | ||
5862 | read_extent_buffer(leaf, &cache->item, | 5851 | read_extent_buffer(leaf, &cache->item, |
5863 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 5852 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
5864 | sizeof(cache->item)); | 5853 | sizeof(cache->item)); |
@@ -5912,9 +5901,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
5912 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 5901 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
5913 | atomic_set(&cache->count, 1); | 5902 | atomic_set(&cache->count, 1); |
5914 | spin_lock_init(&cache->lock); | 5903 | spin_lock_init(&cache->lock); |
5915 | mutex_init(&cache->alloc_mutex); | 5904 | spin_lock_init(&cache->tree_lock); |
5916 | mutex_init(&cache->cache_mutex); | 5905 | mutex_init(&cache->cache_mutex); |
5917 | INIT_LIST_HEAD(&cache->list); | 5906 | INIT_LIST_HEAD(&cache->list); |
5907 | INIT_LIST_HEAD(&cache->cluster_list); | ||
5918 | 5908 | ||
5919 | btrfs_set_block_group_used(&cache->item, bytes_used); | 5909 | btrfs_set_block_group_used(&cache->item, bytes_used); |
5920 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); | 5910 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); |
@@ -5974,8 +5964,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
5974 | spin_unlock(&block_group->space_info->lock); | 5964 | spin_unlock(&block_group->space_info->lock); |
5975 | block_group->space_info->full = 0; | 5965 | block_group->space_info->full = 0; |
5976 | 5966 | ||
5977 | put_block_group(block_group); | 5967 | btrfs_put_block_group(block_group); |
5978 | put_block_group(block_group); | 5968 | btrfs_put_block_group(block_group); |
5979 | 5969 | ||
5980 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 5970 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
5981 | if (ret > 0) | 5971 | if (ret > 0) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 08085af089e2..eb2bee8b7fbf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2884,25 +2884,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2884 | disko = 0; | 2884 | disko = 0; |
2885 | flags = 0; | 2885 | flags = 0; |
2886 | 2886 | ||
2887 | switch (em->block_start) { | 2887 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2888 | case EXTENT_MAP_LAST_BYTE: | ||
2889 | end = 1; | 2888 | end = 1; |
2890 | flags |= FIEMAP_EXTENT_LAST; | 2889 | flags |= FIEMAP_EXTENT_LAST; |
2891 | break; | 2890 | } else if (em->block_start == EXTENT_MAP_HOLE) { |
2892 | case EXTENT_MAP_HOLE: | ||
2893 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 2891 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
2894 | break; | 2892 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2895 | case EXTENT_MAP_INLINE: | ||
2896 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2893 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2897 | FIEMAP_EXTENT_NOT_ALIGNED); | 2894 | FIEMAP_EXTENT_NOT_ALIGNED); |
2898 | break; | 2895 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { |
2899 | case EXTENT_MAP_DELALLOC: | ||
2900 | flags |= (FIEMAP_EXTENT_DELALLOC | | 2896 | flags |= (FIEMAP_EXTENT_DELALLOC | |
2901 | FIEMAP_EXTENT_UNKNOWN); | 2897 | FIEMAP_EXTENT_UNKNOWN); |
2902 | break; | 2898 | } else { |
2903 | default: | ||
2904 | disko = em->block_start; | 2899 | disko = em->block_start; |
2905 | break; | ||
2906 | } | 2900 | } |
2907 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2901 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2908 | flags |= FIEMAP_EXTENT_ENCODED; | 2902 | flags |= FIEMAP_EXTENT_ENCODED; |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 50da69da20ce..b187917b36fa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -234,7 +234,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
234 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 234 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
235 | if (rb) { | 235 | if (rb) { |
236 | ret = -EEXIST; | 236 | ret = -EEXIST; |
237 | free_extent_map(merge); | ||
238 | goto out; | 237 | goto out; |
239 | } | 238 | } |
240 | atomic_inc(&em->refs); | 239 | atomic_inc(&em->refs); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d1e5f0e84c58..768b9523662d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -18,6 +18,15 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include "ctree.h" | 20 | #include "ctree.h" |
21 | #include "free-space-cache.h" | ||
22 | #include "transaction.h" | ||
23 | |||
24 | struct btrfs_free_space { | ||
25 | struct rb_node bytes_index; | ||
26 | struct rb_node offset_index; | ||
27 | u64 offset; | ||
28 | u64 bytes; | ||
29 | }; | ||
21 | 30 | ||
22 | static int tree_insert_offset(struct rb_root *root, u64 offset, | 31 | static int tree_insert_offset(struct rb_root *root, u64 offset, |
23 | struct rb_node *node) | 32 | struct rb_node *node) |
@@ -68,14 +77,24 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes, | |||
68 | } | 77 | } |
69 | 78 | ||
70 | /* | 79 | /* |
71 | * searches the tree for the given offset. If contains is set we will return | 80 | * searches the tree for the given offset. |
72 | * the free space that contains the given offset. If contains is not set we | 81 | * |
73 | * will return the free space that starts at or after the given offset and is | 82 | * fuzzy == 1: this is used for allocations where we are given a hint of where |
74 | * at least bytes long. | 83 | * to look for free space. Because the hint may not be completely on an offset |
84 | * mark, or the hint may no longer point to free space we need to fudge our | ||
85 | * results a bit. So we look for free space starting at or after offset with at | ||
86 | * least bytes size. We prefer to find as close to the given offset as we can. | ||
87 | * Also if the offset is within a free space range, then we will return the free | ||
88 | * space that contains the given offset, which means we can return a free space | ||
89 | * chunk with an offset before the provided offset. | ||
90 | * | ||
91 | * fuzzy == 0: this is just a normal tree search. Give us the free space that | ||
92 | * starts at the given offset which is at least bytes size, and if its not there | ||
93 | * return NULL. | ||
75 | */ | 94 | */ |
76 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | 95 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, |
77 | u64 offset, u64 bytes, | 96 | u64 offset, u64 bytes, |
78 | int contains) | 97 | int fuzzy) |
79 | { | 98 | { |
80 | struct rb_node *n = root->rb_node; | 99 | struct rb_node *n = root->rb_node; |
81 | struct btrfs_free_space *entry, *ret = NULL; | 100 | struct btrfs_free_space *entry, *ret = NULL; |
@@ -84,13 +103,14 @@ static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | |||
84 | entry = rb_entry(n, struct btrfs_free_space, offset_index); | 103 | entry = rb_entry(n, struct btrfs_free_space, offset_index); |
85 | 104 | ||
86 | if (offset < entry->offset) { | 105 | if (offset < entry->offset) { |
87 | if (!contains && | 106 | if (fuzzy && |
88 | (!ret || entry->offset < ret->offset) && | 107 | (!ret || entry->offset < ret->offset) && |
89 | (bytes <= entry->bytes)) | 108 | (bytes <= entry->bytes)) |
90 | ret = entry; | 109 | ret = entry; |
91 | n = n->rb_left; | 110 | n = n->rb_left; |
92 | } else if (offset > entry->offset) { | 111 | } else if (offset > entry->offset) { |
93 | if ((entry->offset + entry->bytes - 1) >= offset && | 112 | if (fuzzy && |
113 | (entry->offset + entry->bytes - 1) >= offset && | ||
94 | bytes <= entry->bytes) { | 114 | bytes <= entry->bytes) { |
95 | ret = entry; | 115 | ret = entry; |
96 | break; | 116 | break; |
@@ -171,6 +191,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
171 | int ret = 0; | 191 | int ret = 0; |
172 | 192 | ||
173 | 193 | ||
194 | BUG_ON(!info->bytes); | ||
174 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, | 195 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, |
175 | &info->offset_index); | 196 | &info->offset_index); |
176 | if (ret) | 197 | if (ret) |
@@ -184,108 +205,70 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
184 | return ret; | 205 | return ret; |
185 | } | 206 | } |
186 | 207 | ||
187 | static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 208 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, |
188 | u64 offset, u64 bytes) | 209 | u64 offset, u64 bytes) |
189 | { | 210 | { |
190 | struct btrfs_free_space *right_info; | 211 | struct btrfs_free_space *right_info; |
191 | struct btrfs_free_space *left_info; | 212 | struct btrfs_free_space *left_info; |
192 | struct btrfs_free_space *info = NULL; | 213 | struct btrfs_free_space *info = NULL; |
193 | struct btrfs_free_space *alloc_info; | ||
194 | int ret = 0; | 214 | int ret = 0; |
195 | 215 | ||
196 | alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 216 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); |
197 | if (!alloc_info) | 217 | if (!info) |
198 | return -ENOMEM; | 218 | return -ENOMEM; |
199 | 219 | ||
220 | info->offset = offset; | ||
221 | info->bytes = bytes; | ||
222 | |||
223 | spin_lock(&block_group->tree_lock); | ||
224 | |||
200 | /* | 225 | /* |
201 | * first we want to see if there is free space adjacent to the range we | 226 | * first we want to see if there is free space adjacent to the range we |
202 | * are adding, if there is remove that struct and add a new one to | 227 | * are adding, if there is remove that struct and add a new one to |
203 | * cover the entire range | 228 | * cover the entire range |
204 | */ | 229 | */ |
205 | right_info = tree_search_offset(&block_group->free_space_offset, | 230 | right_info = tree_search_offset(&block_group->free_space_offset, |
206 | offset+bytes, 0, 1); | 231 | offset+bytes, 0, 0); |
207 | left_info = tree_search_offset(&block_group->free_space_offset, | 232 | left_info = tree_search_offset(&block_group->free_space_offset, |
208 | offset-1, 0, 1); | 233 | offset-1, 0, 1); |
209 | 234 | ||
210 | if (right_info && right_info->offset == offset+bytes) { | 235 | if (right_info) { |
211 | unlink_free_space(block_group, right_info); | 236 | unlink_free_space(block_group, right_info); |
212 | info = right_info; | 237 | info->bytes += right_info->bytes; |
213 | info->offset = offset; | 238 | kfree(right_info); |
214 | info->bytes += bytes; | ||
215 | } else if (right_info && right_info->offset != offset+bytes) { | ||
216 | printk(KERN_ERR "btrfs adding space in the middle of an " | ||
217 | "existing free space area. existing: " | ||
218 | "offset=%llu, bytes=%llu. new: offset=%llu, " | ||
219 | "bytes=%llu\n", (unsigned long long)right_info->offset, | ||
220 | (unsigned long long)right_info->bytes, | ||
221 | (unsigned long long)offset, | ||
222 | (unsigned long long)bytes); | ||
223 | BUG(); | ||
224 | } | 239 | } |
225 | 240 | ||
226 | if (left_info) { | 241 | if (left_info && left_info->offset + left_info->bytes == offset) { |
227 | unlink_free_space(block_group, left_info); | 242 | unlink_free_space(block_group, left_info); |
228 | 243 | info->offset = left_info->offset; | |
229 | if (unlikely((left_info->offset + left_info->bytes) != | 244 | info->bytes += left_info->bytes; |
230 | offset)) { | 245 | kfree(left_info); |
231 | printk(KERN_ERR "btrfs free space to the left " | ||
232 | "of new free space isn't " | ||
233 | "quite right. existing: offset=%llu, " | ||
234 | "bytes=%llu. new: offset=%llu, bytes=%llu\n", | ||
235 | (unsigned long long)left_info->offset, | ||
236 | (unsigned long long)left_info->bytes, | ||
237 | (unsigned long long)offset, | ||
238 | (unsigned long long)bytes); | ||
239 | BUG(); | ||
240 | } | ||
241 | |||
242 | if (info) { | ||
243 | info->offset = left_info->offset; | ||
244 | info->bytes += left_info->bytes; | ||
245 | kfree(left_info); | ||
246 | } else { | ||
247 | info = left_info; | ||
248 | info->bytes += bytes; | ||
249 | } | ||
250 | } | 246 | } |
251 | 247 | ||
252 | if (info) { | ||
253 | ret = link_free_space(block_group, info); | ||
254 | if (!ret) | ||
255 | info = NULL; | ||
256 | goto out; | ||
257 | } | ||
258 | |||
259 | info = alloc_info; | ||
260 | alloc_info = NULL; | ||
261 | info->offset = offset; | ||
262 | info->bytes = bytes; | ||
263 | |||
264 | ret = link_free_space(block_group, info); | 248 | ret = link_free_space(block_group, info); |
265 | if (ret) | 249 | if (ret) |
266 | kfree(info); | 250 | kfree(info); |
267 | out: | 251 | |
252 | spin_unlock(&block_group->tree_lock); | ||
253 | |||
268 | if (ret) { | 254 | if (ret) { |
269 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); | 255 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); |
270 | if (ret == -EEXIST) | 256 | BUG_ON(ret == -EEXIST); |
271 | BUG(); | ||
272 | } | 257 | } |
273 | 258 | ||
274 | kfree(alloc_info); | ||
275 | |||
276 | return ret; | 259 | return ret; |
277 | } | 260 | } |
278 | 261 | ||
279 | static int | 262 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
280 | __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 263 | u64 offset, u64 bytes) |
281 | u64 offset, u64 bytes) | ||
282 | { | 264 | { |
283 | struct btrfs_free_space *info; | 265 | struct btrfs_free_space *info; |
284 | int ret = 0; | 266 | int ret = 0; |
285 | 267 | ||
268 | spin_lock(&block_group->tree_lock); | ||
269 | |||
286 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, | 270 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, |
287 | 1); | 271 | 1); |
288 | |||
289 | if (info && info->offset == offset) { | 272 | if (info && info->offset == offset) { |
290 | if (info->bytes < bytes) { | 273 | if (info->bytes < bytes) { |
291 | printk(KERN_ERR "Found free space at %llu, size %llu," | 274 | printk(KERN_ERR "Found free space at %llu, size %llu," |
@@ -295,12 +278,14 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
295 | (unsigned long long)bytes); | 278 | (unsigned long long)bytes); |
296 | WARN_ON(1); | 279 | WARN_ON(1); |
297 | ret = -EINVAL; | 280 | ret = -EINVAL; |
281 | spin_unlock(&block_group->tree_lock); | ||
298 | goto out; | 282 | goto out; |
299 | } | 283 | } |
300 | unlink_free_space(block_group, info); | 284 | unlink_free_space(block_group, info); |
301 | 285 | ||
302 | if (info->bytes == bytes) { | 286 | if (info->bytes == bytes) { |
303 | kfree(info); | 287 | kfree(info); |
288 | spin_unlock(&block_group->tree_lock); | ||
304 | goto out; | 289 | goto out; |
305 | } | 290 | } |
306 | 291 | ||
@@ -308,6 +293,7 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
308 | info->bytes -= bytes; | 293 | info->bytes -= bytes; |
309 | 294 | ||
310 | ret = link_free_space(block_group, info); | 295 | ret = link_free_space(block_group, info); |
296 | spin_unlock(&block_group->tree_lock); | ||
311 | BUG_ON(ret); | 297 | BUG_ON(ret); |
312 | } else if (info && info->offset < offset && | 298 | } else if (info && info->offset < offset && |
313 | info->offset + info->bytes >= offset + bytes) { | 299 | info->offset + info->bytes >= offset + bytes) { |
@@ -333,70 +319,33 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
333 | */ | 319 | */ |
334 | kfree(info); | 320 | kfree(info); |
335 | } | 321 | } |
336 | 322 | spin_unlock(&block_group->tree_lock); | |
337 | /* step two, insert a new info struct to cover anything | 323 | /* step two, insert a new info struct to cover anything |
338 | * before the hole | 324 | * before the hole |
339 | */ | 325 | */ |
340 | ret = __btrfs_add_free_space(block_group, old_start, | 326 | ret = btrfs_add_free_space(block_group, old_start, |
341 | offset - old_start); | 327 | offset - old_start); |
342 | BUG_ON(ret); | 328 | BUG_ON(ret); |
343 | } else { | 329 | } else { |
330 | spin_unlock(&block_group->tree_lock); | ||
331 | if (!info) { | ||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | ||
333 | (unsigned long long)offset); | ||
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | ||
335 | block_group->cached, block_group->key.objectid, | ||
336 | block_group->key.offset); | ||
337 | btrfs_dump_free_space(block_group, bytes); | ||
338 | } else if (info) { | ||
339 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | ||
340 | "but wanted offset=%llu bytes=%llu\n", | ||
341 | info->offset, info->bytes, offset, bytes); | ||
342 | } | ||
344 | WARN_ON(1); | 343 | WARN_ON(1); |
345 | } | 344 | } |
346 | out: | 345 | out: |
347 | return ret; | 346 | return ret; |
348 | } | 347 | } |
349 | 348 | ||
350 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
351 | u64 offset, u64 bytes) | ||
352 | { | ||
353 | int ret; | ||
354 | struct btrfs_free_space *sp; | ||
355 | |||
356 | mutex_lock(&block_group->alloc_mutex); | ||
357 | ret = __btrfs_add_free_space(block_group, offset, bytes); | ||
358 | sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); | ||
359 | BUG_ON(!sp); | ||
360 | mutex_unlock(&block_group->alloc_mutex); | ||
361 | |||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
366 | u64 offset, u64 bytes) | ||
367 | { | ||
368 | int ret; | ||
369 | struct btrfs_free_space *sp; | ||
370 | |||
371 | ret = __btrfs_add_free_space(block_group, offset, bytes); | ||
372 | sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); | ||
373 | BUG_ON(!sp); | ||
374 | |||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
379 | u64 offset, u64 bytes) | ||
380 | { | ||
381 | int ret = 0; | ||
382 | |||
383 | mutex_lock(&block_group->alloc_mutex); | ||
384 | ret = __btrfs_remove_free_space(block_group, offset, bytes); | ||
385 | mutex_unlock(&block_group->alloc_mutex); | ||
386 | |||
387 | return ret; | ||
388 | } | ||
389 | |||
390 | int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
391 | u64 offset, u64 bytes) | ||
392 | { | ||
393 | int ret; | ||
394 | |||
395 | ret = __btrfs_remove_free_space(block_group, offset, bytes); | ||
396 | |||
397 | return ret; | ||
398 | } | ||
399 | |||
400 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | 349 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
401 | u64 bytes) | 350 | u64 bytes) |
402 | { | 351 | { |
@@ -408,6 +357,8 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
408 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 357 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
409 | if (info->bytes >= bytes) | 358 | if (info->bytes >= bytes) |
410 | count++; | 359 | count++; |
360 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, | ||
361 | info->bytes); | ||
411 | } | 362 | } |
412 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" | 363 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
413 | "\n", count); | 364 | "\n", count); |
@@ -428,68 +379,337 @@ u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) | |||
428 | return ret; | 379 | return ret; |
429 | } | 380 | } |
430 | 381 | ||
382 | /* | ||
383 | * for a given cluster, put all of its extents back into the free | ||
384 | * space cache. If the block group passed doesn't match the block group | ||
385 | * pointed to by the cluster, someone else raced in and freed the | ||
386 | * cluster already. In that case, we just return without changing anything | ||
387 | */ | ||
388 | static int | ||
389 | __btrfs_return_cluster_to_free_space( | ||
390 | struct btrfs_block_group_cache *block_group, | ||
391 | struct btrfs_free_cluster *cluster) | ||
392 | { | ||
393 | struct btrfs_free_space *entry; | ||
394 | struct rb_node *node; | ||
395 | |||
396 | spin_lock(&cluster->lock); | ||
397 | if (cluster->block_group != block_group) | ||
398 | goto out; | ||
399 | |||
400 | cluster->window_start = 0; | ||
401 | node = rb_first(&cluster->root); | ||
402 | while(node) { | ||
403 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
404 | node = rb_next(&entry->offset_index); | ||
405 | rb_erase(&entry->offset_index, &cluster->root); | ||
406 | link_free_space(block_group, entry); | ||
407 | } | ||
408 | list_del_init(&cluster->block_group_list); | ||
409 | |||
410 | btrfs_put_block_group(cluster->block_group); | ||
411 | cluster->block_group = NULL; | ||
412 | cluster->root.rb_node = NULL; | ||
413 | out: | ||
414 | spin_unlock(&cluster->lock); | ||
415 | return 0; | ||
416 | } | ||
417 | |||
431 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | 418 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) |
432 | { | 419 | { |
433 | struct btrfs_free_space *info; | 420 | struct btrfs_free_space *info; |
434 | struct rb_node *node; | 421 | struct rb_node *node; |
422 | struct btrfs_free_cluster *cluster; | ||
423 | struct btrfs_free_cluster *safe; | ||
424 | |||
425 | spin_lock(&block_group->tree_lock); | ||
426 | |||
427 | list_for_each_entry_safe(cluster, safe, &block_group->cluster_list, | ||
428 | block_group_list) { | ||
429 | |||
430 | WARN_ON(cluster->block_group != block_group); | ||
431 | __btrfs_return_cluster_to_free_space(block_group, cluster); | ||
432 | } | ||
435 | 433 | ||
436 | mutex_lock(&block_group->alloc_mutex); | ||
437 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { | 434 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { |
438 | info = rb_entry(node, struct btrfs_free_space, bytes_index); | 435 | info = rb_entry(node, struct btrfs_free_space, bytes_index); |
439 | unlink_free_space(block_group, info); | 436 | unlink_free_space(block_group, info); |
440 | kfree(info); | 437 | kfree(info); |
441 | if (need_resched()) { | 438 | if (need_resched()) { |
442 | mutex_unlock(&block_group->alloc_mutex); | 439 | spin_unlock(&block_group->tree_lock); |
443 | cond_resched(); | 440 | cond_resched(); |
444 | mutex_lock(&block_group->alloc_mutex); | 441 | spin_lock(&block_group->tree_lock); |
445 | } | 442 | } |
446 | } | 443 | } |
447 | mutex_unlock(&block_group->alloc_mutex); | 444 | spin_unlock(&block_group->tree_lock); |
448 | } | 445 | } |
449 | 446 | ||
450 | #if 0 | 447 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
451 | static struct btrfs_free_space *btrfs_find_free_space_offset(struct | 448 | u64 offset, u64 bytes, u64 empty_size) |
452 | btrfs_block_group_cache | ||
453 | *block_group, u64 offset, | ||
454 | u64 bytes) | ||
455 | { | 449 | { |
456 | struct btrfs_free_space *ret; | 450 | struct btrfs_free_space *entry = NULL; |
451 | u64 ret = 0; | ||
457 | 452 | ||
458 | mutex_lock(&block_group->alloc_mutex); | 453 | spin_lock(&block_group->tree_lock); |
459 | ret = tree_search_offset(&block_group->free_space_offset, offset, | 454 | entry = tree_search_offset(&block_group->free_space_offset, offset, |
460 | bytes, 0); | 455 | bytes + empty_size, 1); |
461 | mutex_unlock(&block_group->alloc_mutex); | 456 | if (!entry) |
457 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
458 | offset, bytes + empty_size); | ||
459 | if (entry) { | ||
460 | unlink_free_space(block_group, entry); | ||
461 | ret = entry->offset; | ||
462 | entry->offset += bytes; | ||
463 | entry->bytes -= bytes; | ||
464 | |||
465 | if (!entry->bytes) | ||
466 | kfree(entry); | ||
467 | else | ||
468 | link_free_space(block_group, entry); | ||
469 | } | ||
470 | spin_unlock(&block_group->tree_lock); | ||
462 | 471 | ||
463 | return ret; | 472 | return ret; |
464 | } | 473 | } |
465 | 474 | ||
466 | static struct btrfs_free_space *btrfs_find_free_space_bytes(struct | 475 | /* |
467 | btrfs_block_group_cache | 476 | * given a cluster, put all of its extents back into the free space |
468 | *block_group, u64 offset, | 477 | * cache. If a block group is passed, this function will only free |
469 | u64 bytes) | 478 | * a cluster that belongs to the passed block group. |
479 | * | ||
480 | * Otherwise, it'll get a reference on the block group pointed to by the | ||
481 | * cluster and remove the cluster from it. | ||
482 | */ | ||
483 | int btrfs_return_cluster_to_free_space( | ||
484 | struct btrfs_block_group_cache *block_group, | ||
485 | struct btrfs_free_cluster *cluster) | ||
470 | { | 486 | { |
471 | struct btrfs_free_space *ret; | 487 | int ret; |
472 | 488 | ||
473 | mutex_lock(&block_group->alloc_mutex); | 489 | /* first, get a safe pointer to the block group */ |
490 | spin_lock(&cluster->lock); | ||
491 | if (!block_group) { | ||
492 | block_group = cluster->block_group; | ||
493 | if (!block_group) { | ||
494 | spin_unlock(&cluster->lock); | ||
495 | return 0; | ||
496 | } | ||
497 | } else if (cluster->block_group != block_group) { | ||
498 | /* someone else has already freed it don't redo their work */ | ||
499 | spin_unlock(&cluster->lock); | ||
500 | return 0; | ||
501 | } | ||
502 | atomic_inc(&block_group->count); | ||
503 | spin_unlock(&cluster->lock); | ||
474 | 504 | ||
475 | ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); | 505 | /* now return any extents the cluster had on it */ |
476 | mutex_unlock(&block_group->alloc_mutex); | 506 | spin_lock(&block_group->tree_lock); |
507 | ret = __btrfs_return_cluster_to_free_space(block_group, cluster); | ||
508 | spin_unlock(&block_group->tree_lock); | ||
477 | 509 | ||
510 | /* finally drop our ref */ | ||
511 | btrfs_put_block_group(block_group); | ||
478 | return ret; | 512 | return ret; |
479 | } | 513 | } |
480 | #endif | ||
481 | 514 | ||
482 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | 515 | /* |
483 | *block_group, u64 offset, | 516 | * given a cluster, try to allocate 'bytes' from it, returns 0 |
484 | u64 bytes) | 517 | * if it couldn't find anything suitably large, or a logical disk offset |
518 | * if things worked out | ||
519 | */ | ||
520 | u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | ||
521 | struct btrfs_free_cluster *cluster, u64 bytes, | ||
522 | u64 min_start) | ||
523 | { | ||
524 | struct btrfs_free_space *entry = NULL; | ||
525 | struct rb_node *node; | ||
526 | u64 ret = 0; | ||
527 | |||
528 | spin_lock(&cluster->lock); | ||
529 | if (bytes > cluster->max_size) | ||
530 | goto out; | ||
531 | |||
532 | if (cluster->block_group != block_group) | ||
533 | goto out; | ||
534 | |||
535 | node = rb_first(&cluster->root); | ||
536 | if (!node) | ||
537 | goto out; | ||
538 | |||
539 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
540 | |||
541 | while(1) { | ||
542 | if (entry->bytes < bytes || entry->offset < min_start) { | ||
543 | struct rb_node *node; | ||
544 | |||
545 | node = rb_next(&entry->offset_index); | ||
546 | if (!node) | ||
547 | break; | ||
548 | entry = rb_entry(node, struct btrfs_free_space, | ||
549 | offset_index); | ||
550 | continue; | ||
551 | } | ||
552 | ret = entry->offset; | ||
553 | |||
554 | entry->offset += bytes; | ||
555 | entry->bytes -= bytes; | ||
556 | |||
557 | if (entry->bytes == 0) { | ||
558 | rb_erase(&entry->offset_index, &cluster->root); | ||
559 | kfree(entry); | ||
560 | } | ||
561 | break; | ||
562 | } | ||
563 | out: | ||
564 | spin_unlock(&cluster->lock); | ||
565 | return ret; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * here we try to find a cluster of blocks in a block group. The goal | ||
570 | * is to find at least bytes free and up to empty_size + bytes free. | ||
571 | * We might not find them all in one contiguous area. | ||
572 | * | ||
573 | * returns zero and sets up cluster if things worked out, otherwise | ||
574 | * it returns -enospc | ||
575 | */ | ||
576 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
577 | struct btrfs_block_group_cache *block_group, | ||
578 | struct btrfs_free_cluster *cluster, | ||
579 | u64 offset, u64 bytes, u64 empty_size) | ||
485 | { | 580 | { |
486 | struct btrfs_free_space *ret = NULL; | 581 | struct btrfs_free_space *entry = NULL; |
582 | struct rb_node *node; | ||
583 | struct btrfs_free_space *next; | ||
584 | struct btrfs_free_space *last; | ||
585 | u64 min_bytes; | ||
586 | u64 window_start; | ||
587 | u64 window_free; | ||
588 | u64 max_extent = 0; | ||
589 | int total_retries = 0; | ||
590 | int ret; | ||
591 | |||
592 | /* for metadata, allow allocates with more holes */ | ||
593 | if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
594 | /* | ||
595 | * we want to do larger allocations when we are | ||
596 | * flushing out the delayed refs, it helps prevent | ||
597 | * making more work as we go along. | ||
598 | */ | ||
599 | if (trans->transaction->delayed_refs.flushing) | ||
600 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
601 | else | ||
602 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
603 | } else | ||
604 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
605 | |||
606 | spin_lock(&block_group->tree_lock); | ||
607 | spin_lock(&cluster->lock); | ||
608 | |||
609 | /* someone already found a cluster, hooray */ | ||
610 | if (cluster->block_group) { | ||
611 | ret = 0; | ||
612 | goto out; | ||
613 | } | ||
614 | again: | ||
615 | min_bytes = min(min_bytes, bytes + empty_size); | ||
616 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
617 | offset, min_bytes); | ||
618 | if (!entry) { | ||
619 | ret = -ENOSPC; | ||
620 | goto out; | ||
621 | } | ||
622 | window_start = entry->offset; | ||
623 | window_free = entry->bytes; | ||
624 | last = entry; | ||
625 | max_extent = entry->bytes; | ||
626 | |||
627 | while(1) { | ||
628 | /* out window is just right, lets fill it */ | ||
629 | if (window_free >= bytes + empty_size) | ||
630 | break; | ||
487 | 631 | ||
488 | ret = tree_search_offset(&block_group->free_space_offset, offset, | 632 | node = rb_next(&last->offset_index); |
489 | bytes, 0); | 633 | if (!node) { |
490 | if (!ret) | 634 | ret = -ENOSPC; |
491 | ret = tree_search_bytes(&block_group->free_space_bytes, | 635 | goto out; |
492 | offset, bytes); | 636 | } |
637 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
638 | |||
639 | /* | ||
640 | * we haven't filled the empty size and the window is | ||
641 | * very large. reset and try again | ||
642 | */ | ||
643 | if (next->offset - window_start > (bytes + empty_size) * 2) { | ||
644 | entry = next; | ||
645 | window_start = entry->offset; | ||
646 | window_free = entry->bytes; | ||
647 | last = entry; | ||
648 | max_extent = 0; | ||
649 | total_retries++; | ||
650 | if (total_retries % 256 == 0) { | ||
651 | if (min_bytes >= (bytes + empty_size)) { | ||
652 | ret = -ENOSPC; | ||
653 | goto out; | ||
654 | } | ||
655 | /* | ||
656 | * grow our allocation a bit, we're not having | ||
657 | * much luck | ||
658 | */ | ||
659 | min_bytes *= 2; | ||
660 | goto again; | ||
661 | } | ||
662 | } else { | ||
663 | last = next; | ||
664 | window_free += next->bytes; | ||
665 | if (entry->bytes > max_extent) | ||
666 | max_extent = entry->bytes; | ||
667 | } | ||
668 | } | ||
669 | |||
670 | cluster->window_start = entry->offset; | ||
671 | |||
672 | /* | ||
673 | * now we've found our entries, pull them out of the free space | ||
674 | * cache and put them into the cluster rbtree | ||
675 | * | ||
676 | * The cluster includes an rbtree, but only uses the offset index | ||
677 | * of each free space cache entry. | ||
678 | */ | ||
679 | while(1) { | ||
680 | node = rb_next(&entry->offset_index); | ||
681 | unlink_free_space(block_group, entry); | ||
682 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
683 | &entry->offset_index); | ||
684 | BUG_ON(ret); | ||
685 | |||
686 | if (!node || entry == last) | ||
687 | break; | ||
688 | |||
689 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
690 | } | ||
691 | ret = 0; | ||
692 | cluster->max_size = max_extent; | ||
693 | atomic_inc(&block_group->count); | ||
694 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | ||
695 | cluster->block_group = block_group; | ||
696 | out: | ||
697 | spin_unlock(&cluster->lock); | ||
698 | spin_unlock(&block_group->tree_lock); | ||
493 | 699 | ||
494 | return ret; | 700 | return ret; |
495 | } | 701 | } |
702 | |||
703 | /* | ||
704 | * simple code to zero out a cluster | ||
705 | */ | ||
706 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | ||
707 | { | ||
708 | spin_lock_init(&cluster->lock); | ||
709 | spin_lock_init(&cluster->refill_lock); | ||
710 | cluster->root.rb_node = NULL; | ||
711 | cluster->max_size = 0; | ||
712 | INIT_LIST_HEAD(&cluster->block_group_list); | ||
713 | cluster->block_group = NULL; | ||
714 | } | ||
715 | |||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h new file mode 100644 index 000000000000..ab0bdc0a63ce --- /dev/null +++ b/fs/btrfs/free-space-cache.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BTRFS_FREE_SPACE_CACHE | ||
20 | #define __BTRFS_FREE_SPACE_CACHE | ||
21 | |||
22 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
23 | u64 bytenr, u64 size); | ||
24 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
25 | u64 bytenr, u64 size); | ||
26 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
27 | *block_group); | ||
28 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | ||
29 | u64 offset, u64 bytes, u64 empty_size); | ||
30 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
31 | u64 bytes); | ||
32 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
33 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
34 | struct btrfs_block_group_cache *block_group, | ||
35 | struct btrfs_free_cluster *cluster, | ||
36 | u64 offset, u64 bytes, u64 empty_size); | ||
37 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); | ||
38 | u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | ||
39 | struct btrfs_free_cluster *cluster, u64 bytes, | ||
40 | u64 min_start); | ||
41 | int btrfs_return_cluster_to_free_space( | ||
42 | struct btrfs_block_group_cache *block_group, | ||
43 | struct btrfs_free_cluster *cluster); | ||
44 | #endif | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 06d8db5afb08..a0d1dd492a58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3481,8 +3481,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3481 | 3481 | ||
3482 | if (dir) { | 3482 | if (dir) { |
3483 | ret = btrfs_set_inode_index(dir, index); | 3483 | ret = btrfs_set_inode_index(dir, index); |
3484 | if (ret) | 3484 | if (ret) { |
3485 | iput(inode); | ||
3485 | return ERR_PTR(ret); | 3486 | return ERR_PTR(ret); |
3487 | } | ||
3486 | } | 3488 | } |
3487 | /* | 3489 | /* |
3488 | * index_cnt is ignored for everything but a dir, | 3490 | * index_cnt is ignored for everything but a dir, |
@@ -3565,6 +3567,7 @@ fail: | |||
3565 | if (dir) | 3567 | if (dir) |
3566 | BTRFS_I(dir)->index_cnt--; | 3568 | BTRFS_I(dir)->index_cnt--; |
3567 | btrfs_free_path(path); | 3569 | btrfs_free_path(path); |
3570 | iput(inode); | ||
3568 | return ERR_PTR(ret); | 3571 | return ERR_PTR(ret); |
3569 | } | 3572 | } |
3570 | 3573 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bca729fc80c8..7594bec1be10 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -267,7 +267,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
267 | goto out_dput; | 267 | goto out_dput; |
268 | 268 | ||
269 | if (!IS_POSIXACL(parent->dentry->d_inode)) | 269 | if (!IS_POSIXACL(parent->dentry->d_inode)) |
270 | mode &= ~current->fs->umask; | 270 | mode &= ~current_umask(); |
271 | 271 | ||
272 | error = mnt_want_write(parent->mnt); | 272 | error = mnt_want_write(parent->mnt); |
273 | if (error) | 273 | if (error) |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index a5310c0f41e2..1c36e5cd8f55 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -60,8 +60,8 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) | |||
60 | 60 | ||
61 | /* | 61 | /* |
62 | * unfortunately, many of the places that currently set a lock to blocking | 62 | * unfortunately, many of the places that currently set a lock to blocking |
63 | * don't end up blocking for every long, and often they don't block | 63 | * don't end up blocking for very long, and often they don't block |
64 | * at all. For a dbench 50 run, if we don't spin one the blocking bit | 64 | * at all. For a dbench 50 run, if we don't spin on the blocking bit |
65 | * at all, the context switch rate can jump up to 400,000/sec or more. | 65 | * at all, the context switch rate can jump up to 400,000/sec or more. |
66 | * | 66 | * |
67 | * So, we're still stuck with this crummy spin on the blocking bit, | 67 | * So, we're still stuck with this crummy spin on the blocking bit, |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 19a4daf03ccb..9744af9d71e9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/seq_file.h> | ||
27 | #include <linux/string.h> | 28 | #include <linux/string.h> |
28 | #include <linux/smp_lock.h> | 29 | #include <linux/smp_lock.h> |
29 | #include <linux/backing-dev.h> | 30 | #include <linux/backing-dev.h> |
@@ -66,7 +67,8 @@ static void btrfs_put_super(struct super_block *sb) | |||
66 | enum { | 67 | enum { |
67 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
68 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
69 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, | 70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, |
71 | Opt_flushoncommit, Opt_err, | ||
70 | }; | 72 | }; |
71 | 73 | ||
72 | static match_table_t tokens = { | 74 | static match_table_t tokens = { |
@@ -83,6 +85,8 @@ static match_table_t tokens = { | |||
83 | {Opt_compress, "compress"}, | 85 | {Opt_compress, "compress"}, |
84 | {Opt_ssd, "ssd"}, | 86 | {Opt_ssd, "ssd"}, |
85 | {Opt_noacl, "noacl"}, | 87 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | ||
89 | {Opt_flushoncommit, "flushoncommit"}, | ||
86 | {Opt_err, NULL}, | 90 | {Opt_err, NULL}, |
87 | }; | 91 | }; |
88 | 92 | ||
@@ -222,6 +226,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
222 | case Opt_noacl: | 226 | case Opt_noacl: |
223 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; | 227 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; |
224 | break; | 228 | break; |
229 | case Opt_notreelog: | ||
230 | printk(KERN_INFO "btrfs: disabling tree log\n"); | ||
231 | btrfs_set_opt(info->mount_opt, NOTREELOG); | ||
232 | break; | ||
233 | case Opt_flushoncommit: | ||
234 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); | ||
235 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); | ||
236 | break; | ||
225 | default: | 237 | default: |
226 | break; | 238 | break; |
227 | } | 239 | } |
@@ -363,9 +375,8 @@ fail_close: | |||
363 | int btrfs_sync_fs(struct super_block *sb, int wait) | 375 | int btrfs_sync_fs(struct super_block *sb, int wait) |
364 | { | 376 | { |
365 | struct btrfs_trans_handle *trans; | 377 | struct btrfs_trans_handle *trans; |
366 | struct btrfs_root *root; | 378 | struct btrfs_root *root = btrfs_sb(sb); |
367 | int ret; | 379 | int ret; |
368 | root = btrfs_sb(sb); | ||
369 | 380 | ||
370 | if (sb->s_flags & MS_RDONLY) | 381 | if (sb->s_flags & MS_RDONLY) |
371 | return 0; | 382 | return 0; |
@@ -385,6 +396,41 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
385 | return ret; | 396 | return ret; |
386 | } | 397 | } |
387 | 398 | ||
399 | static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | ||
400 | { | ||
401 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); | ||
402 | struct btrfs_fs_info *info = root->fs_info; | ||
403 | |||
404 | if (btrfs_test_opt(root, DEGRADED)) | ||
405 | seq_puts(seq, ",degraded"); | ||
406 | if (btrfs_test_opt(root, NODATASUM)) | ||
407 | seq_puts(seq, ",nodatasum"); | ||
408 | if (btrfs_test_opt(root, NODATACOW)) | ||
409 | seq_puts(seq, ",nodatacow"); | ||
410 | if (btrfs_test_opt(root, NOBARRIER)) | ||
411 | seq_puts(seq, ",nobarrier"); | ||
412 | if (info->max_extent != (u64)-1) | ||
413 | seq_printf(seq, ",max_extent=%llu", info->max_extent); | ||
414 | if (info->max_inline != 8192 * 1024) | ||
415 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | ||
416 | if (info->alloc_start != 0) | ||
417 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | ||
418 | if (info->thread_pool_size != min_t(unsigned long, | ||
419 | num_online_cpus() + 2, 8)) | ||
420 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | ||
421 | if (btrfs_test_opt(root, COMPRESS)) | ||
422 | seq_puts(seq, ",compress"); | ||
423 | if (btrfs_test_opt(root, SSD)) | ||
424 | seq_puts(seq, ",ssd"); | ||
425 | if (btrfs_test_opt(root, NOTREELOG)) | ||
426 | seq_puts(seq, ",no-treelog"); | ||
427 | if (btrfs_test_opt(root, FLUSHONCOMMIT)) | ||
428 | seq_puts(seq, ",flush-on-commit"); | ||
429 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) | ||
430 | seq_puts(seq, ",noacl"); | ||
431 | return 0; | ||
432 | } | ||
433 | |||
388 | static void btrfs_write_super(struct super_block *sb) | 434 | static void btrfs_write_super(struct super_block *sb) |
389 | { | 435 | { |
390 | sb->s_dirt = 0; | 436 | sb->s_dirt = 0; |
@@ -630,7 +676,7 @@ static struct super_operations btrfs_super_ops = { | |||
630 | .put_super = btrfs_put_super, | 676 | .put_super = btrfs_put_super, |
631 | .write_super = btrfs_write_super, | 677 | .write_super = btrfs_write_super, |
632 | .sync_fs = btrfs_sync_fs, | 678 | .sync_fs = btrfs_sync_fs, |
633 | .show_options = generic_show_options, | 679 | .show_options = btrfs_show_options, |
634 | .write_inode = btrfs_write_inode, | 680 | .write_inode = btrfs_write_inode, |
635 | .dirty_inode = btrfs_dirty_inode, | 681 | .dirty_inode = btrfs_dirty_inode, |
636 | .alloc_inode = btrfs_alloc_inode, | 682 | .alloc_inode = btrfs_alloc_inode, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 664782c6a2df..2869b3361eb6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -53,8 +53,6 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
53 | GFP_NOFS); | 53 | GFP_NOFS); |
54 | BUG_ON(!cur_trans); | 54 | BUG_ON(!cur_trans); |
55 | root->fs_info->generation++; | 55 | root->fs_info->generation++; |
56 | root->fs_info->last_alloc = 0; | ||
57 | root->fs_info->last_data_alloc = 0; | ||
58 | cur_trans->num_writers = 1; | 56 | cur_trans->num_writers = 1; |
59 | cur_trans->num_joined = 0; | 57 | cur_trans->num_joined = 0; |
60 | cur_trans->transid = root->fs_info->generation; | 58 | cur_trans->transid = root->fs_info->generation; |
@@ -974,6 +972,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
974 | int ret; | 972 | int ret; |
975 | int should_grow = 0; | 973 | int should_grow = 0; |
976 | unsigned long now = get_seconds(); | 974 | unsigned long now = get_seconds(); |
975 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
977 | 976 | ||
978 | btrfs_run_ordered_operations(root, 0); | 977 | btrfs_run_ordered_operations(root, 0); |
979 | 978 | ||
@@ -1053,7 +1052,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1053 | 1052 | ||
1054 | mutex_unlock(&root->fs_info->trans_mutex); | 1053 | mutex_unlock(&root->fs_info->trans_mutex); |
1055 | 1054 | ||
1056 | if (snap_pending) { | 1055 | if (flush_on_commit || snap_pending) { |
1056 | if (flush_on_commit) | ||
1057 | btrfs_start_delalloc_inodes(root); | ||
1057 | ret = btrfs_wait_ordered_extents(root, 1); | 1058 | ret = btrfs_wait_ordered_extents(root, 1); |
1058 | BUG_ON(ret); | 1059 | BUG_ON(ret); |
1059 | } | 1060 | } |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fc9b87a7975b..25f20ea11f27 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -262,11 +262,9 @@ static int process_one_buffer(struct btrfs_root *log, | |||
262 | struct extent_buffer *eb, | 262 | struct extent_buffer *eb, |
263 | struct walk_control *wc, u64 gen) | 263 | struct walk_control *wc, u64 gen) |
264 | { | 264 | { |
265 | if (wc->pin) { | 265 | if (wc->pin) |
266 | mutex_lock(&log->fs_info->pinned_mutex); | ||
267 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 266 | btrfs_update_pinned_extents(log->fs_info->extent_root, |
268 | eb->start, eb->len, 1); | 267 | eb->start, eb->len, 1); |
269 | } | ||
270 | 268 | ||
271 | if (btrfs_buffer_uptodate(eb, gen)) { | 269 | if (btrfs_buffer_uptodate(eb, gen)) { |
272 | if (wc->write) | 270 | if (wc->write) |
@@ -1224,8 +1222,7 @@ insert: | |||
1224 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | 1222 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, |
1225 | name, name_len, log_type, &log_key); | 1223 | name, name_len, log_type, &log_key); |
1226 | 1224 | ||
1227 | if (ret && ret != -ENOENT) | 1225 | BUG_ON(ret && ret != -ENOENT); |
1228 | BUG(); | ||
1229 | goto out; | 1226 | goto out; |
1230 | } | 1227 | } |
1231 | 1228 | ||
@@ -2900,6 +2897,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2900 | 2897 | ||
2901 | sb = inode->i_sb; | 2898 | sb = inode->i_sb; |
2902 | 2899 | ||
2900 | if (btrfs_test_opt(root, NOTREELOG)) { | ||
2901 | ret = 1; | ||
2902 | goto end_no_trans; | ||
2903 | } | ||
2904 | |||
2903 | if (root->fs_info->last_trans_log_full_commit > | 2905 | if (root->fs_info->last_trans_log_full_commit > |
2904 | root->fs_info->last_trans_committed) { | 2906 | root->fs_info->last_trans_committed) { |
2905 | ret = 1; | 2907 | ret = 1; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd06e18e5aac..e0913e469728 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/random.h> | 22 | #include <linux/random.h> |
23 | #include <linux/iocontext.h> | ||
23 | #include <asm/div64.h> | 24 | #include <asm/div64.h> |
24 | #include "compat.h" | 25 | #include "compat.h" |
25 | #include "ctree.h" | 26 | #include "ctree.h" |
@@ -145,8 +146,9 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
145 | int again = 0; | 146 | int again = 0; |
146 | unsigned long num_run = 0; | 147 | unsigned long num_run = 0; |
147 | unsigned long limit; | 148 | unsigned long limit; |
149 | unsigned long last_waited = 0; | ||
148 | 150 | ||
149 | bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; | 151 | bdi = blk_get_backing_dev_info(device->bdev); |
150 | fs_info = device->dev_root->fs_info; | 152 | fs_info = device->dev_root->fs_info; |
151 | limit = btrfs_async_submit_limit(fs_info); | 153 | limit = btrfs_async_submit_limit(fs_info); |
152 | limit = limit * 2 / 3; | 154 | limit = limit * 2 / 3; |
@@ -207,7 +209,32 @@ loop_lock: | |||
207 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
208 | fs_info->fs_devices->open_devices > 1) { | 210 | fs_info->fs_devices->open_devices > 1) { |
209 | struct bio *old_head; | 211 | struct bio *old_head; |
212 | struct io_context *ioc; | ||
210 | 213 | ||
214 | ioc = current->io_context; | ||
215 | |||
216 | /* | ||
217 | * the main goal here is that we don't want to | ||
218 | * block if we're going to be able to submit | ||
219 | * more requests without blocking. | ||
220 | * | ||
221 | * This code does two great things, it pokes into | ||
222 | * the elevator code from a filesystem _and_ | ||
223 | * it makes assumptions about how batching works. | ||
224 | */ | ||
225 | if (ioc && ioc->nr_batch_requests > 0 && | ||
226 | time_before(jiffies, ioc->last_waited + HZ/50UL) && | ||
227 | (last_waited == 0 || | ||
228 | ioc->last_waited == last_waited)) { | ||
229 | /* | ||
230 | * we want to go through our batch of | ||
231 | * requests and stop. So, we copy out | ||
232 | * the ioc->last_waited time and test | ||
233 | * against it before looping | ||
234 | */ | ||
235 | last_waited = ioc->last_waited; | ||
236 | continue; | ||
237 | } | ||
211 | spin_lock(&device->io_lock); | 238 | spin_lock(&device->io_lock); |
212 | 239 | ||
213 | old_head = device->pending_bios; | 240 | old_head = device->pending_bios; |
@@ -231,6 +258,18 @@ loop_lock: | |||
231 | if (device->pending_bios) | 258 | if (device->pending_bios) |
232 | goto loop_lock; | 259 | goto loop_lock; |
233 | spin_unlock(&device->io_lock); | 260 | spin_unlock(&device->io_lock); |
261 | |||
262 | /* | ||
263 | * IO has already been through a long path to get here. Checksumming, | ||
264 | * async helper threads, perhaps compression. We've done a pretty | ||
265 | * good job of collecting a batch of IO and should just unplug | ||
266 | * the device right away. | ||
267 | * | ||
268 | * This will help anyone who is waiting on the IO, they might have | ||
269 | * already unplugged, but managed to do so before the bio they | ||
270 | * cared about found its way down here. | ||
271 | */ | ||
272 | blk_run_backing_dev(bdi, NULL); | ||
234 | done: | 273 | done: |
235 | return 0; | 274 | return 0; |
236 | } | 275 | } |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 86c44e9ae110..2185de72ff7d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -76,7 +76,7 @@ struct btrfs_device { | |||
76 | struct btrfs_fs_devices { | 76 | struct btrfs_fs_devices { |
77 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | 77 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ |
78 | 78 | ||
79 | /* the device with this id has the most recent coyp of the super */ | 79 | /* the device with this id has the most recent copy of the super */ |
80 | u64 latest_devid; | 80 | u64 latest_devid; |
81 | u64 latest_trans; | 81 | u64 latest_trans; |
82 | u64 num_devices; | 82 | u64 num_devices; |
diff --git a/fs/buffer.c b/fs/buffer.c index f5f8b15a6e40..5d55a896ff78 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -199,13 +199,13 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) | |||
199 | head = page_buffers(page); | 199 | head = page_buffers(page); |
200 | bh = head; | 200 | bh = head; |
201 | do { | 201 | do { |
202 | if (bh->b_blocknr == block) { | 202 | if (!buffer_mapped(bh)) |
203 | all_mapped = 0; | ||
204 | else if (bh->b_blocknr == block) { | ||
203 | ret = bh; | 205 | ret = bh; |
204 | get_bh(bh); | 206 | get_bh(bh); |
205 | goto out_unlock; | 207 | goto out_unlock; |
206 | } | 208 | } |
207 | if (!buffer_mapped(bh)) | ||
208 | all_mapped = 0; | ||
209 | bh = bh->b_this_page; | 209 | bh = bh->b_this_page; |
210 | } while (bh != head); | 210 | } while (bh != head); |
211 | 211 | ||
@@ -1595,6 +1595,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1595 | struct buffer_head *bh, *head; | 1595 | struct buffer_head *bh, *head; |
1596 | const unsigned blocksize = 1 << inode->i_blkbits; | 1596 | const unsigned blocksize = 1 << inode->i_blkbits; |
1597 | int nr_underway = 0; | 1597 | int nr_underway = 0; |
1598 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
1598 | 1599 | ||
1599 | BUG_ON(!PageLocked(page)); | 1600 | BUG_ON(!PageLocked(page)); |
1600 | 1601 | ||
@@ -1686,7 +1687,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1686 | do { | 1687 | do { |
1687 | struct buffer_head *next = bh->b_this_page; | 1688 | struct buffer_head *next = bh->b_this_page; |
1688 | if (buffer_async_write(bh)) { | 1689 | if (buffer_async_write(bh)) { |
1689 | submit_bh(WRITE, bh); | 1690 | submit_bh(write_op, bh); |
1690 | nr_underway++; | 1691 | nr_underway++; |
1691 | } | 1692 | } |
1692 | bh = next; | 1693 | bh = next; |
@@ -1740,7 +1741,7 @@ recover: | |||
1740 | struct buffer_head *next = bh->b_this_page; | 1741 | struct buffer_head *next = bh->b_this_page; |
1741 | if (buffer_async_write(bh)) { | 1742 | if (buffer_async_write(bh)) { |
1742 | clear_buffer_dirty(bh); | 1743 | clear_buffer_dirty(bh); |
1743 | submit_bh(WRITE, bh); | 1744 | submit_bh(write_op, bh); |
1744 | nr_underway++; | 1745 | nr_underway++; |
1745 | } | 1746 | } |
1746 | bh = next; | 1747 | bh = next; |
@@ -3315,7 +3316,6 @@ EXPORT_SYMBOL(cont_write_begin); | |||
3315 | EXPORT_SYMBOL(end_buffer_read_sync); | 3316 | EXPORT_SYMBOL(end_buffer_read_sync); |
3316 | EXPORT_SYMBOL(end_buffer_write_sync); | 3317 | EXPORT_SYMBOL(end_buffer_write_sync); |
3317 | EXPORT_SYMBOL(file_fsync); | 3318 | EXPORT_SYMBOL(file_fsync); |
3318 | EXPORT_SYMBOL(fsync_bdev); | ||
3319 | EXPORT_SYMBOL(generic_block_bmap); | 3319 | EXPORT_SYMBOL(generic_block_bmap); |
3320 | EXPORT_SYMBOL(generic_cont_expand_simple); | 3320 | EXPORT_SYMBOL(generic_cont_expand_simple); |
3321 | EXPORT_SYMBOL(init_buffer); | 3321 | EXPORT_SYMBOL(init_buffer); |
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig new file mode 100644 index 000000000000..80e9c6167f0b --- /dev/null +++ b/fs/cachefiles/Kconfig | |||
@@ -0,0 +1,39 @@ | |||
1 | |||
2 | config CACHEFILES | ||
3 | tristate "Filesystem caching on files" | ||
4 | depends on FSCACHE && BLOCK | ||
5 | help | ||
6 | This permits use of a mounted filesystem as a cache for other | ||
7 | filesystems - primarily networking filesystems - thus allowing fast | ||
8 | local disk to enhance the speed of slower devices. | ||
9 | |||
10 | See Documentation/filesystems/caching/cachefiles.txt for more | ||
11 | information. | ||
12 | |||
13 | config CACHEFILES_DEBUG | ||
14 | bool "Debug CacheFiles" | ||
15 | depends on CACHEFILES | ||
16 | help | ||
17 | This permits debugging to be dynamically enabled in the filesystem | ||
18 | caching on files module. If this is set, the debugging output may be | ||
19 | enabled by setting bits in /sys/module/cachefiles/parameters/debug or | ||
20 | by including a debugging specifier in /etc/cachefilesd.conf. | ||
21 | |||
22 | config CACHEFILES_HISTOGRAM | ||
23 | bool "Gather latency information on CacheFiles" | ||
24 | depends on CACHEFILES && PROC_FS | ||
25 | help | ||
26 | |||
27 | This option causes latency information to be gathered on CacheFiles | ||
28 | operation and exported through the file: | ||
29 | |||
30 | /proc/fs/cachefiles/histogram | ||
31 | |||
32 | The generation of this histogram adds a certain amount of overhead to | ||
33 | execution as there are a number of points at which data is gathered, | ||
34 | and on a multi-CPU system these may be on cachelines that keep | ||
35 | bouncing between CPUs. On the other hand, the histogram may be | ||
36 | useful for debugging purposes. Saying 'N' here is recommended. | ||
37 | |||
38 | See Documentation/filesystems/caching/cachefiles.txt for more | ||
39 | information. | ||
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile new file mode 100644 index 000000000000..32cbab0ffce3 --- /dev/null +++ b/fs/cachefiles/Makefile | |||
@@ -0,0 +1,18 @@ | |||
1 | # | ||
2 | # Makefile for caching in a mounted filesystem | ||
3 | # | ||
4 | |||
5 | cachefiles-y := \ | ||
6 | bind.o \ | ||
7 | daemon.o \ | ||
8 | interface.o \ | ||
9 | key.o \ | ||
10 | main.o \ | ||
11 | namei.o \ | ||
12 | rdwr.o \ | ||
13 | security.o \ | ||
14 | xattr.o | ||
15 | |||
16 | cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o | ||
17 | |||
18 | obj-$(CONFIG_CACHEFILES) := cachefiles.o | ||
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c new file mode 100644 index 000000000000..3797e0077b35 --- /dev/null +++ b/fs/cachefiles/bind.c | |||
@@ -0,0 +1,286 @@ | |||
1 | /* Bind and unbind a cache from the filesystem backing it | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/completion.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/namei.h> | ||
20 | #include <linux/mount.h> | ||
21 | #include <linux/statfs.h> | ||
22 | #include <linux/ctype.h> | ||
23 | #include "internal.h" | ||
24 | |||
25 | static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches); | ||
26 | |||
27 | /* | ||
28 | * bind a directory as a cache | ||
29 | */ | ||
30 | int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) | ||
31 | { | ||
32 | _enter("{%u,%u,%u,%u,%u,%u},%s", | ||
33 | cache->frun_percent, | ||
34 | cache->fcull_percent, | ||
35 | cache->fstop_percent, | ||
36 | cache->brun_percent, | ||
37 | cache->bcull_percent, | ||
38 | cache->bstop_percent, | ||
39 | args); | ||
40 | |||
41 | /* start by checking things over */ | ||
42 | ASSERT(cache->fstop_percent >= 0 && | ||
43 | cache->fstop_percent < cache->fcull_percent && | ||
44 | cache->fcull_percent < cache->frun_percent && | ||
45 | cache->frun_percent < 100); | ||
46 | |||
47 | ASSERT(cache->bstop_percent >= 0 && | ||
48 | cache->bstop_percent < cache->bcull_percent && | ||
49 | cache->bcull_percent < cache->brun_percent && | ||
50 | cache->brun_percent < 100); | ||
51 | |||
52 | if (*args) { | ||
53 | kerror("'bind' command doesn't take an argument"); | ||
54 | return -EINVAL; | ||
55 | } | ||
56 | |||
57 | if (!cache->rootdirname) { | ||
58 | kerror("No cache directory specified"); | ||
59 | return -EINVAL; | ||
60 | } | ||
61 | |||
62 | /* don't permit already bound caches to be re-bound */ | ||
63 | if (test_bit(CACHEFILES_READY, &cache->flags)) { | ||
64 | kerror("Cache already bound"); | ||
65 | return -EBUSY; | ||
66 | } | ||
67 | |||
68 | /* make sure we have copies of the tag and dirname strings */ | ||
69 | if (!cache->tag) { | ||
70 | /* the tag string is released by the fops->release() | ||
71 | * function, so we don't release it on error here */ | ||
72 | cache->tag = kstrdup("CacheFiles", GFP_KERNEL); | ||
73 | if (!cache->tag) | ||
74 | return -ENOMEM; | ||
75 | } | ||
76 | |||
77 | /* add the cache */ | ||
78 | return cachefiles_daemon_add_cache(cache); | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * add a cache | ||
83 | */ | ||
84 | static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) | ||
85 | { | ||
86 | struct cachefiles_object *fsdef; | ||
87 | struct nameidata nd; | ||
88 | struct kstatfs stats; | ||
89 | struct dentry *graveyard, *cachedir, *root; | ||
90 | const struct cred *saved_cred; | ||
91 | int ret; | ||
92 | |||
93 | _enter(""); | ||
94 | |||
95 | /* we want to work under the module's security ID */ | ||
96 | ret = cachefiles_get_security_ID(cache); | ||
97 | if (ret < 0) | ||
98 | return ret; | ||
99 | |||
100 | cachefiles_begin_secure(cache, &saved_cred); | ||
101 | |||
102 | /* allocate the root index object */ | ||
103 | ret = -ENOMEM; | ||
104 | |||
105 | fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); | ||
106 | if (!fsdef) | ||
107 | goto error_root_object; | ||
108 | |||
109 | ASSERTCMP(fsdef->backer, ==, NULL); | ||
110 | |||
111 | atomic_set(&fsdef->usage, 1); | ||
112 | fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; | ||
113 | |||
114 | _debug("- fsdef %p", fsdef); | ||
115 | |||
116 | /* look up the directory at the root of the cache */ | ||
117 | memset(&nd, 0, sizeof(nd)); | ||
118 | |||
119 | ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd); | ||
120 | if (ret < 0) | ||
121 | goto error_open_root; | ||
122 | |||
123 | cache->mnt = mntget(nd.path.mnt); | ||
124 | root = dget(nd.path.dentry); | ||
125 | path_put(&nd.path); | ||
126 | |||
127 | /* check parameters */ | ||
128 | ret = -EOPNOTSUPP; | ||
129 | if (!root->d_inode || | ||
130 | !root->d_inode->i_op || | ||
131 | !root->d_inode->i_op->lookup || | ||
132 | !root->d_inode->i_op->mkdir || | ||
133 | !root->d_inode->i_op->setxattr || | ||
134 | !root->d_inode->i_op->getxattr || | ||
135 | !root->d_sb || | ||
136 | !root->d_sb->s_op || | ||
137 | !root->d_sb->s_op->statfs || | ||
138 | !root->d_sb->s_op->sync_fs) | ||
139 | goto error_unsupported; | ||
140 | |||
141 | ret = -EROFS; | ||
142 | if (root->d_sb->s_flags & MS_RDONLY) | ||
143 | goto error_unsupported; | ||
144 | |||
145 | /* determine the security of the on-disk cache as this governs | ||
146 | * security ID of files we create */ | ||
147 | ret = cachefiles_determine_cache_security(cache, root, &saved_cred); | ||
148 | if (ret < 0) | ||
149 | goto error_unsupported; | ||
150 | |||
151 | /* get the cache size and blocksize */ | ||
152 | ret = vfs_statfs(root, &stats); | ||
153 | if (ret < 0) | ||
154 | goto error_unsupported; | ||
155 | |||
156 | ret = -ERANGE; | ||
157 | if (stats.f_bsize <= 0) | ||
158 | goto error_unsupported; | ||
159 | |||
160 | ret = -EOPNOTSUPP; | ||
161 | if (stats.f_bsize > PAGE_SIZE) | ||
162 | goto error_unsupported; | ||
163 | |||
164 | cache->bsize = stats.f_bsize; | ||
165 | cache->bshift = 0; | ||
166 | if (stats.f_bsize < PAGE_SIZE) | ||
167 | cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); | ||
168 | |||
169 | _debug("blksize %u (shift %u)", | ||
170 | cache->bsize, cache->bshift); | ||
171 | |||
172 | _debug("size %llu, avail %llu", | ||
173 | (unsigned long long) stats.f_blocks, | ||
174 | (unsigned long long) stats.f_bavail); | ||
175 | |||
176 | /* set up caching limits */ | ||
177 | do_div(stats.f_files, 100); | ||
178 | cache->fstop = stats.f_files * cache->fstop_percent; | ||
179 | cache->fcull = stats.f_files * cache->fcull_percent; | ||
180 | cache->frun = stats.f_files * cache->frun_percent; | ||
181 | |||
182 | _debug("limits {%llu,%llu,%llu} files", | ||
183 | (unsigned long long) cache->frun, | ||
184 | (unsigned long long) cache->fcull, | ||
185 | (unsigned long long) cache->fstop); | ||
186 | |||
187 | stats.f_blocks >>= cache->bshift; | ||
188 | do_div(stats.f_blocks, 100); | ||
189 | cache->bstop = stats.f_blocks * cache->bstop_percent; | ||
190 | cache->bcull = stats.f_blocks * cache->bcull_percent; | ||
191 | cache->brun = stats.f_blocks * cache->brun_percent; | ||
192 | |||
193 | _debug("limits {%llu,%llu,%llu} blocks", | ||
194 | (unsigned long long) cache->brun, | ||
195 | (unsigned long long) cache->bcull, | ||
196 | (unsigned long long) cache->bstop); | ||
197 | |||
198 | /* get the cache directory and check its type */ | ||
199 | cachedir = cachefiles_get_directory(cache, root, "cache"); | ||
200 | if (IS_ERR(cachedir)) { | ||
201 | ret = PTR_ERR(cachedir); | ||
202 | goto error_unsupported; | ||
203 | } | ||
204 | |||
205 | fsdef->dentry = cachedir; | ||
206 | fsdef->fscache.cookie = NULL; | ||
207 | |||
208 | ret = cachefiles_check_object_type(fsdef); | ||
209 | if (ret < 0) | ||
210 | goto error_unsupported; | ||
211 | |||
212 | /* get the graveyard directory */ | ||
213 | graveyard = cachefiles_get_directory(cache, root, "graveyard"); | ||
214 | if (IS_ERR(graveyard)) { | ||
215 | ret = PTR_ERR(graveyard); | ||
216 | goto error_unsupported; | ||
217 | } | ||
218 | |||
219 | cache->graveyard = graveyard; | ||
220 | |||
221 | /* publish the cache */ | ||
222 | fscache_init_cache(&cache->cache, | ||
223 | &cachefiles_cache_ops, | ||
224 | "%s", | ||
225 | fsdef->dentry->d_sb->s_id); | ||
226 | |||
227 | fscache_object_init(&fsdef->fscache, NULL, &cache->cache); | ||
228 | |||
229 | ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); | ||
230 | if (ret < 0) | ||
231 | goto error_add_cache; | ||
232 | |||
233 | /* done */ | ||
234 | set_bit(CACHEFILES_READY, &cache->flags); | ||
235 | dput(root); | ||
236 | |||
237 | printk(KERN_INFO "CacheFiles:" | ||
238 | " File cache on %s registered\n", | ||
239 | cache->cache.identifier); | ||
240 | |||
241 | /* check how much space the cache has */ | ||
242 | cachefiles_has_space(cache, 0, 0); | ||
243 | cachefiles_end_secure(cache, saved_cred); | ||
244 | return 0; | ||
245 | |||
246 | error_add_cache: | ||
247 | dput(cache->graveyard); | ||
248 | cache->graveyard = NULL; | ||
249 | error_unsupported: | ||
250 | mntput(cache->mnt); | ||
251 | cache->mnt = NULL; | ||
252 | dput(fsdef->dentry); | ||
253 | fsdef->dentry = NULL; | ||
254 | dput(root); | ||
255 | error_open_root: | ||
256 | kmem_cache_free(cachefiles_object_jar, fsdef); | ||
257 | error_root_object: | ||
258 | cachefiles_end_secure(cache, saved_cred); | ||
259 | kerror("Failed to register: %d", ret); | ||
260 | return ret; | ||
261 | } | ||
262 | |||
263 | /* | ||
264 | * unbind a cache on fd release | ||
265 | */ | ||
266 | void cachefiles_daemon_unbind(struct cachefiles_cache *cache) | ||
267 | { | ||
268 | _enter(""); | ||
269 | |||
270 | if (test_bit(CACHEFILES_READY, &cache->flags)) { | ||
271 | printk(KERN_INFO "CacheFiles:" | ||
272 | " File cache on %s unregistering\n", | ||
273 | cache->cache.identifier); | ||
274 | |||
275 | fscache_withdraw_cache(&cache->cache); | ||
276 | } | ||
277 | |||
278 | dput(cache->graveyard); | ||
279 | mntput(cache->mnt); | ||
280 | |||
281 | kfree(cache->rootdirname); | ||
282 | kfree(cache->secctx); | ||
283 | kfree(cache->tag); | ||
284 | |||
285 | _leave(""); | ||
286 | } | ||
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c new file mode 100644 index 000000000000..4618516dd994 --- /dev/null +++ b/fs/cachefiles/daemon.c | |||
@@ -0,0 +1,755 @@ | |||
1 | /* Daemon interface | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/completion.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/namei.h> | ||
20 | #include <linux/poll.h> | ||
21 | #include <linux/mount.h> | ||
22 | #include <linux/statfs.h> | ||
23 | #include <linux/ctype.h> | ||
24 | #include <linux/fs_struct.h> | ||
25 | #include "internal.h" | ||
26 | |||
27 | static int cachefiles_daemon_open(struct inode *, struct file *); | ||
28 | static int cachefiles_daemon_release(struct inode *, struct file *); | ||
29 | static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t, | ||
30 | loff_t *); | ||
31 | static ssize_t cachefiles_daemon_write(struct file *, const char __user *, | ||
32 | size_t, loff_t *); | ||
33 | static unsigned int cachefiles_daemon_poll(struct file *, | ||
34 | struct poll_table_struct *); | ||
35 | static int cachefiles_daemon_frun(struct cachefiles_cache *, char *); | ||
36 | static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *); | ||
37 | static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *); | ||
38 | static int cachefiles_daemon_brun(struct cachefiles_cache *, char *); | ||
39 | static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *); | ||
40 | static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *); | ||
41 | static int cachefiles_daemon_cull(struct cachefiles_cache *, char *); | ||
42 | static int cachefiles_daemon_debug(struct cachefiles_cache *, char *); | ||
43 | static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); | ||
44 | static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); | ||
45 | static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); | ||
46 | static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); | ||
47 | |||
48 | static unsigned long cachefiles_open; | ||
49 | |||
50 | const struct file_operations cachefiles_daemon_fops = { | ||
51 | .owner = THIS_MODULE, | ||
52 | .open = cachefiles_daemon_open, | ||
53 | .release = cachefiles_daemon_release, | ||
54 | .read = cachefiles_daemon_read, | ||
55 | .write = cachefiles_daemon_write, | ||
56 | .poll = cachefiles_daemon_poll, | ||
57 | }; | ||
58 | |||
59 | struct cachefiles_daemon_cmd { | ||
60 | char name[8]; | ||
61 | int (*handler)(struct cachefiles_cache *cache, char *args); | ||
62 | }; | ||
63 | |||
64 | static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { | ||
65 | { "bind", cachefiles_daemon_bind }, | ||
66 | { "brun", cachefiles_daemon_brun }, | ||
67 | { "bcull", cachefiles_daemon_bcull }, | ||
68 | { "bstop", cachefiles_daemon_bstop }, | ||
69 | { "cull", cachefiles_daemon_cull }, | ||
70 | { "debug", cachefiles_daemon_debug }, | ||
71 | { "dir", cachefiles_daemon_dir }, | ||
72 | { "frun", cachefiles_daemon_frun }, | ||
73 | { "fcull", cachefiles_daemon_fcull }, | ||
74 | { "fstop", cachefiles_daemon_fstop }, | ||
75 | { "inuse", cachefiles_daemon_inuse }, | ||
76 | { "secctx", cachefiles_daemon_secctx }, | ||
77 | { "tag", cachefiles_daemon_tag }, | ||
78 | { "", NULL } | ||
79 | }; | ||
80 | |||
81 | |||
82 | /* | ||
83 | * do various checks | ||
84 | */ | ||
85 | static int cachefiles_daemon_open(struct inode *inode, struct file *file) | ||
86 | { | ||
87 | struct cachefiles_cache *cache; | ||
88 | |||
89 | _enter(""); | ||
90 | |||
91 | /* only the superuser may do this */ | ||
92 | if (!capable(CAP_SYS_ADMIN)) | ||
93 | return -EPERM; | ||
94 | |||
95 | /* the cachefiles device may only be open once at a time */ | ||
96 | if (xchg(&cachefiles_open, 1) == 1) | ||
97 | return -EBUSY; | ||
98 | |||
99 | /* allocate a cache record */ | ||
100 | cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); | ||
101 | if (!cache) { | ||
102 | cachefiles_open = 0; | ||
103 | return -ENOMEM; | ||
104 | } | ||
105 | |||
106 | mutex_init(&cache->daemon_mutex); | ||
107 | cache->active_nodes = RB_ROOT; | ||
108 | rwlock_init(&cache->active_lock); | ||
109 | init_waitqueue_head(&cache->daemon_pollwq); | ||
110 | |||
111 | /* set default caching limits | ||
112 | * - limit at 1% free space and/or free files | ||
113 | * - cull below 5% free space and/or free files | ||
114 | * - cease culling above 7% free space and/or free files | ||
115 | */ | ||
116 | cache->frun_percent = 7; | ||
117 | cache->fcull_percent = 5; | ||
118 | cache->fstop_percent = 1; | ||
119 | cache->brun_percent = 7; | ||
120 | cache->bcull_percent = 5; | ||
121 | cache->bstop_percent = 1; | ||
122 | |||
123 | file->private_data = cache; | ||
124 | cache->cachefilesd = file; | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * release a cache | ||
130 | */ | ||
131 | static int cachefiles_daemon_release(struct inode *inode, struct file *file) | ||
132 | { | ||
133 | struct cachefiles_cache *cache = file->private_data; | ||
134 | |||
135 | _enter(""); | ||
136 | |||
137 | ASSERT(cache); | ||
138 | |||
139 | set_bit(CACHEFILES_DEAD, &cache->flags); | ||
140 | |||
141 | cachefiles_daemon_unbind(cache); | ||
142 | |||
143 | ASSERT(!cache->active_nodes.rb_node); | ||
144 | |||
145 | /* clean up the control file interface */ | ||
146 | cache->cachefilesd = NULL; | ||
147 | file->private_data = NULL; | ||
148 | cachefiles_open = 0; | ||
149 | |||
150 | kfree(cache); | ||
151 | |||
152 | _leave(""); | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * read the cache state | ||
158 | */ | ||
159 | static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, | ||
160 | size_t buflen, loff_t *pos) | ||
161 | { | ||
162 | struct cachefiles_cache *cache = file->private_data; | ||
163 | char buffer[256]; | ||
164 | int n; | ||
165 | |||
166 | //_enter(",,%zu,", buflen); | ||
167 | |||
168 | if (!test_bit(CACHEFILES_READY, &cache->flags)) | ||
169 | return 0; | ||
170 | |||
171 | /* check how much space the cache has */ | ||
172 | cachefiles_has_space(cache, 0, 0); | ||
173 | |||
174 | /* summarise */ | ||
175 | clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); | ||
176 | |||
177 | n = snprintf(buffer, sizeof(buffer), | ||
178 | "cull=%c" | ||
179 | " frun=%llx" | ||
180 | " fcull=%llx" | ||
181 | " fstop=%llx" | ||
182 | " brun=%llx" | ||
183 | " bcull=%llx" | ||
184 | " bstop=%llx", | ||
185 | test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', | ||
186 | (unsigned long long) cache->frun, | ||
187 | (unsigned long long) cache->fcull, | ||
188 | (unsigned long long) cache->fstop, | ||
189 | (unsigned long long) cache->brun, | ||
190 | (unsigned long long) cache->bcull, | ||
191 | (unsigned long long) cache->bstop | ||
192 | ); | ||
193 | |||
194 | if (n > buflen) | ||
195 | return -EMSGSIZE; | ||
196 | |||
197 | if (copy_to_user(_buffer, buffer, n) != 0) | ||
198 | return -EFAULT; | ||
199 | |||
200 | return n; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * command the cache | ||
205 | */ | ||
206 | static ssize_t cachefiles_daemon_write(struct file *file, | ||
207 | const char __user *_data, | ||
208 | size_t datalen, | ||
209 | loff_t *pos) | ||
210 | { | ||
211 | const struct cachefiles_daemon_cmd *cmd; | ||
212 | struct cachefiles_cache *cache = file->private_data; | ||
213 | ssize_t ret; | ||
214 | char *data, *args, *cp; | ||
215 | |||
216 | //_enter(",,%zu,", datalen); | ||
217 | |||
218 | ASSERT(cache); | ||
219 | |||
220 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) | ||
221 | return -EIO; | ||
222 | |||
223 | if (datalen < 0 || datalen > PAGE_SIZE - 1) | ||
224 | return -EOPNOTSUPP; | ||
225 | |||
226 | /* drag the command string into the kernel so we can parse it */ | ||
227 | data = kmalloc(datalen + 1, GFP_KERNEL); | ||
228 | if (!data) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | ret = -EFAULT; | ||
232 | if (copy_from_user(data, _data, datalen) != 0) | ||
233 | goto error; | ||
234 | |||
235 | data[datalen] = '\0'; | ||
236 | |||
237 | ret = -EINVAL; | ||
238 | if (memchr(data, '\0', datalen)) | ||
239 | goto error; | ||
240 | |||
241 | /* strip any newline */ | ||
242 | cp = memchr(data, '\n', datalen); | ||
243 | if (cp) { | ||
244 | if (cp == data) | ||
245 | goto error; | ||
246 | |||
247 | *cp = '\0'; | ||
248 | } | ||
249 | |||
250 | /* parse the command */ | ||
251 | ret = -EOPNOTSUPP; | ||
252 | |||
253 | for (args = data; *args; args++) | ||
254 | if (isspace(*args)) | ||
255 | break; | ||
256 | if (*args) { | ||
257 | if (args == data) | ||
258 | goto error; | ||
259 | *args = '\0'; | ||
260 | for (args++; isspace(*args); args++) | ||
261 | continue; | ||
262 | } | ||
263 | |||
264 | /* run the appropriate command handler */ | ||
265 | for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++) | ||
266 | if (strcmp(cmd->name, data) == 0) | ||
267 | goto found_command; | ||
268 | |||
269 | error: | ||
270 | kfree(data); | ||
271 | //_leave(" = %zd", ret); | ||
272 | return ret; | ||
273 | |||
274 | found_command: | ||
275 | mutex_lock(&cache->daemon_mutex); | ||
276 | |||
277 | ret = -EIO; | ||
278 | if (!test_bit(CACHEFILES_DEAD, &cache->flags)) | ||
279 | ret = cmd->handler(cache, args); | ||
280 | |||
281 | mutex_unlock(&cache->daemon_mutex); | ||
282 | |||
283 | if (ret == 0) | ||
284 | ret = datalen; | ||
285 | goto error; | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * poll for culling state | ||
290 | * - use POLLOUT to indicate culling state | ||
291 | */ | ||
292 | static unsigned int cachefiles_daemon_poll(struct file *file, | ||
293 | struct poll_table_struct *poll) | ||
294 | { | ||
295 | struct cachefiles_cache *cache = file->private_data; | ||
296 | unsigned int mask; | ||
297 | |||
298 | poll_wait(file, &cache->daemon_pollwq, poll); | ||
299 | mask = 0; | ||
300 | |||
301 | if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) | ||
302 | mask |= POLLIN; | ||
303 | |||
304 | if (test_bit(CACHEFILES_CULLING, &cache->flags)) | ||
305 | mask |= POLLOUT; | ||
306 | |||
307 | return mask; | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * give a range error for cache space constraints | ||
312 | * - can be tail-called | ||
313 | */ | ||
314 | static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, | ||
315 | char *args) | ||
316 | { | ||
317 | kerror("Free space limits must be in range" | ||
318 | " 0%%<=stop<cull<run<100%%"); | ||
319 | |||
320 | return -EINVAL; | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * set the percentage of files at which to stop culling | ||
325 | * - command: "frun <N>%" | ||
326 | */ | ||
327 | static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) | ||
328 | { | ||
329 | unsigned long frun; | ||
330 | |||
331 | _enter(",%s", args); | ||
332 | |||
333 | if (!*args) | ||
334 | return -EINVAL; | ||
335 | |||
336 | frun = simple_strtoul(args, &args, 10); | ||
337 | if (args[0] != '%' || args[1] != '\0') | ||
338 | return -EINVAL; | ||
339 | |||
340 | if (frun <= cache->fcull_percent || frun >= 100) | ||
341 | return cachefiles_daemon_range_error(cache, args); | ||
342 | |||
343 | cache->frun_percent = frun; | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * set the percentage of files at which to start culling | ||
349 | * - command: "fcull <N>%" | ||
350 | */ | ||
351 | static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) | ||
352 | { | ||
353 | unsigned long fcull; | ||
354 | |||
355 | _enter(",%s", args); | ||
356 | |||
357 | if (!*args) | ||
358 | return -EINVAL; | ||
359 | |||
360 | fcull = simple_strtoul(args, &args, 10); | ||
361 | if (args[0] != '%' || args[1] != '\0') | ||
362 | return -EINVAL; | ||
363 | |||
364 | if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent) | ||
365 | return cachefiles_daemon_range_error(cache, args); | ||
366 | |||
367 | cache->fcull_percent = fcull; | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * set the percentage of files at which to stop allocating | ||
373 | * - command: "fstop <N>%" | ||
374 | */ | ||
375 | static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) | ||
376 | { | ||
377 | unsigned long fstop; | ||
378 | |||
379 | _enter(",%s", args); | ||
380 | |||
381 | if (!*args) | ||
382 | return -EINVAL; | ||
383 | |||
384 | fstop = simple_strtoul(args, &args, 10); | ||
385 | if (args[0] != '%' || args[1] != '\0') | ||
386 | return -EINVAL; | ||
387 | |||
388 | if (fstop < 0 || fstop >= cache->fcull_percent) | ||
389 | return cachefiles_daemon_range_error(cache, args); | ||
390 | |||
391 | cache->fstop_percent = fstop; | ||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * set the percentage of blocks at which to stop culling | ||
397 | * - command: "brun <N>%" | ||
398 | */ | ||
399 | static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) | ||
400 | { | ||
401 | unsigned long brun; | ||
402 | |||
403 | _enter(",%s", args); | ||
404 | |||
405 | if (!*args) | ||
406 | return -EINVAL; | ||
407 | |||
408 | brun = simple_strtoul(args, &args, 10); | ||
409 | if (args[0] != '%' || args[1] != '\0') | ||
410 | return -EINVAL; | ||
411 | |||
412 | if (brun <= cache->bcull_percent || brun >= 100) | ||
413 | return cachefiles_daemon_range_error(cache, args); | ||
414 | |||
415 | cache->brun_percent = brun; | ||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /* | ||
420 | * set the percentage of blocks at which to start culling | ||
421 | * - command: "bcull <N>%" | ||
422 | */ | ||
423 | static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) | ||
424 | { | ||
425 | unsigned long bcull; | ||
426 | |||
427 | _enter(",%s", args); | ||
428 | |||
429 | if (!*args) | ||
430 | return -EINVAL; | ||
431 | |||
432 | bcull = simple_strtoul(args, &args, 10); | ||
433 | if (args[0] != '%' || args[1] != '\0') | ||
434 | return -EINVAL; | ||
435 | |||
436 | if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) | ||
437 | return cachefiles_daemon_range_error(cache, args); | ||
438 | |||
439 | cache->bcull_percent = bcull; | ||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * set the percentage of blocks at which to stop allocating | ||
445 | * - command: "bstop <N>%" | ||
446 | */ | ||
447 | static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) | ||
448 | { | ||
449 | unsigned long bstop; | ||
450 | |||
451 | _enter(",%s", args); | ||
452 | |||
453 | if (!*args) | ||
454 | return -EINVAL; | ||
455 | |||
456 | bstop = simple_strtoul(args, &args, 10); | ||
457 | if (args[0] != '%' || args[1] != '\0') | ||
458 | return -EINVAL; | ||
459 | |||
460 | if (bstop < 0 || bstop >= cache->bcull_percent) | ||
461 | return cachefiles_daemon_range_error(cache, args); | ||
462 | |||
463 | cache->bstop_percent = bstop; | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * set the cache directory | ||
469 | * - command: "dir <name>" | ||
470 | */ | ||
471 | static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) | ||
472 | { | ||
473 | char *dir; | ||
474 | |||
475 | _enter(",%s", args); | ||
476 | |||
477 | if (!*args) { | ||
478 | kerror("Empty directory specified"); | ||
479 | return -EINVAL; | ||
480 | } | ||
481 | |||
482 | if (cache->rootdirname) { | ||
483 | kerror("Second cache directory specified"); | ||
484 | return -EEXIST; | ||
485 | } | ||
486 | |||
487 | dir = kstrdup(args, GFP_KERNEL); | ||
488 | if (!dir) | ||
489 | return -ENOMEM; | ||
490 | |||
491 | cache->rootdirname = dir; | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * set the cache security context | ||
497 | * - command: "secctx <ctx>" | ||
498 | */ | ||
499 | static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) | ||
500 | { | ||
501 | char *secctx; | ||
502 | |||
503 | _enter(",%s", args); | ||
504 | |||
505 | if (!*args) { | ||
506 | kerror("Empty security context specified"); | ||
507 | return -EINVAL; | ||
508 | } | ||
509 | |||
510 | if (cache->secctx) { | ||
511 | kerror("Second security context specified"); | ||
512 | return -EINVAL; | ||
513 | } | ||
514 | |||
515 | secctx = kstrdup(args, GFP_KERNEL); | ||
516 | if (!secctx) | ||
517 | return -ENOMEM; | ||
518 | |||
519 | cache->secctx = secctx; | ||
520 | return 0; | ||
521 | } | ||
522 | |||
523 | /* | ||
524 | * set the cache tag | ||
525 | * - command: "tag <name>" | ||
526 | */ | ||
527 | static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) | ||
528 | { | ||
529 | char *tag; | ||
530 | |||
531 | _enter(",%s", args); | ||
532 | |||
533 | if (!*args) { | ||
534 | kerror("Empty tag specified"); | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | |||
538 | if (cache->tag) | ||
539 | return -EEXIST; | ||
540 | |||
541 | tag = kstrdup(args, GFP_KERNEL); | ||
542 | if (!tag) | ||
543 | return -ENOMEM; | ||
544 | |||
545 | cache->tag = tag; | ||
546 | return 0; | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * request a node in the cache be culled from the current working directory | ||
551 | * - command: "cull <name>" | ||
552 | */ | ||
553 | static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | ||
554 | { | ||
555 | struct fs_struct *fs; | ||
556 | struct dentry *dir; | ||
557 | const struct cred *saved_cred; | ||
558 | int ret; | ||
559 | |||
560 | _enter(",%s", args); | ||
561 | |||
562 | if (strchr(args, '/')) | ||
563 | goto inval; | ||
564 | |||
565 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { | ||
566 | kerror("cull applied to unready cache"); | ||
567 | return -EIO; | ||
568 | } | ||
569 | |||
570 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { | ||
571 | kerror("cull applied to dead cache"); | ||
572 | return -EIO; | ||
573 | } | ||
574 | |||
575 | /* extract the directory dentry from the cwd */ | ||
576 | fs = current->fs; | ||
577 | read_lock(&fs->lock); | ||
578 | dir = dget(fs->pwd.dentry); | ||
579 | read_unlock(&fs->lock); | ||
580 | |||
581 | if (!S_ISDIR(dir->d_inode->i_mode)) | ||
582 | goto notdir; | ||
583 | |||
584 | cachefiles_begin_secure(cache, &saved_cred); | ||
585 | ret = cachefiles_cull(cache, dir, args); | ||
586 | cachefiles_end_secure(cache, saved_cred); | ||
587 | |||
588 | dput(dir); | ||
589 | _leave(" = %d", ret); | ||
590 | return ret; | ||
591 | |||
592 | notdir: | ||
593 | dput(dir); | ||
594 | kerror("cull command requires dirfd to be a directory"); | ||
595 | return -ENOTDIR; | ||
596 | |||
597 | inval: | ||
598 | kerror("cull command requires dirfd and filename"); | ||
599 | return -EINVAL; | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * set debugging mode | ||
604 | * - command: "debug <mask>" | ||
605 | */ | ||
606 | static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) | ||
607 | { | ||
608 | unsigned long mask; | ||
609 | |||
610 | _enter(",%s", args); | ||
611 | |||
612 | mask = simple_strtoul(args, &args, 0); | ||
613 | if (args[0] != '\0') | ||
614 | goto inval; | ||
615 | |||
616 | cachefiles_debug = mask; | ||
617 | _leave(" = 0"); | ||
618 | return 0; | ||
619 | |||
620 | inval: | ||
621 | kerror("debug command requires mask"); | ||
622 | return -EINVAL; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * find out whether an object in the current working directory is in use or not | ||
627 | * - command: "inuse <name>" | ||
628 | */ | ||
629 | static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | ||
630 | { | ||
631 | struct fs_struct *fs; | ||
632 | struct dentry *dir; | ||
633 | const struct cred *saved_cred; | ||
634 | int ret; | ||
635 | |||
636 | //_enter(",%s", args); | ||
637 | |||
638 | if (strchr(args, '/')) | ||
639 | goto inval; | ||
640 | |||
641 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { | ||
642 | kerror("inuse applied to unready cache"); | ||
643 | return -EIO; | ||
644 | } | ||
645 | |||
646 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { | ||
647 | kerror("inuse applied to dead cache"); | ||
648 | return -EIO; | ||
649 | } | ||
650 | |||
651 | /* extract the directory dentry from the cwd */ | ||
652 | fs = current->fs; | ||
653 | read_lock(&fs->lock); | ||
654 | dir = dget(fs->pwd.dentry); | ||
655 | read_unlock(&fs->lock); | ||
656 | |||
657 | if (!S_ISDIR(dir->d_inode->i_mode)) | ||
658 | goto notdir; | ||
659 | |||
660 | cachefiles_begin_secure(cache, &saved_cred); | ||
661 | ret = cachefiles_check_in_use(cache, dir, args); | ||
662 | cachefiles_end_secure(cache, saved_cred); | ||
663 | |||
664 | dput(dir); | ||
665 | //_leave(" = %d", ret); | ||
666 | return ret; | ||
667 | |||
668 | notdir: | ||
669 | dput(dir); | ||
670 | kerror("inuse command requires dirfd to be a directory"); | ||
671 | return -ENOTDIR; | ||
672 | |||
673 | inval: | ||
674 | kerror("inuse command requires dirfd and filename"); | ||
675 | return -EINVAL; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * see if we have space for a number of pages and/or a number of files in the | ||
680 | * cache | ||
681 | */ | ||
682 | int cachefiles_has_space(struct cachefiles_cache *cache, | ||
683 | unsigned fnr, unsigned bnr) | ||
684 | { | ||
685 | struct kstatfs stats; | ||
686 | int ret; | ||
687 | |||
688 | //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", | ||
689 | // (unsigned long long) cache->frun, | ||
690 | // (unsigned long long) cache->fcull, | ||
691 | // (unsigned long long) cache->fstop, | ||
692 | // (unsigned long long) cache->brun, | ||
693 | // (unsigned long long) cache->bcull, | ||
694 | // (unsigned long long) cache->bstop, | ||
695 | // fnr, bnr); | ||
696 | |||
697 | /* find out how many pages of blockdev are available */ | ||
698 | memset(&stats, 0, sizeof(stats)); | ||
699 | |||
700 | ret = vfs_statfs(cache->mnt->mnt_root, &stats); | ||
701 | if (ret < 0) { | ||
702 | if (ret == -EIO) | ||
703 | cachefiles_io_error(cache, "statfs failed"); | ||
704 | _leave(" = %d", ret); | ||
705 | return ret; | ||
706 | } | ||
707 | |||
708 | stats.f_bavail >>= cache->bshift; | ||
709 | |||
710 | //_debug("avail %llu,%llu", | ||
711 | // (unsigned long long) stats.f_ffree, | ||
712 | // (unsigned long long) stats.f_bavail); | ||
713 | |||
714 | /* see if there is sufficient space */ | ||
715 | if (stats.f_ffree > fnr) | ||
716 | stats.f_ffree -= fnr; | ||
717 | else | ||
718 | stats.f_ffree = 0; | ||
719 | |||
720 | if (stats.f_bavail > bnr) | ||
721 | stats.f_bavail -= bnr; | ||
722 | else | ||
723 | stats.f_bavail = 0; | ||
724 | |||
725 | ret = -ENOBUFS; | ||
726 | if (stats.f_ffree < cache->fstop || | ||
727 | stats.f_bavail < cache->bstop) | ||
728 | goto begin_cull; | ||
729 | |||
730 | ret = 0; | ||
731 | if (stats.f_ffree < cache->fcull || | ||
732 | stats.f_bavail < cache->bcull) | ||
733 | goto begin_cull; | ||
734 | |||
735 | if (test_bit(CACHEFILES_CULLING, &cache->flags) && | ||
736 | stats.f_ffree >= cache->frun && | ||
737 | stats.f_bavail >= cache->brun && | ||
738 | test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) | ||
739 | ) { | ||
740 | _debug("cease culling"); | ||
741 | cachefiles_state_changed(cache); | ||
742 | } | ||
743 | |||
744 | //_leave(" = 0"); | ||
745 | return 0; | ||
746 | |||
747 | begin_cull: | ||
748 | if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { | ||
749 | _debug("### CULL CACHE ###"); | ||
750 | cachefiles_state_changed(cache); | ||
751 | } | ||
752 | |||
753 | _leave(" = %d", ret); | ||
754 | return ret; | ||
755 | } | ||
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c new file mode 100644 index 000000000000..1e962348d111 --- /dev/null +++ b/fs/cachefiles/interface.c | |||
@@ -0,0 +1,449 @@ | |||
1 | /* FS-Cache interface to CacheFiles | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mount.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) | ||
17 | |||
18 | struct cachefiles_lookup_data { | ||
19 | struct cachefiles_xattr *auxdata; /* auxiliary data */ | ||
20 | char *key; /* key path */ | ||
21 | }; | ||
22 | |||
23 | static int cachefiles_attr_changed(struct fscache_object *_object); | ||
24 | |||
25 | /* | ||
26 | * allocate an object record for a cookie lookup and prepare the lookup data | ||
27 | */ | ||
28 | static struct fscache_object *cachefiles_alloc_object( | ||
29 | struct fscache_cache *_cache, | ||
30 | struct fscache_cookie *cookie) | ||
31 | { | ||
32 | struct cachefiles_lookup_data *lookup_data; | ||
33 | struct cachefiles_object *object; | ||
34 | struct cachefiles_cache *cache; | ||
35 | struct cachefiles_xattr *auxdata; | ||
36 | unsigned keylen, auxlen; | ||
37 | void *buffer; | ||
38 | char *key; | ||
39 | |||
40 | cache = container_of(_cache, struct cachefiles_cache, cache); | ||
41 | |||
42 | _enter("{%s},%p,", cache->cache.identifier, cookie); | ||
43 | |||
44 | lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL); | ||
45 | if (!lookup_data) | ||
46 | goto nomem_lookup_data; | ||
47 | |||
48 | /* create a new object record and a temporary leaf image */ | ||
49 | object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); | ||
50 | if (!object) | ||
51 | goto nomem_object; | ||
52 | |||
53 | ASSERTCMP(object->backer, ==, NULL); | ||
54 | |||
55 | BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); | ||
56 | atomic_set(&object->usage, 1); | ||
57 | |||
58 | fscache_object_init(&object->fscache, cookie, &cache->cache); | ||
59 | |||
60 | object->type = cookie->def->type; | ||
61 | |||
62 | /* get hold of the raw key | ||
63 | * - stick the length on the front and leave space on the back for the | ||
64 | * encoder | ||
65 | */ | ||
66 | buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); | ||
67 | if (!buffer) | ||
68 | goto nomem_buffer; | ||
69 | |||
70 | keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); | ||
71 | ASSERTCMP(keylen, <, 512); | ||
72 | |||
73 | *(uint16_t *)buffer = keylen; | ||
74 | ((char *)buffer)[keylen + 2] = 0; | ||
75 | ((char *)buffer)[keylen + 3] = 0; | ||
76 | ((char *)buffer)[keylen + 4] = 0; | ||
77 | |||
78 | /* turn the raw key into something that can work with as a filename */ | ||
79 | key = cachefiles_cook_key(buffer, keylen + 2, object->type); | ||
80 | if (!key) | ||
81 | goto nomem_key; | ||
82 | |||
83 | /* get hold of the auxiliary data and prepend the object type */ | ||
84 | auxdata = buffer; | ||
85 | auxlen = 0; | ||
86 | if (cookie->def->get_aux) { | ||
87 | auxlen = cookie->def->get_aux(cookie->netfs_data, | ||
88 | auxdata->data, 511); | ||
89 | ASSERTCMP(auxlen, <, 511); | ||
90 | } | ||
91 | |||
92 | auxdata->len = auxlen + 1; | ||
93 | auxdata->type = cookie->def->type; | ||
94 | |||
95 | lookup_data->auxdata = auxdata; | ||
96 | lookup_data->key = key; | ||
97 | object->lookup_data = lookup_data; | ||
98 | |||
99 | _leave(" = %p [%p]", &object->fscache, lookup_data); | ||
100 | return &object->fscache; | ||
101 | |||
102 | nomem_key: | ||
103 | kfree(buffer); | ||
104 | nomem_buffer: | ||
105 | BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); | ||
106 | kmem_cache_free(cachefiles_object_jar, object); | ||
107 | fscache_object_destroyed(&cache->cache); | ||
108 | nomem_object: | ||
109 | kfree(lookup_data); | ||
110 | nomem_lookup_data: | ||
111 | _leave(" = -ENOMEM"); | ||
112 | return ERR_PTR(-ENOMEM); | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * attempt to look up the nominated node in this cache | ||
117 | */ | ||
118 | static void cachefiles_lookup_object(struct fscache_object *_object) | ||
119 | { | ||
120 | struct cachefiles_lookup_data *lookup_data; | ||
121 | struct cachefiles_object *parent, *object; | ||
122 | struct cachefiles_cache *cache; | ||
123 | const struct cred *saved_cred; | ||
124 | int ret; | ||
125 | |||
126 | _enter("{OBJ%x}", _object->debug_id); | ||
127 | |||
128 | cache = container_of(_object->cache, struct cachefiles_cache, cache); | ||
129 | parent = container_of(_object->parent, | ||
130 | struct cachefiles_object, fscache); | ||
131 | object = container_of(_object, struct cachefiles_object, fscache); | ||
132 | lookup_data = object->lookup_data; | ||
133 | |||
134 | ASSERTCMP(lookup_data, !=, NULL); | ||
135 | |||
136 | /* look up the key, creating any missing bits */ | ||
137 | cachefiles_begin_secure(cache, &saved_cred); | ||
138 | ret = cachefiles_walk_to_object(parent, object, | ||
139 | lookup_data->key, | ||
140 | lookup_data->auxdata); | ||
141 | cachefiles_end_secure(cache, saved_cred); | ||
142 | |||
143 | /* polish off by setting the attributes of non-index files */ | ||
144 | if (ret == 0 && | ||
145 | object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) | ||
146 | cachefiles_attr_changed(&object->fscache); | ||
147 | |||
148 | if (ret < 0) { | ||
149 | printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", | ||
150 | ret); | ||
151 | fscache_object_lookup_error(&object->fscache); | ||
152 | } | ||
153 | |||
154 | _leave(" [%d]", ret); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * indication of lookup completion | ||
159 | */ | ||
160 | static void cachefiles_lookup_complete(struct fscache_object *_object) | ||
161 | { | ||
162 | struct cachefiles_object *object; | ||
163 | |||
164 | object = container_of(_object, struct cachefiles_object, fscache); | ||
165 | |||
166 | _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data); | ||
167 | |||
168 | if (object->lookup_data) { | ||
169 | kfree(object->lookup_data->key); | ||
170 | kfree(object->lookup_data->auxdata); | ||
171 | kfree(object->lookup_data); | ||
172 | object->lookup_data = NULL; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * increment the usage count on an inode object (may fail if unmounting) | ||
178 | */ | ||
179 | static | ||
180 | struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) | ||
181 | { | ||
182 | struct cachefiles_object *object = | ||
183 | container_of(_object, struct cachefiles_object, fscache); | ||
184 | |||
185 | _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); | ||
186 | |||
187 | #ifdef CACHEFILES_DEBUG_SLAB | ||
188 | ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); | ||
189 | #endif | ||
190 | |||
191 | atomic_inc(&object->usage); | ||
192 | return &object->fscache; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * update the auxilliary data for an object object on disk | ||
197 | */ | ||
198 | static void cachefiles_update_object(struct fscache_object *_object) | ||
199 | { | ||
200 | struct cachefiles_object *object; | ||
201 | struct cachefiles_xattr *auxdata; | ||
202 | struct cachefiles_cache *cache; | ||
203 | struct fscache_cookie *cookie; | ||
204 | const struct cred *saved_cred; | ||
205 | unsigned auxlen; | ||
206 | |||
207 | _enter("{OBJ%x}", _object->debug_id); | ||
208 | |||
209 | object = container_of(_object, struct cachefiles_object, fscache); | ||
210 | cache = container_of(object->fscache.cache, struct cachefiles_cache, | ||
211 | cache); | ||
212 | cookie = object->fscache.cookie; | ||
213 | |||
214 | if (!cookie->def->get_aux) { | ||
215 | _leave(" [no aux]"); | ||
216 | return; | ||
217 | } | ||
218 | |||
219 | auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL); | ||
220 | if (!auxdata) { | ||
221 | _leave(" [nomem]"); | ||
222 | return; | ||
223 | } | ||
224 | |||
225 | auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); | ||
226 | ASSERTCMP(auxlen, <, 511); | ||
227 | |||
228 | auxdata->len = auxlen + 1; | ||
229 | auxdata->type = cookie->def->type; | ||
230 | |||
231 | cachefiles_begin_secure(cache, &saved_cred); | ||
232 | cachefiles_update_object_xattr(object, auxdata); | ||
233 | cachefiles_end_secure(cache, saved_cred); | ||
234 | kfree(auxdata); | ||
235 | _leave(""); | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * discard the resources pinned by an object and effect retirement if | ||
240 | * requested | ||
241 | */ | ||
242 | static void cachefiles_drop_object(struct fscache_object *_object) | ||
243 | { | ||
244 | struct cachefiles_object *object; | ||
245 | struct cachefiles_cache *cache; | ||
246 | const struct cred *saved_cred; | ||
247 | |||
248 | ASSERT(_object); | ||
249 | |||
250 | object = container_of(_object, struct cachefiles_object, fscache); | ||
251 | |||
252 | _enter("{OBJ%x,%d}", | ||
253 | object->fscache.debug_id, atomic_read(&object->usage)); | ||
254 | |||
255 | cache = container_of(object->fscache.cache, | ||
256 | struct cachefiles_cache, cache); | ||
257 | |||
258 | #ifdef CACHEFILES_DEBUG_SLAB | ||
259 | ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); | ||
260 | #endif | ||
261 | |||
262 | /* delete retired objects */ | ||
263 | if (object->fscache.state == FSCACHE_OBJECT_RECYCLING && | ||
264 | _object != cache->cache.fsdef | ||
265 | ) { | ||
266 | _debug("- retire object OBJ%x", object->fscache.debug_id); | ||
267 | cachefiles_begin_secure(cache, &saved_cred); | ||
268 | cachefiles_delete_object(cache, object); | ||
269 | cachefiles_end_secure(cache, saved_cred); | ||
270 | } | ||
271 | |||
272 | /* close the filesystem stuff attached to the object */ | ||
273 | if (object->backer != object->dentry) | ||
274 | dput(object->backer); | ||
275 | object->backer = NULL; | ||
276 | |||
277 | /* note that the object is now inactive */ | ||
278 | if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { | ||
279 | write_lock(&cache->active_lock); | ||
280 | if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE, | ||
281 | &object->flags)) | ||
282 | BUG(); | ||
283 | rb_erase(&object->active_node, &cache->active_nodes); | ||
284 | wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); | ||
285 | write_unlock(&cache->active_lock); | ||
286 | } | ||
287 | |||
288 | dput(object->dentry); | ||
289 | object->dentry = NULL; | ||
290 | |||
291 | _leave(""); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * dispose of a reference to an object | ||
296 | */ | ||
297 | static void cachefiles_put_object(struct fscache_object *_object) | ||
298 | { | ||
299 | struct cachefiles_object *object; | ||
300 | struct fscache_cache *cache; | ||
301 | |||
302 | ASSERT(_object); | ||
303 | |||
304 | object = container_of(_object, struct cachefiles_object, fscache); | ||
305 | |||
306 | _enter("{OBJ%x,%d}", | ||
307 | object->fscache.debug_id, atomic_read(&object->usage)); | ||
308 | |||
309 | #ifdef CACHEFILES_DEBUG_SLAB | ||
310 | ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); | ||
311 | #endif | ||
312 | |||
313 | ASSERTIFCMP(object->fscache.parent, | ||
314 | object->fscache.parent->n_children, >, 0); | ||
315 | |||
316 | if (atomic_dec_and_test(&object->usage)) { | ||
317 | _debug("- kill object OBJ%x", object->fscache.debug_id); | ||
318 | |||
319 | ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); | ||
320 | ASSERTCMP(object->fscache.parent, ==, NULL); | ||
321 | ASSERTCMP(object->backer, ==, NULL); | ||
322 | ASSERTCMP(object->dentry, ==, NULL); | ||
323 | ASSERTCMP(object->fscache.n_ops, ==, 0); | ||
324 | ASSERTCMP(object->fscache.n_children, ==, 0); | ||
325 | |||
326 | if (object->lookup_data) { | ||
327 | kfree(object->lookup_data->key); | ||
328 | kfree(object->lookup_data->auxdata); | ||
329 | kfree(object->lookup_data); | ||
330 | object->lookup_data = NULL; | ||
331 | } | ||
332 | |||
333 | cache = object->fscache.cache; | ||
334 | kmem_cache_free(cachefiles_object_jar, object); | ||
335 | fscache_object_destroyed(cache); | ||
336 | } | ||
337 | |||
338 | _leave(""); | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * sync a cache | ||
343 | */ | ||
344 | static void cachefiles_sync_cache(struct fscache_cache *_cache) | ||
345 | { | ||
346 | struct cachefiles_cache *cache; | ||
347 | const struct cred *saved_cred; | ||
348 | int ret; | ||
349 | |||
350 | _enter("%p", _cache); | ||
351 | |||
352 | cache = container_of(_cache, struct cachefiles_cache, cache); | ||
353 | |||
354 | /* make sure all pages pinned by operations on behalf of the netfs are | ||
355 | * written to disc */ | ||
356 | cachefiles_begin_secure(cache, &saved_cred); | ||
357 | ret = fsync_super(cache->mnt->mnt_sb); | ||
358 | cachefiles_end_secure(cache, saved_cred); | ||
359 | |||
360 | if (ret == -EIO) | ||
361 | cachefiles_io_error(cache, | ||
362 | "Attempt to sync backing fs superblock" | ||
363 | " returned error %d", | ||
364 | ret); | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * notification the attributes on an object have changed | ||
369 | * - called with reads/writes excluded by FS-Cache | ||
370 | */ | ||
371 | static int cachefiles_attr_changed(struct fscache_object *_object) | ||
372 | { | ||
373 | struct cachefiles_object *object; | ||
374 | struct cachefiles_cache *cache; | ||
375 | const struct cred *saved_cred; | ||
376 | struct iattr newattrs; | ||
377 | uint64_t ni_size; | ||
378 | loff_t oi_size; | ||
379 | int ret; | ||
380 | |||
381 | _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size); | ||
382 | |||
383 | _enter("{OBJ%x},[%llu]", | ||
384 | _object->debug_id, (unsigned long long) ni_size); | ||
385 | |||
386 | object = container_of(_object, struct cachefiles_object, fscache); | ||
387 | cache = container_of(object->fscache.cache, | ||
388 | struct cachefiles_cache, cache); | ||
389 | |||
390 | if (ni_size == object->i_size) | ||
391 | return 0; | ||
392 | |||
393 | if (!object->backer) | ||
394 | return -ENOBUFS; | ||
395 | |||
396 | ASSERT(S_ISREG(object->backer->d_inode->i_mode)); | ||
397 | |||
398 | fscache_set_store_limit(&object->fscache, ni_size); | ||
399 | |||
400 | oi_size = i_size_read(object->backer->d_inode); | ||
401 | if (oi_size == ni_size) | ||
402 | return 0; | ||
403 | |||
404 | newattrs.ia_size = ni_size; | ||
405 | newattrs.ia_valid = ATTR_SIZE; | ||
406 | |||
407 | cachefiles_begin_secure(cache, &saved_cred); | ||
408 | mutex_lock(&object->backer->d_inode->i_mutex); | ||
409 | ret = notify_change(object->backer, &newattrs); | ||
410 | mutex_unlock(&object->backer->d_inode->i_mutex); | ||
411 | cachefiles_end_secure(cache, saved_cred); | ||
412 | |||
413 | if (ret == -EIO) { | ||
414 | fscache_set_store_limit(&object->fscache, 0); | ||
415 | cachefiles_io_error_obj(object, "Size set failed"); | ||
416 | ret = -ENOBUFS; | ||
417 | } | ||
418 | |||
419 | _leave(" = %d", ret); | ||
420 | return ret; | ||
421 | } | ||
422 | |||
/*
 * Dissociate a cache from all the pages it was backing.
 * CacheFiles keeps no netfs page pointers, so there is nothing to do here.
 */
static void cachefiles_dissociate_pages(struct fscache_cache *cache)
{
	_enter("");
}
430 | |||
431 | const struct fscache_cache_ops cachefiles_cache_ops = { | ||
432 | .name = "cachefiles", | ||
433 | .alloc_object = cachefiles_alloc_object, | ||
434 | .lookup_object = cachefiles_lookup_object, | ||
435 | .lookup_complete = cachefiles_lookup_complete, | ||
436 | .grab_object = cachefiles_grab_object, | ||
437 | .update_object = cachefiles_update_object, | ||
438 | .drop_object = cachefiles_drop_object, | ||
439 | .put_object = cachefiles_put_object, | ||
440 | .sync_cache = cachefiles_sync_cache, | ||
441 | .attr_changed = cachefiles_attr_changed, | ||
442 | .read_or_alloc_page = cachefiles_read_or_alloc_page, | ||
443 | .read_or_alloc_pages = cachefiles_read_or_alloc_pages, | ||
444 | .allocate_page = cachefiles_allocate_page, | ||
445 | .allocate_pages = cachefiles_allocate_pages, | ||
446 | .write_page = cachefiles_write_page, | ||
447 | .uncache_page = cachefiles_uncache_page, | ||
448 | .dissociate_pages = cachefiles_dissociate_pages, | ||
449 | }; | ||
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h new file mode 100644 index 000000000000..19218e1463d6 --- /dev/null +++ b/fs/cachefiles/internal.h | |||
@@ -0,0 +1,360 @@ | |||
1 | /* General netfs cache on cache files internal defs | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/fscache-cache.h> | ||
13 | #include <linux/timer.h> | ||
14 | #include <linux/wait.h> | ||
15 | #include <linux/workqueue.h> | ||
16 | #include <linux/security.h> | ||
17 | |||
18 | struct cachefiles_cache; | ||
19 | struct cachefiles_object; | ||
20 | |||
21 | extern unsigned cachefiles_debug; | ||
22 | #define CACHEFILES_DEBUG_KENTER 1 | ||
23 | #define CACHEFILES_DEBUG_KLEAVE 2 | ||
24 | #define CACHEFILES_DEBUG_KDEBUG 4 | ||
25 | |||
26 | /* | ||
27 | * node records | ||
28 | */ | ||
29 | struct cachefiles_object { | ||
30 | struct fscache_object fscache; /* fscache handle */ | ||
31 | struct cachefiles_lookup_data *lookup_data; /* cached lookup data */ | ||
32 | struct dentry *dentry; /* the file/dir representing this object */ | ||
33 | struct dentry *backer; /* backing file */ | ||
34 | loff_t i_size; /* object size */ | ||
35 | unsigned long flags; | ||
36 | #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ | ||
37 | atomic_t usage; /* object usage count */ | ||
38 | uint8_t type; /* object type */ | ||
39 | uint8_t new; /* T if object new */ | ||
40 | spinlock_t work_lock; | ||
41 | struct rb_node active_node; /* link in active tree (dentry is key) */ | ||
42 | }; | ||
43 | |||
44 | extern struct kmem_cache *cachefiles_object_jar; | ||
45 | |||
46 | /* | ||
47 | * Cache files cache definition | ||
48 | */ | ||
49 | struct cachefiles_cache { | ||
50 | struct fscache_cache cache; /* FS-Cache record */ | ||
51 | struct vfsmount *mnt; /* mountpoint holding the cache */ | ||
52 | struct dentry *graveyard; /* directory into which dead objects go */ | ||
53 | struct file *cachefilesd; /* manager daemon handle */ | ||
54 | const struct cred *cache_cred; /* security override for accessing cache */ | ||
55 | struct mutex daemon_mutex; /* command serialisation mutex */ | ||
56 | wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */ | ||
57 | struct rb_root active_nodes; /* active nodes (can't be culled) */ | ||
58 | rwlock_t active_lock; /* lock for active_nodes */ | ||
59 | atomic_t gravecounter; /* graveyard uniquifier */ | ||
60 | unsigned frun_percent; /* when to stop culling (% files) */ | ||
61 | unsigned fcull_percent; /* when to start culling (% files) */ | ||
62 | unsigned fstop_percent; /* when to stop allocating (% files) */ | ||
63 | unsigned brun_percent; /* when to stop culling (% blocks) */ | ||
64 | unsigned bcull_percent; /* when to start culling (% blocks) */ | ||
65 | unsigned bstop_percent; /* when to stop allocating (% blocks) */ | ||
66 | unsigned bsize; /* cache's block size */ | ||
67 | unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */ | ||
68 | uint64_t frun; /* when to stop culling */ | ||
69 | uint64_t fcull; /* when to start culling */ | ||
70 | uint64_t fstop; /* when to stop allocating */ | ||
71 | sector_t brun; /* when to stop culling */ | ||
72 | sector_t bcull; /* when to start culling */ | ||
73 | sector_t bstop; /* when to stop allocating */ | ||
74 | unsigned long flags; | ||
75 | #define CACHEFILES_READY 0 /* T if cache prepared */ | ||
76 | #define CACHEFILES_DEAD 1 /* T if cache dead */ | ||
77 | #define CACHEFILES_CULLING 2 /* T if cull engaged */ | ||
78 | #define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ | ||
79 | char *rootdirname; /* name of cache root directory */ | ||
80 | char *secctx; /* LSM security context */ | ||
81 | char *tag; /* cache binding tag */ | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * backing file read tracking | ||
86 | */ | ||
87 | struct cachefiles_one_read { | ||
88 | wait_queue_t monitor; /* link into monitored waitqueue */ | ||
89 | struct page *back_page; /* backing file page we're waiting for */ | ||
90 | struct page *netfs_page; /* netfs page we're going to fill */ | ||
91 | struct fscache_retrieval *op; /* retrieval op covering this */ | ||
92 | struct list_head op_link; /* link in op's todo list */ | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * backing file write tracking | ||
97 | */ | ||
98 | struct cachefiles_one_write { | ||
99 | struct page *netfs_page; /* netfs page to copy */ | ||
100 | struct cachefiles_object *object; | ||
101 | struct list_head obj_link; /* link in object's lists */ | ||
102 | fscache_rw_complete_t end_io_func; | ||
103 | void *context; | ||
104 | }; | ||
105 | |||
106 | /* | ||
107 | * auxiliary data xattr buffer | ||
108 | */ | ||
109 | struct cachefiles_xattr { | ||
110 | uint16_t len; | ||
111 | uint8_t type; | ||
112 | uint8_t data[]; | ||
113 | }; | ||
114 | |||
115 | /* | ||
116 | * note change of state for daemon | ||
117 | */ | ||
118 | static inline void cachefiles_state_changed(struct cachefiles_cache *cache) | ||
119 | { | ||
120 | set_bit(CACHEFILES_STATE_CHANGED, &cache->flags); | ||
121 | wake_up_all(&cache->daemon_pollwq); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * cf-bind.c | ||
126 | */ | ||
127 | extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); | ||
128 | extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); | ||
129 | |||
130 | /* | ||
131 | * cf-daemon.c | ||
132 | */ | ||
133 | extern const struct file_operations cachefiles_daemon_fops; | ||
134 | |||
135 | extern int cachefiles_has_space(struct cachefiles_cache *cache, | ||
136 | unsigned fnr, unsigned bnr); | ||
137 | |||
138 | /* | ||
139 | * cf-interface.c | ||
140 | */ | ||
141 | extern const struct fscache_cache_ops cachefiles_cache_ops; | ||
142 | |||
143 | /* | ||
144 | * cf-key.c | ||
145 | */ | ||
146 | extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); | ||
147 | |||
148 | /* | ||
149 | * cf-namei.c | ||
150 | */ | ||
151 | extern int cachefiles_delete_object(struct cachefiles_cache *cache, | ||
152 | struct cachefiles_object *object); | ||
153 | extern int cachefiles_walk_to_object(struct cachefiles_object *parent, | ||
154 | struct cachefiles_object *object, | ||
155 | const char *key, | ||
156 | struct cachefiles_xattr *auxdata); | ||
157 | extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, | ||
158 | struct dentry *dir, | ||
159 | const char *name); | ||
160 | |||
161 | extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, | ||
162 | char *filename); | ||
163 | |||
164 | extern int cachefiles_check_in_use(struct cachefiles_cache *cache, | ||
165 | struct dentry *dir, char *filename); | ||
166 | |||
/*
 * cf-proc.c
 */
#ifdef CONFIG_CACHEFILES_HISTOGRAM
extern atomic_t cachefiles_lookup_histogram[HZ];
extern atomic_t cachefiles_mkdir_histogram[HZ];
extern atomic_t cachefiles_create_histogram[HZ];

extern int __init cachefiles_proc_init(void);
extern void cachefiles_proc_cleanup(void);
static inline
void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
{
	unsigned long elapsed = jiffies - start_jif;

	/* clamp into the last bucket rather than index past the array */
	if (elapsed >= HZ)
		elapsed = HZ - 1;
	atomic_inc(&histogram[elapsed]);
}

#else
#define cachefiles_proc_init()		(0)
#define cachefiles_proc_cleanup()	do {} while (0)
#define cachefiles_hist(hist, start_jif) do {} while (0)
#endif
191 | |||
/*
 * cf-rdwr.c
 */
/* page I/O entry points (take fscache retrieval/storage operation
 * descriptors on behalf of the netfs) */
extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
					 struct page *, gfp_t);
extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *,
					  struct list_head *, unsigned *,
					  gfp_t);
extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *,
				    gfp_t);
extern int cachefiles_allocate_pages(struct fscache_retrieval *,
				     struct list_head *, unsigned *, gfp_t);
extern int cachefiles_write_page(struct fscache_storage *, struct page *);
extern void cachefiles_uncache_page(struct fscache_object *, struct page *);

/*
 * cf-security.c
 */
extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
					       struct dentry *root,
					       const struct cred **_saved_cred);
214 | |||
/*
 * switch to the cache's own security credentials for backing-fs access,
 * saving the caller's credentials in *_saved_cred for later restoration
 */
static inline void cachefiles_begin_secure(struct cachefiles_cache *cache,
					   const struct cred **_saved_cred)
{
	*_saved_cred = override_creds(cache->cache_cred);
}

/*
 * restore the credentials saved by cachefiles_begin_secure()
 */
static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
					 const struct cred *saved_cred)
{
	revert_creds(saved_cred);
}
226 | |||
/*
 * cf-xattr.c
 */
extern int cachefiles_check_object_type(struct cachefiles_object *object);
extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
				       struct cachefiles_xattr *auxdata);
extern int cachefiles_update_object_xattr(struct cachefiles_object *object,
					  struct cachefiles_xattr *auxdata);
extern int cachefiles_check_object_xattr(struct cachefiles_object *object,
					 struct cachefiles_xattr *auxdata);
extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
					  struct dentry *dentry);


/*
 * error handling
 */
/* log an error with a "CacheFiles: " prefix */
#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__)

/*
 * report an I/O error: log it, tell fscache to stop using this cache and
 * mark the cache dead so that further requests are refused
 */
#define cachefiles_io_error(___cache, FMT, ...)		\
do {							\
	kerror("I/O Error: " FMT, ##__VA_ARGS__);	\
	fscache_io_error(&(___cache)->cache);		\
	set_bit(CACHEFILES_DEAD, &(___cache)->flags);	\
} while (0)

/* as cachefiles_io_error(), but locating the cache from an object */
#define cachefiles_io_error_obj(object, FMT, ...)			\
do {									\
	struct cachefiles_cache *___cache;				\
									\
	___cache = container_of((object)->fscache.cache,		\
				struct cachefiles_cache, cache);	\
	cachefiles_io_error(___cache, FMT, ##__VA_ARGS__);		\
} while (0)
261 | |||
262 | |||
/*
 * debug tracing
 */
/* emit a KERN_DEBUG message tagged with the current task's comm */
#define dbgprintk(FMT, ...) \
	printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)

/* make sure we maintain the format strings, even when debugging is disabled */
static inline void _dbprintk(const char *fmt, ...)
	__attribute__((format(printf, 1, 2)));
static inline void _dbprintk(const char *fmt, ...)
{
}

#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)


/*
 * _enter/_leave/_debug tracing:
 * - __KDEBUG: always emitted
 * - CONFIG_CACHEFILES_DEBUG: gated at runtime by the cachefiles_debug mask
 * - otherwise: compiled out, but arguments still typechecked via _dbprintk()
 */
#if defined(__KDEBUG)
#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)

#elif defined(CONFIG_CACHEFILES_DEBUG)
#define _enter(FMT, ...)				\
do {							\
	if (cachefiles_debug & CACHEFILES_DEBUG_KENTER)	\
		kenter(FMT, ##__VA_ARGS__);		\
} while (0)

#define _leave(FMT, ...)				\
do {							\
	if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE)	\
		kleave(FMT, ##__VA_ARGS__);		\
} while (0)

#define _debug(FMT, ...)				\
do {							\
	if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG)	\
		kdebug(FMT, ##__VA_ARGS__);		\
} while (0)

#else
#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
#endif
310 | |||
/*
 * assertion macros - currently forced on (the "#if 1" overrides the
 * intended __KDEBUGALL gate); each BUG()s with a banner on failure
 */
#if 1 /* defined(__KDEBUGALL) */

#define ASSERT(X)							\
do {									\
	if (unlikely(!(X))) {						\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
		BUG();							\
	}								\
} while (0)

/* assert that (X OP Y) holds, dumping both values on failure */
#define ASSERTCMP(X, OP, Y)						\
do {									\
	if (unlikely(!((X) OP (Y)))) {					\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
		printk(KERN_ERR "%lx " #OP " %lx is false\n",		\
		       (unsigned long)(X), (unsigned long)(Y));		\
		BUG();							\
	}								\
} while (0)

/* assert X, but only when precondition C is true */
#define ASSERTIF(C, X)							\
do {									\
	if (unlikely((C) && !(X))) {					\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
		BUG();							\
	}								\
} while (0)

/* assert (X OP Y), but only when precondition C is true */
#define ASSERTIFCMP(C, X, OP, Y)					\
do {									\
	if (unlikely((C) && !((X) OP (Y)))) {				\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
		printk(KERN_ERR "%lx " #OP " %lx is false\n",		\
		       (unsigned long)(X), (unsigned long)(Y));		\
		BUG();							\
	}								\
} while (0)

#else

#define ASSERT(X)			do {} while (0)
#define ASSERTCMP(X, OP, Y)		do {} while (0)
#define ASSERTIF(C, X)			do {} while (0)
#define ASSERTIFCMP(C, X, OP, Y)	do {} while (0)

#endif
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c new file mode 100644 index 000000000000..81b8b2b3a674 --- /dev/null +++ b/fs/cachefiles/key.c | |||
@@ -0,0 +1,159 @@ | |||
1 | /* Key to pathname encoder | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/slab.h> | ||
13 | #include "internal.h" | ||
14 | |||
/* base64-style alphabet used to render 6-bit groups as filename-safe chars */
static const char cachefiles_charmap[64] =
	"0123456789"			/* 0 - 9 */
	"abcdefghijklmnopqrstuvwxyz"	/* 10 - 35 */
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"	/* 36 - 61 */
	"_-"				/* 62 - 63 */
	;

/* map of bytes (value 1) that may appear verbatim in a cache filename */
static const char cachefiles_filecharmap[256] = {
	/* we skip space and tab and control chars */
	[33 ... 46] = 1,		/* '!' -> '.' */
	/* we skip '/' as it's significant to pathwalk */
	[48 ... 127] = 1,		/* '0' -> '~' */
};
28 | |||
/*
 * turn the raw key into something cooked
 * - the raw key should include the length in the two bytes at the front
 * - the key may be up to 514 bytes in length (including the length word)
 * - "base64" encode the strange keys, mapping 3 bytes of raw to four of
 *   cooked
 * - need to cut the cooked key into 252 char lengths (189 raw bytes)
 * - returns a kmalloc'd string of NUL-separated path elements, terminated
 *   by a double NUL (caller frees), or NULL on allocation failure
 */
char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type)
{
	unsigned char csum, ch;
	unsigned int acc;
	char *key;
	int loop, len, max, seg, mark, print;

	_enter(",%d", keylen);

	BUG_ON(keylen < 2 || keylen > 514);

	/* checksum all the key bytes, and simultaneously decide whether the
	 * payload is entirely filename-safe ASCII (print stays 1 iff so) */
	csum = raw[0] + raw[1];
	print = 1;
	for (loop = 2; loop < keylen; loop++) {
		ch = raw[loop];
		csum += ch;
		print &= cachefiles_filecharmap[ch];
	}

	if (print) {
		/* if the path is usable ASCII, then we render it directly */
		max = keylen - 2;
		max += 2;	/* two base64'd length chars on the front */
		max += 5;	/* @checksum/M */
		max += 3 * 2;	/* maximum number of segment dividers (".../M")
				 * is ((514 + 251) / 252) = 3
				 */
		max += 1;	/* NUL on end */
	} else {
		/* calculate the maximum length of the cooked key */
		keylen = (keylen + 2) / 3;	/* 3-byte groups, rounded up */

		max = keylen * 4;
		max += 5;	/* @checksum/M */
		max += 3 * 2;	/* maximum number of segment dividers (".../M")
				 * is ((514 + 188) / 189) = 3
				 */
		max += 1;	/* NUL on end */
	}

	max += 1;	/* 2nd NUL on end */

	_debug("max: %d", max);

	key = kmalloc(max, GFP_KERNEL);
	if (!key)
		return NULL;

	len = 0;

	/* build the cooked key: "@XX" checksum directory, a NUL separator,
	 * then a '+' placeholder whose position is remembered in 'mark' so
	 * it can be overwritten with the type character at the end */
	sprintf(key, "@%02x%c+", (unsigned) csum, 0);
	len = 5;
	mark = len - 1;

	if (print) {
		/* render the 16-bit length word as two base64 chars */
		acc = *(uint16_t *) raw;
		raw += 2;

		key[len + 1] = cachefiles_charmap[acc & 63];
		acc >>= 6;
		key[len] = cachefiles_charmap[acc & 63];
		len += 2;

		seg = 250;
		/* NOTE(review): the loop count is the full keylen even though
		 * the two length bytes were already consumed above, so two
		 * bytes beyond the verified payload are copied - presumably
		 * the source buffer is padded; confirm against the fscache
		 * cookie key layout */
		for (loop = keylen; loop > 0; loop--) {
			if (seg <= 0) {
				/* start a new path segment */
				key[len++] = '\0';
				mark = len;
				key[len++] = '+';
				seg = 252;
			}

			key[len++] = *raw++;
			ASSERT(len < max);
		}

		/* plain-text keys get one set of type markers... */
		switch (type) {
		case FSCACHE_COOKIE_TYPE_INDEX:	type = 'I'; break;
		case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break;
		default: type = 'S'; break;
		}
	} else {
		seg = 252;
		/* base64 encode the whole raw key (length word included),
		 * 3 input bytes to 4 output chars per group */
		/* NOTE(review): when the original length is not a multiple
		 * of 3, the final group reads up to two bytes past the end
		 * of the key - presumably the buffer is padded; confirm */
		for (loop = keylen; loop > 0; loop--) {
			if (seg <= 0) {
				/* start a new path segment */
				key[len++] = '\0';
				mark = len;
				key[len++] = '+';
				seg = 252;
			}

			acc = *raw++;
			acc |= *raw++ << 8;
			acc |= *raw++ << 16;

			_debug("acc: %06x", acc);

			key[len++] = cachefiles_charmap[acc & 63];
			acc >>= 6;
			key[len++] = cachefiles_charmap[acc & 63];
			acc >>= 6;
			key[len++] = cachefiles_charmap[acc & 63];
			acc >>= 6;
			key[len++] = cachefiles_charmap[acc & 63];

			ASSERT(len < max);
		}

		/* ...and base64'd keys get a distinct set so the decoding
		 * scheme can be told from the name */
		switch (type) {
		case FSCACHE_COOKIE_TYPE_INDEX:	type = 'J'; break;
		case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break;
		default: type = 'T'; break;
		}
	}

	/* stamp the type over the last segment's '+' and double-NUL
	 * terminate */
	key[mark] = type;
	key[len++] = 0;
	key[len] = 0;

	_leave(" = %p %d", key, len);
	return key;
}
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c new file mode 100644 index 000000000000..4bfa8cf43bf5 --- /dev/null +++ b/fs/cachefiles/main.c | |||
@@ -0,0 +1,106 @@ | |||
1 | /* Network filesystem caching backend to use cache files on a premounted | ||
2 | * filesystem | ||
3 | * | ||
4 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
5 | * Written by David Howells (dhowells@redhat.com) | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public Licence | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the Licence, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/completion.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/file.h> | ||
20 | #include <linux/namei.h> | ||
21 | #include <linux/mount.h> | ||
22 | #include <linux/statfs.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/miscdevice.h> | ||
25 | #include "internal.h" | ||
26 | |||
27 | unsigned cachefiles_debug; | ||
28 | module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO); | ||
29 | MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask"); | ||
30 | |||
31 | MODULE_DESCRIPTION("Mounted-filesystem based cache"); | ||
32 | MODULE_AUTHOR("Red Hat, Inc."); | ||
33 | MODULE_LICENSE("GPL"); | ||
34 | |||
35 | struct kmem_cache *cachefiles_object_jar; | ||
36 | |||
37 | static struct miscdevice cachefiles_dev = { | ||
38 | .minor = MISC_DYNAMIC_MINOR, | ||
39 | .name = "cachefiles", | ||
40 | .fops = &cachefiles_daemon_fops, | ||
41 | }; | ||
42 | |||
43 | static void cachefiles_object_init_once(void *_object) | ||
44 | { | ||
45 | struct cachefiles_object *object = _object; | ||
46 | |||
47 | memset(object, 0, sizeof(*object)); | ||
48 | spin_lock_init(&object->work_lock); | ||
49 | } | ||
50 | |||
/*
 * initialise the fs caching module
 * - registers the daemon control device, creates the object slab and sets
 *   up the (optional) histogram proc files, unwinding in reverse on error
 */
static int __init cachefiles_init(void)
{
	int ret;

	/* register the control device the userspace daemon opens */
	ret = misc_register(&cachefiles_dev);
	if (ret < 0)
		goto error_dev;

	/* create an object jar */
	ret = -ENOMEM;
	cachefiles_object_jar =
		kmem_cache_create("cachefiles_object_jar",
				  sizeof(struct cachefiles_object),
				  0,
				  SLAB_HWCACHE_ALIGN,
				  cachefiles_object_init_once);
	if (!cachefiles_object_jar) {
		printk(KERN_NOTICE
		       "CacheFiles: Failed to allocate an object jar\n");
		goto error_object_jar;
	}

	/* no-op unless CONFIG_CACHEFILES_HISTOGRAM is set */
	ret = cachefiles_proc_init();
	if (ret < 0)
		goto error_proc;

	printk(KERN_INFO "CacheFiles: Loaded\n");
	return 0;

	/* unwind in reverse order of construction */
error_proc:
	kmem_cache_destroy(cachefiles_object_jar);
error_object_jar:
	misc_deregister(&cachefiles_dev);
error_dev:
	kerror("failed to register: %d", ret);
	return ret;
}

/* run with the other filesystem initcalls */
fs_initcall(cachefiles_init);
93 | |||
/*
 * clean up on module removal
 * - tears down in the reverse order of cachefiles_init()
 */
static void __exit cachefiles_exit(void)
{
	printk(KERN_INFO "CacheFiles: Unloading\n");

	cachefiles_proc_cleanup();
	kmem_cache_destroy(cachefiles_object_jar);
	misc_deregister(&cachefiles_dev);
}

module_exit(cachefiles_exit);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c new file mode 100644 index 000000000000..4ce818ae39ea --- /dev/null +++ b/fs/cachefiles/namei.c | |||
@@ -0,0 +1,771 @@ | |||
1 | /* CacheFiles path walking and related routines | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/file.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/fsnotify.h> | ||
17 | #include <linux/quotaops.h> | ||
18 | #include <linux/xattr.h> | ||
19 | #include <linux/mount.h> | ||
20 | #include <linux/namei.h> | ||
21 | #include <linux/security.h> | ||
22 | #include "internal.h" | ||
23 | |||
/*
 * wait_on_bit() action function: sleep until woken, then report "retry"
 * (returning 0 tells the bit-wait core to re-test the bit)
 */
static int cachefiles_wait_bit(void *flags)
{
	/* 'flags' (the word being waited on) is unused here */
	schedule();
	return 0;
}
29 | |||
30 | /* | ||
31 | * record the fact that an object is now active | ||
32 | */ | ||
33 | static void cachefiles_mark_object_active(struct cachefiles_cache *cache, | ||
34 | struct cachefiles_object *object) | ||
35 | { | ||
36 | struct cachefiles_object *xobject; | ||
37 | struct rb_node **_p, *_parent = NULL; | ||
38 | struct dentry *dentry; | ||
39 | |||
40 | _enter(",%p", object); | ||
41 | |||
42 | try_again: | ||
43 | write_lock(&cache->active_lock); | ||
44 | |||
45 | if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) | ||
46 | BUG(); | ||
47 | |||
48 | dentry = object->dentry; | ||
49 | _p = &cache->active_nodes.rb_node; | ||
50 | while (*_p) { | ||
51 | _parent = *_p; | ||
52 | xobject = rb_entry(_parent, | ||
53 | struct cachefiles_object, active_node); | ||
54 | |||
55 | ASSERT(xobject != object); | ||
56 | |||
57 | if (xobject->dentry > dentry) | ||
58 | _p = &(*_p)->rb_left; | ||
59 | else if (xobject->dentry < dentry) | ||
60 | _p = &(*_p)->rb_right; | ||
61 | else | ||
62 | goto wait_for_old_object; | ||
63 | } | ||
64 | |||
65 | rb_link_node(&object->active_node, _parent, _p); | ||
66 | rb_insert_color(&object->active_node, &cache->active_nodes); | ||
67 | |||
68 | write_unlock(&cache->active_lock); | ||
69 | _leave(""); | ||
70 | return; | ||
71 | |||
72 | /* an old object from a previous incarnation is hogging the slot - we | ||
73 | * need to wait for it to be destroyed */ | ||
74 | wait_for_old_object: | ||
75 | if (xobject->fscache.state < FSCACHE_OBJECT_DYING) { | ||
76 | printk(KERN_ERR "\n"); | ||
77 | printk(KERN_ERR "CacheFiles: Error:" | ||
78 | " Unexpected object collision\n"); | ||
79 | printk(KERN_ERR "xobject: OBJ%x\n", | ||
80 | xobject->fscache.debug_id); | ||
81 | printk(KERN_ERR "xobjstate=%s\n", | ||
82 | fscache_object_states[xobject->fscache.state]); | ||
83 | printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags); | ||
84 | printk(KERN_ERR "xobjevent=%lx [%lx]\n", | ||
85 | xobject->fscache.events, xobject->fscache.event_mask); | ||
86 | printk(KERN_ERR "xops=%u inp=%u exc=%u\n", | ||
87 | xobject->fscache.n_ops, xobject->fscache.n_in_progress, | ||
88 | xobject->fscache.n_exclusive); | ||
89 | printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n", | ||
90 | xobject->fscache.cookie, | ||
91 | xobject->fscache.cookie->parent, | ||
92 | xobject->fscache.cookie->netfs_data, | ||
93 | xobject->fscache.cookie->flags); | ||
94 | printk(KERN_ERR "xparent=%p\n", | ||
95 | xobject->fscache.parent); | ||
96 | printk(KERN_ERR "object: OBJ%x\n", | ||
97 | object->fscache.debug_id); | ||
98 | printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n", | ||
99 | object->fscache.cookie, | ||
100 | object->fscache.cookie->parent, | ||
101 | object->fscache.cookie->netfs_data, | ||
102 | object->fscache.cookie->flags); | ||
103 | printk(KERN_ERR "parent=%p\n", | ||
104 | object->fscache.parent); | ||
105 | BUG(); | ||
106 | } | ||
107 | atomic_inc(&xobject->usage); | ||
108 | write_unlock(&cache->active_lock); | ||
109 | |||
110 | _debug(">>> wait"); | ||
111 | wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, | ||
112 | cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); | ||
113 | _debug("<<< waited"); | ||
114 | |||
115 | cache->cache.ops->put_object(&xobject->fscache); | ||
116 | goto try_again; | ||
117 | } | ||
118 | |||
/*
 * delete an object representation from the cache
 * - file backed objects are unlinked
 * - directory backed objects are stuffed into the graveyard for userspace to
 *   delete
 * - unlocks the directory mutex (dir->d_inode->i_mutex) in all paths
 */
static int cachefiles_bury_object(struct cachefiles_cache *cache,
				  struct dentry *dir,
				  struct dentry *rep)
{
	struct dentry *grave, *trap;
	char nbuffer[8 + 8 + 1];	/* "%08x%08x" plus NUL */
	int ret;

	_enter(",'%*.*s','%*.*s'",
	       dir->d_name.len, dir->d_name.len, dir->d_name.name,
	       rep->d_name.len, rep->d_name.len, rep->d_name.name);

	/* non-directories can just be unlinked */
	if (!S_ISDIR(rep->d_inode->i_mode)) {
		_debug("unlink stale object");
		ret = vfs_unlink(dir->d_inode, rep);

		mutex_unlock(&dir->d_inode->i_mutex);

		if (ret == -EIO)
			cachefiles_io_error(cache, "Unlink failed");

		_leave(" = %d", ret);
		return ret;
	}

	/* directories have to be moved to the graveyard */
	_debug("move stale object to graveyard");
	mutex_unlock(&dir->d_inode->i_mutex);

try_again:
	/* first step is to make up a grave dentry in the graveyard:
	 * timestamp plus a monotonic counter for uniqueness */
	sprintf(nbuffer, "%08x%08x",
		(uint32_t) get_seconds(),
		(uint32_t) atomic_inc_return(&cache->gravecounter));

	/* do the multiway lock magic */
	trap = lock_rename(cache->graveyard, dir);

	/* do some checks before getting the grave dentry */
	if (rep->d_parent != dir) {
		/* the entry was probably culled when we dropped the parent dir
		 * lock */
		unlock_rename(cache->graveyard, dir);
		_leave(" = 0 [culled?]");
		return 0;
	}

	if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) {
		unlock_rename(cache->graveyard, dir);
		cachefiles_io_error(cache, "Graveyard no longer a directory");
		return -EIO;
	}

	/* source should not be an ancestor of the target */
	if (trap == rep) {
		unlock_rename(cache->graveyard, dir);
		cachefiles_io_error(cache, "May not make directory loop");
		return -EIO;
	}

	if (d_mountpoint(rep)) {
		unlock_rename(cache->graveyard, dir);
		cachefiles_io_error(cache, "Mountpoint in cache");
		return -EIO;
	}

	grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
	if (IS_ERR(grave)) {
		unlock_rename(cache->graveyard, dir);

		if (PTR_ERR(grave) == -ENOMEM) {
			_leave(" = -ENOMEM");
			return -ENOMEM;
		}

		cachefiles_io_error(cache, "Lookup error %ld",
				    PTR_ERR(grave));
		return -EIO;
	}

	if (grave->d_inode) {
		/* the grave name is already taken (counter/time collision) -
		 * back off and try again with a fresh name */
		unlock_rename(cache->graveyard, dir);
		dput(grave);
		grave = NULL;
		cond_resched();
		goto try_again;
	}

	if (d_mountpoint(grave)) {
		unlock_rename(cache->graveyard, dir);
		dput(grave);
		cachefiles_io_error(cache, "Mountpoint in graveyard");
		return -EIO;
	}

	/* target should not be an ancestor of source */
	if (trap == grave) {
		unlock_rename(cache->graveyard, dir);
		dput(grave);
		cachefiles_io_error(cache, "May not make directory loop");
		return -EIO;
	}

	/* attempt the rename */
	ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
	if (ret != 0 && ret != -ENOMEM)
		cachefiles_io_error(cache, "Rename failed with error %d", ret);

	/* NOTE(review): deliberately returns 0 even if the rename failed -
	 * the cachefiles_io_error() above already marked the cache dead */
	unlock_rename(cache->graveyard, dir);
	dput(grave);
	_leave(" = 0");
	return 0;
}
239 | |||
240 | /* | ||
241 | * delete an object representation from the cache | ||
242 | */ | ||
243 | int cachefiles_delete_object(struct cachefiles_cache *cache, | ||
244 | struct cachefiles_object *object) | ||
245 | { | ||
246 | struct dentry *dir; | ||
247 | int ret; | ||
248 | |||
249 | _enter(",{%p}", object->dentry); | ||
250 | |||
251 | ASSERT(object->dentry); | ||
252 | ASSERT(object->dentry->d_inode); | ||
253 | ASSERT(object->dentry->d_parent); | ||
254 | |||
255 | dir = dget_parent(object->dentry); | ||
256 | |||
257 | mutex_lock(&dir->d_inode->i_mutex); | ||
258 | ret = cachefiles_bury_object(cache, dir, object->dentry); | ||
259 | |||
260 | dput(dir); | ||
261 | _leave(" = %d", ret); | ||
262 | return ret; | ||
263 | } | ||
264 | |||
/*
 * walk from the parent object to the child object through the backing
 * filesystem, creating directories as we go
 * - 'key' is a sequence of NUL-separated path elements terminated by a
 *   double NUL (as produced by cachefiles_cook_key())
 * - on success the object's dentry (and, for data objects, its backer) is
 *   set and the object is recorded as active
 * - returns 0 on success or a negative error (-ENOSPC is converted to
 *   -ENOBUFS for fscache)
 */
int cachefiles_walk_to_object(struct cachefiles_object *parent,
			      struct cachefiles_object *object,
			      const char *key,
			      struct cachefiles_xattr *auxdata)
{
	struct cachefiles_cache *cache;
	struct dentry *dir, *next = NULL;
	unsigned long start;
	const char *name;
	int ret, nlen;

	_enter("{%p},,%s,", parent->dentry, key);

	cache = container_of(parent->fscache.cache,
			     struct cachefiles_cache, cache);

	ASSERT(parent->dentry);
	ASSERT(parent->dentry->d_inode);

	if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) {
		// TODO: convert file to dir
		_leave("looking up in none directory");
		return -ENOBUFS;
	}

	dir = dget(parent->dentry);

advance:
	/* attempt to transit the first directory component */
	name = key;
	nlen = strlen(key);

	/* key ends in a double NUL */
	key = key + nlen + 1;
	if (!*key)
		key = NULL;	/* 'name' is the terminal element */

lookup_again:
	/* search the current directory for the element name */
	_debug("lookup '%s'", name);

	mutex_lock(&dir->d_inode->i_mutex);

	start = jiffies;
	next = lookup_one_len(name, dir, nlen);
	cachefiles_hist(cachefiles_lookup_histogram, start);
	if (IS_ERR(next))
		goto lookup_error;

	_debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");

	if (!key)
		object->new = !next->d_inode;

	/* if this element of the path doesn't exist, then the lookup phase
	 * failed, and we can release any readers in the certain knowledge that
	 * there's nothing for them to actually read */
	if (!next->d_inode)
		fscache_object_lookup_negative(&object->fscache);

	/* we need to create the object if it's negative */
	if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
		/* index objects and intervening tree levels must be subdirs */
		if (!next->d_inode) {
			ret = cachefiles_has_space(cache, 1, 0);
			if (ret < 0)
				goto create_error;

			start = jiffies;
			ret = vfs_mkdir(dir->d_inode, next, 0);
			cachefiles_hist(cachefiles_mkdir_histogram, start);
			if (ret < 0)
				goto create_error;

			ASSERT(next->d_inode);

			_debug("mkdir -> %p{%p{ino=%lu}}",
			       next, next->d_inode, next->d_inode->i_ino);

		} else if (!S_ISDIR(next->d_inode->i_mode)) {
			kerror("inode %lu is not a directory",
			       next->d_inode->i_ino);
			ret = -ENOBUFS;
			goto error;
		}

	} else {
		/* non-index objects start out life as files */
		if (!next->d_inode) {
			ret = cachefiles_has_space(cache, 1, 0);
			if (ret < 0)
				goto create_error;

			start = jiffies;
			ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
			cachefiles_hist(cachefiles_create_histogram, start);
			if (ret < 0)
				goto create_error;

			ASSERT(next->d_inode);

			_debug("create -> %p{%p{ino=%lu}}",
			       next, next->d_inode, next->d_inode->i_ino);

		} else if (!S_ISDIR(next->d_inode->i_mode) &&
			   !S_ISREG(next->d_inode->i_mode)
			   ) {
			kerror("inode %lu is not a file or directory",
			       next->d_inode->i_ino);
			ret = -ENOBUFS;
			goto error;
		}
	}

	/* process the next component */
	if (key) {
		_debug("advance");
		mutex_unlock(&dir->d_inode->i_mutex);
		dput(dir);
		dir = next;
		next = NULL;
		goto advance;
	}

	/* we've found the object we were looking for */
	object->dentry = next;

	/* if we've found that the terminal object exists, then we need to
	 * check its attributes and delete it if it's out of date */
	if (!object->new) {
		_debug("validate '%*.*s'",
		       next->d_name.len, next->d_name.len, next->d_name.name);

		ret = cachefiles_check_object_xattr(object, auxdata);
		if (ret == -ESTALE) {
			/* delete the object (the deleter drops the directory
			 * mutex) */
			object->dentry = NULL;

			ret = cachefiles_bury_object(cache, dir, next);
			dput(next);
			next = NULL;

			if (ret < 0)
				goto delete_error;

			/* retake the lock and recreate the element afresh */
			_debug("redo lookup");
			goto lookup_again;
		}
	}

	/* note that we're now using this object */
	cachefiles_mark_object_active(cache, object);

	mutex_unlock(&dir->d_inode->i_mutex);
	dput(dir);
	dir = NULL;

	_debug("=== OBTAINED_OBJECT ===");

	if (object->new) {
		/* attach data to a newly constructed terminal object */
		ret = cachefiles_set_object_xattr(object, auxdata);
		if (ret < 0)
			goto check_error;
	} else {
		/* always update the atime on an object we've just looked up
		 * (this is used to keep track of culling, and atimes are only
		 * updated by read, write and readdir but not lookup or
		 * open) */
		touch_atime(cache->mnt, next);
	}

	/* open a file interface onto a data file */
	if (object->type != FSCACHE_COOKIE_TYPE_INDEX) {
		if (S_ISREG(object->dentry->d_inode->i_mode)) {
			const struct address_space_operations *aops;

			/* the backing fs must support bmap so page I/O can
			 * target the underlying blocks directly */
			ret = -EPERM;
			aops = object->dentry->d_inode->i_mapping->a_ops;
			if (!aops->bmap)
				goto check_error;

			object->backer = object->dentry;
		} else {
			BUG(); // TODO: open file in data-class subdir
		}
	}

	object->new = 0;
	fscache_obtained_object(&object->fscache);

	_leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
	return 0;

create_error:
	_debug("create error %d", ret);
	if (ret == -EIO)
		cachefiles_io_error(cache, "Create/mkdir failed");
	goto error;

check_error:
	/* undo cachefiles_mark_object_active() and wake any waiters on the
	 * active bit */
	_debug("check error %d", ret);
	write_lock(&cache->active_lock);
	rb_erase(&object->active_node, &cache->active_nodes);
	clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
	wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
	write_unlock(&cache->active_lock);

	dput(object->dentry);
	object->dentry = NULL;
	goto error_out;

delete_error:
	/* the burial routine already dropped the directory mutex */
	_debug("delete error %d", ret);
	goto error_out2;

lookup_error:
	_debug("lookup error %ld", PTR_ERR(next));
	ret = PTR_ERR(next);
	if (ret == -EIO)
		cachefiles_io_error(cache, "Lookup failed");
	next = NULL;
error:
	mutex_unlock(&dir->d_inode->i_mutex);
	dput(next);
error_out2:
	dput(dir);
error_out:
	if (ret == -ENOSPC)
		ret = -ENOBUFS;

	_leave(" = error %d", -ret);
	return ret;
}
504 | |||
/*
 * get a subdirectory
 * - looks up dirname under dir, creating it (mode 0700) if it doesn't exist
 * - returns the subdir dentry with a reference held, or an ERR_PTR; the
 *   caller is responsible for the eventual dput()
 */
struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
					struct dentry *dir,
					const char *dirname)
{
	struct dentry *subdir;
	unsigned long start;
	int ret;

	_enter(",,%s", dirname);

	/* search the current directory for the element name */
	mutex_lock(&dir->d_inode->i_mutex);

	/* time the lookup for the latency histogram */
	start = jiffies;
	subdir = lookup_one_len(dirname, dir, strlen(dirname));
	cachefiles_hist(cachefiles_lookup_histogram, start);
	if (IS_ERR(subdir)) {
		if (PTR_ERR(subdir) == -ENOMEM)
			goto nomem_d_alloc;
		goto lookup_error;
	}

	_debug("subdir -> %p %s",
	       subdir, subdir->d_inode ? "positive" : "negative");

	/* we need to create the subdir if it doesn't exist yet */
	if (!subdir->d_inode) {
		/* make sure the cache has room before consuming an inode */
		ret = cachefiles_has_space(cache, 1, 0);
		if (ret < 0)
			goto mkdir_error;

		_debug("attempt mkdir");

		ret = vfs_mkdir(dir->d_inode, subdir, 0700);
		if (ret < 0)
			goto mkdir_error;

		ASSERT(subdir->d_inode);

		_debug("mkdir -> %p{%p{ino=%lu}}",
		       subdir,
		       subdir->d_inode,
		       subdir->d_inode->i_ino);
	}

	mutex_unlock(&dir->d_inode->i_mutex);

	/* we need to make sure the subdir is a directory */
	ASSERT(subdir->d_inode);

	if (!S_ISDIR(subdir->d_inode->i_mode)) {
		kerror("%s is not a directory", dirname);
		ret = -EIO;
		goto check_error;
	}

	/* the backing filesystem must support every operation the cache
	 * relies on (xattrs, lookup, create, rename, unlink, ...) */
	ret = -EPERM;
	if (!subdir->d_inode->i_op ||
	    !subdir->d_inode->i_op->setxattr ||
	    !subdir->d_inode->i_op->getxattr ||
	    !subdir->d_inode->i_op->lookup ||
	    !subdir->d_inode->i_op->mkdir ||
	    !subdir->d_inode->i_op->create ||
	    !subdir->d_inode->i_op->rename ||
	    !subdir->d_inode->i_op->rmdir ||
	    !subdir->d_inode->i_op->unlink)
		goto check_error;

	_leave(" = [%lu]", subdir->d_inode->i_ino);
	return subdir;

check_error:
	/* reached only after the dir mutex has already been dropped */
	dput(subdir);
	_leave(" = %d [check]", ret);
	return ERR_PTR(ret);

mkdir_error:
	mutex_unlock(&dir->d_inode->i_mutex);
	dput(subdir);
	kerror("mkdir %s failed with error %d", dirname, ret);
	return ERR_PTR(ret);

lookup_error:
	mutex_unlock(&dir->d_inode->i_mutex);
	ret = PTR_ERR(subdir);
	kerror("Lookup %s failed with error %d", dirname, ret);
	return ERR_PTR(ret);

nomem_d_alloc:
	mutex_unlock(&dir->d_inode->i_mutex);
	_leave(" = -ENOMEM");
	return ERR_PTR(-ENOMEM);
}
601 | |||
/*
 * find out if an object is in use or not
 * - if finds object and it's not in use:
 *   - returns a pointer to the object and a reference on it
 *   - returns with the directory locked
 * - otherwise returns an ERR_PTR with the directory unlocked:
 *   -ENOENT/-ESTALE if the victim is absent, -EBUSY if it's in use
 */
static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
					      struct dentry *dir,
					      char *filename)
{
	struct cachefiles_object *object;
	struct rb_node *_n;
	struct dentry *victim;
	unsigned long start;
	int ret;

	//_enter(",%*.*s/,%s",
	//       dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);

	/* look up the victim */
	mutex_lock_nested(&dir->d_inode->i_mutex, 1);

	start = jiffies;
	victim = lookup_one_len(filename, dir, strlen(filename));
	cachefiles_hist(cachefiles_lookup_histogram, start);
	if (IS_ERR(victim))
		goto lookup_error;

	//_debug("victim -> %p %s",
	//       victim, victim->d_inode ? "positive" : "negative");

	/* if the object is no longer there then we probably retired the object
	 * at the netfs's request whilst the cull was in progress
	 */
	if (!victim->d_inode) {
		mutex_unlock(&dir->d_inode->i_mutex);
		dput(victim);
		_leave(" = -ENOENT [absent]");
		return ERR_PTR(-ENOENT);
	}

	/* check to see if we're using this object */
	read_lock(&cache->active_lock);

	_n = cache->active_nodes.rb_node;

	/* the active tree is ordered by dentry pointer value, so a plain
	 * pointer comparison is sufficient to walk it */
	while (_n) {
		object = rb_entry(_n, struct cachefiles_object, active_node);

		if (object->dentry > victim)
			_n = _n->rb_left;
		else if (object->dentry < victim)
			_n = _n->rb_right;
		else
			goto object_in_use;
	}

	read_unlock(&cache->active_lock);

	/* not in use: return with the dir still locked and a reference held
	 * on the victim for the caller to dispose of */
	//_leave(" = %p", victim);
	return victim;

object_in_use:
	read_unlock(&cache->active_lock);
	mutex_unlock(&dir->d_inode->i_mutex);
	dput(victim);
	//_leave(" = -EBUSY [in use]");
	return ERR_PTR(-EBUSY);

lookup_error:
	mutex_unlock(&dir->d_inode->i_mutex);
	ret = PTR_ERR(victim);
	if (ret == -ENOENT) {
		/* file or dir now absent - probably retired by netfs */
		_leave(" = -ESTALE [absent]");
		return ERR_PTR(-ESTALE);
	}

	if (ret == -EIO) {
		cachefiles_io_error(cache, "Lookup failed");
	} else if (ret != -ENOMEM) {
		kerror("Internal error: %d", ret);
		ret = -EIO;
	}

	_leave(" = %d", ret);
	return ERR_PTR(ret);
}
690 | |||
691 | /* | ||
692 | * cull an object if it's not in use | ||
693 | * - called only by cache manager daemon | ||
694 | */ | ||
695 | int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, | ||
696 | char *filename) | ||
697 | { | ||
698 | struct dentry *victim; | ||
699 | int ret; | ||
700 | |||
701 | _enter(",%*.*s/,%s", | ||
702 | dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | ||
703 | |||
704 | victim = cachefiles_check_active(cache, dir, filename); | ||
705 | if (IS_ERR(victim)) | ||
706 | return PTR_ERR(victim); | ||
707 | |||
708 | _debug("victim -> %p %s", | ||
709 | victim, victim->d_inode ? "positive" : "negative"); | ||
710 | |||
711 | /* okay... the victim is not being used so we can cull it | ||
712 | * - start by marking it as stale | ||
713 | */ | ||
714 | _debug("victim is cullable"); | ||
715 | |||
716 | ret = cachefiles_remove_object_xattr(cache, victim); | ||
717 | if (ret < 0) | ||
718 | goto error_unlock; | ||
719 | |||
720 | /* actually remove the victim (drops the dir mutex) */ | ||
721 | _debug("bury"); | ||
722 | |||
723 | ret = cachefiles_bury_object(cache, dir, victim); | ||
724 | if (ret < 0) | ||
725 | goto error; | ||
726 | |||
727 | dput(victim); | ||
728 | _leave(" = 0"); | ||
729 | return 0; | ||
730 | |||
731 | error_unlock: | ||
732 | mutex_unlock(&dir->d_inode->i_mutex); | ||
733 | error: | ||
734 | dput(victim); | ||
735 | if (ret == -ENOENT) { | ||
736 | /* file or dir now absent - probably retired by netfs */ | ||
737 | _leave(" = -ESTALE [absent]"); | ||
738 | return -ESTALE; | ||
739 | } | ||
740 | |||
741 | if (ret != -ENOMEM) { | ||
742 | kerror("Internal error: %d", ret); | ||
743 | ret = -EIO; | ||
744 | } | ||
745 | |||
746 | _leave(" = %d", ret); | ||
747 | return ret; | ||
748 | } | ||
749 | |||
750 | /* | ||
751 | * find out if an object is in use or not | ||
752 | * - called only by cache manager daemon | ||
753 | * - returns -EBUSY or 0 to indicate whether an object is in use or not | ||
754 | */ | ||
755 | int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, | ||
756 | char *filename) | ||
757 | { | ||
758 | struct dentry *victim; | ||
759 | |||
760 | //_enter(",%*.*s/,%s", | ||
761 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | ||
762 | |||
763 | victim = cachefiles_check_active(cache, dir, filename); | ||
764 | if (IS_ERR(victim)) | ||
765 | return PTR_ERR(victim); | ||
766 | |||
767 | mutex_unlock(&dir->d_inode->i_mutex); | ||
768 | dput(victim); | ||
769 | //_leave(" = 0"); | ||
770 | return 0; | ||
771 | } | ||
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c new file mode 100644 index 000000000000..eccd33941199 --- /dev/null +++ b/fs/cachefiles/proc.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* CacheFiles statistics | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/proc_fs.h> | ||
14 | #include <linux/seq_file.h> | ||
15 | #include "internal.h" | ||
16 | |||
/* per-jiffy latency histograms, one bucket per elapsed jiffy (0..HZ-1);
 * the lookup histogram is fed from the timed lookup_one_len() calls;
 * the mkdir/create histograms presumably time vfs_mkdir/file creation —
 * their update sites are elsewhere in the module */
atomic_t cachefiles_lookup_histogram[HZ];
atomic_t cachefiles_mkdir_histogram[HZ];
atomic_t cachefiles_create_histogram[HZ];
20 | |||
21 | /* | ||
22 | * display the latency histogram | ||
23 | */ | ||
24 | static int cachefiles_histogram_show(struct seq_file *m, void *v) | ||
25 | { | ||
26 | unsigned long index; | ||
27 | unsigned x, y, z, t; | ||
28 | |||
29 | switch ((unsigned long) v) { | ||
30 | case 1: | ||
31 | seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n"); | ||
32 | return 0; | ||
33 | case 2: | ||
34 | seq_puts(m, "===== ===== ========= ========= =========\n"); | ||
35 | return 0; | ||
36 | default: | ||
37 | index = (unsigned long) v - 3; | ||
38 | x = atomic_read(&cachefiles_lookup_histogram[index]); | ||
39 | y = atomic_read(&cachefiles_mkdir_histogram[index]); | ||
40 | z = atomic_read(&cachefiles_create_histogram[index]); | ||
41 | if (x == 0 && y == 0 && z == 0) | ||
42 | return 0; | ||
43 | |||
44 | t = (index * 1000) / HZ; | ||
45 | |||
46 | seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z); | ||
47 | return 0; | ||
48 | } | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * set up the iterator to start reading from the first line | ||
53 | */ | ||
54 | static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos) | ||
55 | { | ||
56 | if ((unsigned long long)*_pos >= HZ + 2) | ||
57 | return NULL; | ||
58 | if (*_pos == 0) | ||
59 | *_pos = 1; | ||
60 | return (void *)(unsigned long) *_pos; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * move to the next line | ||
65 | */ | ||
66 | static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos) | ||
67 | { | ||
68 | (*pos)++; | ||
69 | return (unsigned long long)*pos > HZ + 2 ? | ||
70 | NULL : (void *)(unsigned long) *pos; | ||
71 | } | ||
72 | |||
/*
 * clean up after reading
 * - nothing to release: the iterator "cursor" is just an integer cast to a
 *   pointer, so no per-read state exists
 */
static void cachefiles_histogram_stop(struct seq_file *m, void *v)
{
}
79 | |||
/* seq_file iteration over the histogram lines (headers + HZ buckets) */
static const struct seq_operations cachefiles_histogram_ops = {
	.start		= cachefiles_histogram_start,
	.stop		= cachefiles_histogram_stop,
	.next		= cachefiles_histogram_next,
	.show		= cachefiles_histogram_show,
};
86 | |||
/*
 * open "/proc/fs/cachefiles/XXX" which provides statistics summaries
 */
static int cachefiles_histogram_open(struct inode *inode, struct file *file)
{
	/* all per-read state lives in the seq_file iterator; no private
	 * data needed */
	return seq_open(file, &cachefiles_histogram_ops);
}

/* read-only, seekable proc file backed by the seq_file machinery */
static const struct file_operations cachefiles_histogram_fops = {
	.owner		= THIS_MODULE,
	.open		= cachefiles_histogram_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
102 | |||
103 | /* | ||
104 | * initialise the /proc/fs/cachefiles/ directory | ||
105 | */ | ||
106 | int __init cachefiles_proc_init(void) | ||
107 | { | ||
108 | _enter(""); | ||
109 | |||
110 | if (!proc_mkdir("fs/cachefiles", NULL)) | ||
111 | goto error_dir; | ||
112 | |||
113 | if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, | ||
114 | &cachefiles_histogram_fops)) | ||
115 | goto error_histogram; | ||
116 | |||
117 | _leave(" = 0"); | ||
118 | return 0; | ||
119 | |||
120 | error_histogram: | ||
121 | remove_proc_entry("fs/cachefiles", NULL); | ||
122 | error_dir: | ||
123 | _leave(" = -ENOMEM"); | ||
124 | return -ENOMEM; | ||
125 | } | ||
126 | |||
/*
 * clean up the /proc/fs/cachefiles/ directory
 */
void cachefiles_proc_cleanup(void)
{
	/* the file must go before its parent directory */
	remove_proc_entry("fs/cachefiles/histogram", NULL);
	remove_proc_entry("fs/cachefiles", NULL);
}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c new file mode 100644 index 000000000000..a69787e7dd96 --- /dev/null +++ b/fs/cachefiles/rdwr.c | |||
@@ -0,0 +1,879 @@ | |||
1 | /* Storage object read/write | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mount.h> | ||
13 | #include <linux/file.h> | ||
14 | #include "internal.h" | ||
15 | |||
/*
 * detect wake up events generated by the unlocking of pages in which we're
 * interested
 * - we use this to detect read completion of backing pages
 * - the caller holds the waitqueue lock
 */
static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
				  int sync, void *_key)
{
	struct cachefiles_one_read *monitor =
		container_of(wait, struct cachefiles_one_read, monitor);
	struct cachefiles_object *object;
	struct wait_bit_key *key = _key;
	struct page *page = wait->private;

	ASSERT(key);

	_enter("{%lu},%u,%d,{%p,%u}",
	       monitor->netfs_page->index, mode, sync,
	       key->flags, key->bit_nr);

	/* the page's waitqueue carries wakeups for other bits too; we only
	 * care about PG_locked being cleared on our own backing page */
	if (key->flags != &page->flags ||
	    key->bit_nr != PG_locked)
		return 0;

	_debug("--- monitor %p %lx ---", page, page->flags);

	/* NOTE(review): an unlock with neither uptodate nor error set is
	 * unexpected; dump a trace for diagnosis rather than failing */
	if (!PageUptodate(page) && !PageError(page))
		dump_stack();

	/* remove from the waitqueue */
	list_del(&wait->task_list);

	/* move onto the action list and queue for FS-Cache thread pool */
	ASSERT(monitor->op);

	object = container_of(monitor->op->op.object,
			      struct cachefiles_object, fscache);

	spin_lock(&object->work_lock);
	list_add_tail(&monitor->op_link, &monitor->op->to_do);
	spin_unlock(&object->work_lock);

	/* kick the retrieval's processor (cachefiles_read_copier) to do the
	 * actual data copy outside of this wakeup context */
	fscache_enqueue_retrieval(monitor->op);
	return 0;
}
62 | |||
/*
 * copy data from backing pages to netfs pages to complete a read operation
 * - driven by FS-Cache's thread pool
 * - consumes the monitors queued on op->to_do by cachefiles_read_waiter()
 */
static void cachefiles_read_copier(struct fscache_operation *_op)
{
	struct cachefiles_one_read *monitor;
	struct cachefiles_object *object;
	struct fscache_retrieval *op;
	struct pagevec pagevec;
	int error, max;

	op = container_of(_op, struct fscache_retrieval, op);
	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);

	_enter("{ino=%lu}", object->backer->d_inode->i_ino);

	pagevec_init(&pagevec, 0);

	/* cap the work done per invocation at 8 monitors so one retrieval
	 * can't monopolise the thread pool (see the requeue below) */
	max = 8;
	spin_lock_irq(&object->work_lock);

	while (!list_empty(&op->to_do)) {
		monitor = list_entry(op->to_do.next,
				     struct cachefiles_one_read, op_link);
		list_del(&monitor->op_link);

		/* drop the lock whilst copying; the read waiter may append
		 * further monitors to op->to_do in the meantime */
		spin_unlock_irq(&object->work_lock);

		_debug("- copy {%lu}", monitor->back_page->index);

		/* anything other than an uptodate backing page is an I/O
		 * failure by the time we get here */
		error = -EIO;
		if (PageUptodate(monitor->back_page)) {
			copy_highpage(monitor->netfs_page, monitor->back_page);

			pagevec_add(&pagevec, monitor->netfs_page);
			fscache_mark_pages_cached(monitor->op, &pagevec);
			error = 0;
		}

		if (error)
			cachefiles_io_error_obj(
				object,
				"Readpage failed on backing file %lx",
				(unsigned long) monitor->back_page->flags);

		page_cache_release(monitor->back_page);

		/* hand the result to the netfs and drop the refs the waiter
		 * path took on the netfs page and the retrieval op */
		fscache_end_io(op, monitor->netfs_page, error);
		page_cache_release(monitor->netfs_page);
		fscache_put_retrieval(op);
		kfree(monitor);

		/* let the thread pool have some air occasionally */
		max--;
		if (max < 0 || need_resched()) {
			if (!list_empty(&op->to_do))
				fscache_enqueue_retrieval(op);
			_leave(" [maxed out]");
			return;
		}

		spin_lock_irq(&object->work_lock);
	}

	spin_unlock_irq(&object->work_lock);
	_leave("");
}
132 | |||
/*
 * read the corresponding page to the given set from the backing file
 * - an uncertain page is simply discarded, to be tried again another time
 * - returns 0 if the read was started or completed, -ENOMEM or -ENOBUFS
 *   on failure
 */
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
					    struct fscache_retrieval *op,
					    struct page *netpage,
					    struct pagevec *pagevec)
{
	struct cachefiles_one_read *monitor;
	struct address_space *bmapping;
	struct page *newpage, *backpage;
	int ret;

	_enter("");

	pagevec_reinit(pagevec);

	_debug("read back %p{%lu,%d}",
	       netpage, netpage->index, page_count(netpage));

	/* the monitor ties the backing page's unlock event to the netfs page
	 * (see cachefiles_read_waiter) */
	monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
	if (!monitor)
		goto nomem;

	monitor->netfs_page = netpage;
	monitor->op = fscache_get_retrieval(op);

	init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);

	/* attempt to get hold of the backing page */
	bmapping = object->backer->d_inode->i_mapping;
	newpage = NULL;

	/* loop: somebody else may install a page at this index between our
	 * find_get_page() and add_to_page_cache() (-EEXIST) */
	for (;;) {
		backpage = find_get_page(bmapping, netpage->index);
		if (backpage)
			goto backing_page_already_present;

		if (!newpage) {
			newpage = page_cache_alloc_cold(bmapping);
			if (!newpage)
				goto nomem_monitor;
		}

		ret = add_to_page_cache(newpage, bmapping,
					netpage->index, GFP_KERNEL);
		if (ret == 0)
			goto installed_new_backing_page;
		if (ret != -EEXIST)
			goto nomem_page;
	}

	/* we've installed a new backing page, so now we need to add it
	 * to the LRU list and start it reading */
installed_new_backing_page:
	_debug("- new %p", newpage);

	backpage = newpage;
	newpage = NULL;

	page_cache_get(backpage);
	pagevec_add(pagevec, backpage);
	__pagevec_lru_add_file(pagevec);

read_backing_page:
	ret = bmapping->a_ops->readpage(NULL, backpage);
	if (ret < 0)
		goto read_error;

	/* set the monitor to transfer the data across */
monitor_backing_page:
	_debug("- monitor add");

	/* install the monitor; ownership of the monitor passes to the
	 * waiter/copier path once it's on the page's waitqueue */
	page_cache_get(monitor->netfs_page);
	page_cache_get(backpage);
	monitor->back_page = backpage;
	monitor->monitor.private = backpage;
	add_page_wait_queue(backpage, &monitor->monitor);
	monitor = NULL;

	/* but the page may have been read before the monitor was installed, so
	 * the monitor may miss the event - so we have to ensure that we do get
	 * one in such a case */
	if (trylock_page(backpage)) {
		_debug("jumpstart %p {%lx}", backpage, backpage->flags);
		unlock_page(backpage);
	}
	goto success;

	/* if the backing page is already present, it can be in one of
	 * three states: read in progress, read failed or read okay */
backing_page_already_present:
	_debug("- present");

	if (newpage) {
		page_cache_release(newpage);
		newpage = NULL;
	}

	if (PageError(backpage))
		goto io_error;

	if (PageUptodate(backpage))
		goto backing_page_already_uptodate;

	/* read still in progress: if we can't get the lock, fall back to
	 * monitoring the eventual unlock */
	if (!trylock_page(backpage))
		goto monitor_backing_page;
	_debug("read %p {%lx}", backpage, backpage->flags);
	goto read_backing_page;

	/* the backing page is already up to date, attach the netfs
	 * page to the pagecache and LRU and copy the data across */
backing_page_already_uptodate:
	_debug("- uptodate");

	pagevec_add(pagevec, netpage);
	fscache_mark_pages_cached(op, pagevec);

	copy_highpage(netpage, backpage);
	fscache_end_io(op, netpage, 0);

success:
	_debug("success");
	ret = 0;

out:
	/* drop the find_get_page() reference if it wasn't consumed above */
	if (backpage)
		page_cache_release(backpage);
	/* monitor is non-NULL only if it was never installed */
	if (monitor) {
		fscache_put_retrieval(monitor->op);
		kfree(monitor);
	}
	_leave(" = %d", ret);
	return ret;

read_error:
	_debug("read error %d", ret);
	if (ret == -ENOMEM)
		goto out;
io_error:
	cachefiles_io_error_obj(object, "Page read error on backing file");
	ret = -ENOBUFS;
	goto out;

nomem_page:
	page_cache_release(newpage);
nomem_monitor:
	fscache_put_retrieval(monitor->op);
	kfree(monitor);
nomem:
	_leave(" = -ENOMEM");
	return -ENOMEM;
}
288 | |||
289 | /* | ||
290 | * read a page from the cache or allocate a block in which to store it | ||
291 | * - cache withdrawal is prevented by the caller | ||
292 | * - returns -EINTR if interrupted | ||
293 | * - returns -ENOMEM if ran out of memory | ||
294 | * - returns -ENOBUFS if no buffers can be made available | ||
295 | * - returns -ENOBUFS if page is beyond EOF | ||
296 | * - if the page is backed by a block in the cache: | ||
297 | * - a read will be started which will call the callback on completion | ||
298 | * - 0 will be returned | ||
299 | * - else if the page is unbacked: | ||
300 | * - the metadata will be retained | ||
301 | * - -ENODATA will be returned | ||
302 | */ | ||
303 | int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | ||
304 | struct page *page, | ||
305 | gfp_t gfp) | ||
306 | { | ||
307 | struct cachefiles_object *object; | ||
308 | struct cachefiles_cache *cache; | ||
309 | struct pagevec pagevec; | ||
310 | struct inode *inode; | ||
311 | sector_t block0, block; | ||
312 | unsigned shift; | ||
313 | int ret; | ||
314 | |||
315 | object = container_of(op->op.object, | ||
316 | struct cachefiles_object, fscache); | ||
317 | cache = container_of(object->fscache.cache, | ||
318 | struct cachefiles_cache, cache); | ||
319 | |||
320 | _enter("{%p},{%lx},,,", object, page->index); | ||
321 | |||
322 | if (!object->backer) | ||
323 | return -ENOBUFS; | ||
324 | |||
325 | inode = object->backer->d_inode; | ||
326 | ASSERT(S_ISREG(inode->i_mode)); | ||
327 | ASSERT(inode->i_mapping->a_ops->bmap); | ||
328 | ASSERT(inode->i_mapping->a_ops->readpages); | ||
329 | |||
330 | /* calculate the shift required to use bmap */ | ||
331 | if (inode->i_sb->s_blocksize > PAGE_SIZE) | ||
332 | return -ENOBUFS; | ||
333 | |||
334 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; | ||
335 | |||
336 | op->op.flags = FSCACHE_OP_FAST; | ||
337 | op->op.processor = cachefiles_read_copier; | ||
338 | |||
339 | pagevec_init(&pagevec, 0); | ||
340 | |||
341 | /* we assume the absence or presence of the first block is a good | ||
342 | * enough indication for the page as a whole | ||
343 | * - TODO: don't use bmap() for this as it is _not_ actually good | ||
344 | * enough for this as it doesn't indicate errors, but it's all we've | ||
345 | * got for the moment | ||
346 | */ | ||
347 | block0 = page->index; | ||
348 | block0 <<= shift; | ||
349 | |||
350 | block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); | ||
351 | _debug("%llx -> %llx", | ||
352 | (unsigned long long) block0, | ||
353 | (unsigned long long) block); | ||
354 | |||
355 | if (block) { | ||
356 | /* submit the apparently valid page to the backing fs to be | ||
357 | * read from disk */ | ||
358 | ret = cachefiles_read_backing_file_one(object, op, page, | ||
359 | &pagevec); | ||
360 | } else if (cachefiles_has_space(cache, 0, 1) == 0) { | ||
361 | /* there's space in the cache we can use */ | ||
362 | pagevec_add(&pagevec, page); | ||
363 | fscache_mark_pages_cached(op, &pagevec); | ||
364 | ret = -ENODATA; | ||
365 | } else { | ||
366 | ret = -ENOBUFS; | ||
367 | } | ||
368 | |||
369 | _leave(" = %d", ret); | ||
370 | return ret; | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * read the corresponding pages to the given set from the backing file | ||
375 | * - any uncertain pages are simply discarded, to be tried again another time | ||
376 | */ | ||
377 | static int cachefiles_read_backing_file(struct cachefiles_object *object, | ||
378 | struct fscache_retrieval *op, | ||
379 | struct list_head *list, | ||
380 | struct pagevec *mark_pvec) | ||
381 | { | ||
382 | struct cachefiles_one_read *monitor = NULL; | ||
383 | struct address_space *bmapping = object->backer->d_inode->i_mapping; | ||
384 | struct pagevec lru_pvec; | ||
385 | struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; | ||
386 | int ret = 0; | ||
387 | |||
388 | _enter(""); | ||
389 | |||
390 | pagevec_init(&lru_pvec, 0); | ||
391 | |||
392 | list_for_each_entry_safe(netpage, _n, list, lru) { | ||
393 | list_del(&netpage->lru); | ||
394 | |||
395 | _debug("read back %p{%lu,%d}", | ||
396 | netpage, netpage->index, page_count(netpage)); | ||
397 | |||
398 | if (!monitor) { | ||
399 | monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); | ||
400 | if (!monitor) | ||
401 | goto nomem; | ||
402 | |||
403 | monitor->op = fscache_get_retrieval(op); | ||
404 | init_waitqueue_func_entry(&monitor->monitor, | ||
405 | cachefiles_read_waiter); | ||
406 | } | ||
407 | |||
408 | for (;;) { | ||
409 | backpage = find_get_page(bmapping, netpage->index); | ||
410 | if (backpage) | ||
411 | goto backing_page_already_present; | ||
412 | |||
413 | if (!newpage) { | ||
414 | newpage = page_cache_alloc_cold(bmapping); | ||
415 | if (!newpage) | ||
416 | goto nomem; | ||
417 | } | ||
418 | |||
419 | ret = add_to_page_cache(newpage, bmapping, | ||
420 | netpage->index, GFP_KERNEL); | ||
421 | if (ret == 0) | ||
422 | goto installed_new_backing_page; | ||
423 | if (ret != -EEXIST) | ||
424 | goto nomem; | ||
425 | } | ||
426 | |||
427 | /* we've installed a new backing page, so now we need to add it | ||
428 | * to the LRU list and start it reading */ | ||
429 | installed_new_backing_page: | ||
430 | _debug("- new %p", newpage); | ||
431 | |||
432 | backpage = newpage; | ||
433 | newpage = NULL; | ||
434 | |||
435 | page_cache_get(backpage); | ||
436 | if (!pagevec_add(&lru_pvec, backpage)) | ||
437 | __pagevec_lru_add_file(&lru_pvec); | ||
438 | |||
439 | reread_backing_page: | ||
440 | ret = bmapping->a_ops->readpage(NULL, backpage); | ||
441 | if (ret < 0) | ||
442 | goto read_error; | ||
443 | |||
444 | /* add the netfs page to the pagecache and LRU, and set the | ||
445 | * monitor to transfer the data across */ | ||
446 | monitor_backing_page: | ||
447 | _debug("- monitor add"); | ||
448 | |||
449 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | ||
450 | GFP_KERNEL); | ||
451 | if (ret < 0) { | ||
452 | if (ret == -EEXIST) { | ||
453 | page_cache_release(netpage); | ||
454 | continue; | ||
455 | } | ||
456 | goto nomem; | ||
457 | } | ||
458 | |||
459 | page_cache_get(netpage); | ||
460 | if (!pagevec_add(&lru_pvec, netpage)) | ||
461 | __pagevec_lru_add_file(&lru_pvec); | ||
462 | |||
463 | /* install a monitor */ | ||
464 | page_cache_get(netpage); | ||
465 | monitor->netfs_page = netpage; | ||
466 | |||
467 | page_cache_get(backpage); | ||
468 | monitor->back_page = backpage; | ||
469 | monitor->monitor.private = backpage; | ||
470 | add_page_wait_queue(backpage, &monitor->monitor); | ||
471 | monitor = NULL; | ||
472 | |||
473 | /* but the page may have been read before the monitor was | ||
474 | * installed, so the monitor may miss the event - so we have to | ||
475 | * ensure that we do get one in such a case */ | ||
476 | if (trylock_page(backpage)) { | ||
477 | _debug("2unlock %p {%lx}", backpage, backpage->flags); | ||
478 | unlock_page(backpage); | ||
479 | } | ||
480 | |||
481 | page_cache_release(backpage); | ||
482 | backpage = NULL; | ||
483 | |||
484 | page_cache_release(netpage); | ||
485 | netpage = NULL; | ||
486 | continue; | ||
487 | |||
488 | /* if the backing page is already present, it can be in one of | ||
489 | * three states: read in progress, read failed or read okay */ | ||
490 | backing_page_already_present: | ||
491 | _debug("- present %p", backpage); | ||
492 | |||
493 | if (PageError(backpage)) | ||
494 | goto io_error; | ||
495 | |||
496 | if (PageUptodate(backpage)) | ||
497 | goto backing_page_already_uptodate; | ||
498 | |||
499 | _debug("- not ready %p{%lx}", backpage, backpage->flags); | ||
500 | |||
501 | if (!trylock_page(backpage)) | ||
502 | goto monitor_backing_page; | ||
503 | |||
504 | if (PageError(backpage)) { | ||
505 | _debug("error %lx", backpage->flags); | ||
506 | unlock_page(backpage); | ||
507 | goto io_error; | ||
508 | } | ||
509 | |||
510 | if (PageUptodate(backpage)) | ||
511 | goto backing_page_already_uptodate_unlock; | ||
512 | |||
513 | /* we've locked a page that's neither up to date nor erroneous, | ||
514 | * so we need to attempt to read it again */ | ||
515 | goto reread_backing_page; | ||
516 | |||
517 | /* the backing page is already up to date, attach the netfs | ||
518 | * page to the pagecache and LRU and copy the data across */ | ||
519 | backing_page_already_uptodate_unlock: | ||
520 | _debug("uptodate %lx", backpage->flags); | ||
521 | unlock_page(backpage); | ||
522 | backing_page_already_uptodate: | ||
523 | _debug("- uptodate"); | ||
524 | |||
525 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | ||
526 | GFP_KERNEL); | ||
527 | if (ret < 0) { | ||
528 | if (ret == -EEXIST) { | ||
529 | page_cache_release(netpage); | ||
530 | continue; | ||
531 | } | ||
532 | goto nomem; | ||
533 | } | ||
534 | |||
535 | copy_highpage(netpage, backpage); | ||
536 | |||
537 | page_cache_release(backpage); | ||
538 | backpage = NULL; | ||
539 | |||
540 | if (!pagevec_add(mark_pvec, netpage)) | ||
541 | fscache_mark_pages_cached(op, mark_pvec); | ||
542 | |||
543 | page_cache_get(netpage); | ||
544 | if (!pagevec_add(&lru_pvec, netpage)) | ||
545 | __pagevec_lru_add_file(&lru_pvec); | ||
546 | |||
547 | fscache_end_io(op, netpage, 0); | ||
548 | page_cache_release(netpage); | ||
549 | netpage = NULL; | ||
550 | continue; | ||
551 | } | ||
552 | |||
553 | netpage = NULL; | ||
554 | |||
555 | _debug("out"); | ||
556 | |||
557 | out: | ||
558 | /* tidy up */ | ||
559 | pagevec_lru_add_file(&lru_pvec); | ||
560 | |||
561 | if (newpage) | ||
562 | page_cache_release(newpage); | ||
563 | if (netpage) | ||
564 | page_cache_release(netpage); | ||
565 | if (backpage) | ||
566 | page_cache_release(backpage); | ||
567 | if (monitor) { | ||
568 | fscache_put_retrieval(op); | ||
569 | kfree(monitor); | ||
570 | } | ||
571 | |||
572 | list_for_each_entry_safe(netpage, _n, list, lru) { | ||
573 | list_del(&netpage->lru); | ||
574 | page_cache_release(netpage); | ||
575 | } | ||
576 | |||
577 | _leave(" = %d", ret); | ||
578 | return ret; | ||
579 | |||
580 | nomem: | ||
581 | _debug("nomem"); | ||
582 | ret = -ENOMEM; | ||
583 | goto out; | ||
584 | |||
585 | read_error: | ||
586 | _debug("read error %d", ret); | ||
587 | if (ret == -ENOMEM) | ||
588 | goto out; | ||
589 | io_error: | ||
590 | cachefiles_io_error_obj(object, "Page read error on backing file"); | ||
591 | ret = -ENOBUFS; | ||
592 | goto out; | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * read a list of pages from the cache or allocate blocks in which to store | ||
597 | * them | ||
598 | */ | ||
599 | int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, | ||
600 | struct list_head *pages, | ||
601 | unsigned *nr_pages, | ||
602 | gfp_t gfp) | ||
603 | { | ||
604 | struct cachefiles_object *object; | ||
605 | struct cachefiles_cache *cache; | ||
606 | struct list_head backpages; | ||
607 | struct pagevec pagevec; | ||
608 | struct inode *inode; | ||
609 | struct page *page, *_n; | ||
610 | unsigned shift, nrbackpages; | ||
611 | int ret, ret2, space; | ||
612 | |||
613 | object = container_of(op->op.object, | ||
614 | struct cachefiles_object, fscache); | ||
615 | cache = container_of(object->fscache.cache, | ||
616 | struct cachefiles_cache, cache); | ||
617 | |||
618 | _enter("{OBJ%x,%d},,%d,,", | ||
619 | object->fscache.debug_id, atomic_read(&op->op.usage), | ||
620 | *nr_pages); | ||
621 | |||
622 | if (!object->backer) | ||
623 | return -ENOBUFS; | ||
624 | |||
625 | space = 1; | ||
626 | if (cachefiles_has_space(cache, 0, *nr_pages) < 0) | ||
627 | space = 0; | ||
628 | |||
629 | inode = object->backer->d_inode; | ||
630 | ASSERT(S_ISREG(inode->i_mode)); | ||
631 | ASSERT(inode->i_mapping->a_ops->bmap); | ||
632 | ASSERT(inode->i_mapping->a_ops->readpages); | ||
633 | |||
634 | /* calculate the shift required to use bmap */ | ||
635 | if (inode->i_sb->s_blocksize > PAGE_SIZE) | ||
636 | return -ENOBUFS; | ||
637 | |||
638 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; | ||
639 | |||
640 | pagevec_init(&pagevec, 0); | ||
641 | |||
642 | op->op.flags = FSCACHE_OP_FAST; | ||
643 | op->op.processor = cachefiles_read_copier; | ||
644 | |||
645 | INIT_LIST_HEAD(&backpages); | ||
646 | nrbackpages = 0; | ||
647 | |||
648 | ret = space ? -ENODATA : -ENOBUFS; | ||
649 | list_for_each_entry_safe(page, _n, pages, lru) { | ||
650 | sector_t block0, block; | ||
651 | |||
652 | /* we assume the absence or presence of the first block is a | ||
653 | * good enough indication for the page as a whole | ||
654 | * - TODO: don't use bmap() for this as it is _not_ actually | ||
655 | * good enough for this as it doesn't indicate errors, but | ||
656 | * it's all we've got for the moment | ||
657 | */ | ||
658 | block0 = page->index; | ||
659 | block0 <<= shift; | ||
660 | |||
661 | block = inode->i_mapping->a_ops->bmap(inode->i_mapping, | ||
662 | block0); | ||
663 | _debug("%llx -> %llx", | ||
664 | (unsigned long long) block0, | ||
665 | (unsigned long long) block); | ||
666 | |||
667 | if (block) { | ||
668 | /* we have data - add it to the list to give to the | ||
669 | * backing fs */ | ||
670 | list_move(&page->lru, &backpages); | ||
671 | (*nr_pages)--; | ||
672 | nrbackpages++; | ||
673 | } else if (space && pagevec_add(&pagevec, page) == 0) { | ||
674 | fscache_mark_pages_cached(op, &pagevec); | ||
675 | ret = -ENODATA; | ||
676 | } | ||
677 | } | ||
678 | |||
679 | if (pagevec_count(&pagevec) > 0) | ||
680 | fscache_mark_pages_cached(op, &pagevec); | ||
681 | |||
682 | if (list_empty(pages)) | ||
683 | ret = 0; | ||
684 | |||
685 | /* submit the apparently valid pages to the backing fs to be read from | ||
686 | * disk */ | ||
687 | if (nrbackpages > 0) { | ||
688 | ret2 = cachefiles_read_backing_file(object, op, &backpages, | ||
689 | &pagevec); | ||
690 | if (ret2 == -ENOMEM || ret2 == -EINTR) | ||
691 | ret = ret2; | ||
692 | } | ||
693 | |||
694 | if (pagevec_count(&pagevec) > 0) | ||
695 | fscache_mark_pages_cached(op, &pagevec); | ||
696 | |||
697 | _leave(" = %d [nr=%u%s]", | ||
698 | ret, *nr_pages, list_empty(pages) ? " empty" : ""); | ||
699 | return ret; | ||
700 | } | ||
701 | |||
702 | /* | ||
703 | * allocate a block in the cache in which to store a page | ||
704 | * - cache withdrawal is prevented by the caller | ||
705 | * - returns -EINTR if interrupted | ||
706 | * - returns -ENOMEM if ran out of memory | ||
707 | * - returns -ENOBUFS if no buffers can be made available | ||
708 | * - returns -ENOBUFS if page is beyond EOF | ||
709 | * - otherwise: | ||
710 | * - the metadata will be retained | ||
711 | * - 0 will be returned | ||
712 | */ | ||
713 | int cachefiles_allocate_page(struct fscache_retrieval *op, | ||
714 | struct page *page, | ||
715 | gfp_t gfp) | ||
716 | { | ||
717 | struct cachefiles_object *object; | ||
718 | struct cachefiles_cache *cache; | ||
719 | struct pagevec pagevec; | ||
720 | int ret; | ||
721 | |||
722 | object = container_of(op->op.object, | ||
723 | struct cachefiles_object, fscache); | ||
724 | cache = container_of(object->fscache.cache, | ||
725 | struct cachefiles_cache, cache); | ||
726 | |||
727 | _enter("%p,{%lx},", object, page->index); | ||
728 | |||
729 | ret = cachefiles_has_space(cache, 0, 1); | ||
730 | if (ret == 0) { | ||
731 | pagevec_init(&pagevec, 0); | ||
732 | pagevec_add(&pagevec, page); | ||
733 | fscache_mark_pages_cached(op, &pagevec); | ||
734 | } else { | ||
735 | ret = -ENOBUFS; | ||
736 | } | ||
737 | |||
738 | _leave(" = %d", ret); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * allocate blocks in the cache in which to store a set of pages | ||
744 | * - cache withdrawal is prevented by the caller | ||
745 | * - returns -EINTR if interrupted | ||
746 | * - returns -ENOMEM if ran out of memory | ||
747 | * - returns -ENOBUFS if some buffers couldn't be made available | ||
748 | * - returns -ENOBUFS if some pages are beyond EOF | ||
749 | * - otherwise: | ||
750 | * - -ENODATA will be returned | ||
751 | * - metadata will be retained for any page marked | ||
752 | */ | ||
753 | int cachefiles_allocate_pages(struct fscache_retrieval *op, | ||
754 | struct list_head *pages, | ||
755 | unsigned *nr_pages, | ||
756 | gfp_t gfp) | ||
757 | { | ||
758 | struct cachefiles_object *object; | ||
759 | struct cachefiles_cache *cache; | ||
760 | struct pagevec pagevec; | ||
761 | struct page *page; | ||
762 | int ret; | ||
763 | |||
764 | object = container_of(op->op.object, | ||
765 | struct cachefiles_object, fscache); | ||
766 | cache = container_of(object->fscache.cache, | ||
767 | struct cachefiles_cache, cache); | ||
768 | |||
769 | _enter("%p,,,%d,", object, *nr_pages); | ||
770 | |||
771 | ret = cachefiles_has_space(cache, 0, *nr_pages); | ||
772 | if (ret == 0) { | ||
773 | pagevec_init(&pagevec, 0); | ||
774 | |||
775 | list_for_each_entry(page, pages, lru) { | ||
776 | if (pagevec_add(&pagevec, page) == 0) | ||
777 | fscache_mark_pages_cached(op, &pagevec); | ||
778 | } | ||
779 | |||
780 | if (pagevec_count(&pagevec) > 0) | ||
781 | fscache_mark_pages_cached(op, &pagevec); | ||
782 | ret = -ENODATA; | ||
783 | } else { | ||
784 | ret = -ENOBUFS; | ||
785 | } | ||
786 | |||
787 | _leave(" = %d", ret); | ||
788 | return ret; | ||
789 | } | ||
790 | |||
791 | /* | ||
792 | * request a page be stored in the cache | ||
793 | * - cache withdrawal is prevented by the caller | ||
794 | * - this request may be ignored if there's no cache block available, in which | ||
795 | * case -ENOBUFS will be returned | ||
796 | * - if the op is in progress, 0 will be returned | ||
797 | */ | ||
798 | int cachefiles_write_page(struct fscache_storage *op, struct page *page) | ||
799 | { | ||
800 | struct cachefiles_object *object; | ||
801 | struct cachefiles_cache *cache; | ||
802 | mm_segment_t old_fs; | ||
803 | struct file *file; | ||
804 | loff_t pos; | ||
805 | void *data; | ||
806 | int ret; | ||
807 | |||
808 | ASSERT(op != NULL); | ||
809 | ASSERT(page != NULL); | ||
810 | |||
811 | object = container_of(op->op.object, | ||
812 | struct cachefiles_object, fscache); | ||
813 | |||
814 | _enter("%p,%p{%lx},,,", object, page, page->index); | ||
815 | |||
816 | if (!object->backer) { | ||
817 | _leave(" = -ENOBUFS"); | ||
818 | return -ENOBUFS; | ||
819 | } | ||
820 | |||
821 | ASSERT(S_ISREG(object->backer->d_inode->i_mode)); | ||
822 | |||
823 | cache = container_of(object->fscache.cache, | ||
824 | struct cachefiles_cache, cache); | ||
825 | |||
826 | /* write the page to the backing filesystem and let it store it in its | ||
827 | * own time */ | ||
828 | dget(object->backer); | ||
829 | mntget(cache->mnt); | ||
830 | file = dentry_open(object->backer, cache->mnt, O_RDWR, | ||
831 | cache->cache_cred); | ||
832 | if (IS_ERR(file)) { | ||
833 | ret = PTR_ERR(file); | ||
834 | } else { | ||
835 | ret = -EIO; | ||
836 | if (file->f_op->write) { | ||
837 | pos = (loff_t) page->index << PAGE_SHIFT; | ||
838 | data = kmap(page); | ||
839 | old_fs = get_fs(); | ||
840 | set_fs(KERNEL_DS); | ||
841 | ret = file->f_op->write( | ||
842 | file, (const void __user *) data, PAGE_SIZE, | ||
843 | &pos); | ||
844 | set_fs(old_fs); | ||
845 | kunmap(page); | ||
846 | if (ret != PAGE_SIZE) | ||
847 | ret = -EIO; | ||
848 | } | ||
849 | fput(file); | ||
850 | } | ||
851 | |||
852 | if (ret < 0) { | ||
853 | if (ret == -EIO) | ||
854 | cachefiles_io_error_obj( | ||
855 | object, "Write page to backing file failed"); | ||
856 | ret = -ENOBUFS; | ||
857 | } | ||
858 | |||
859 | _leave(" = %d", ret); | ||
860 | return ret; | ||
861 | } | ||
862 | |||
863 | /* | ||
864 | * detach a backing block from a page | ||
865 | * - cache withdrawal is prevented by the caller | ||
866 | */ | ||
867 | void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) | ||
868 | { | ||
869 | struct cachefiles_object *object; | ||
870 | struct cachefiles_cache *cache; | ||
871 | |||
872 | object = container_of(_object, struct cachefiles_object, fscache); | ||
873 | cache = container_of(object->fscache.cache, | ||
874 | struct cachefiles_cache, cache); | ||
875 | |||
876 | _enter("%p,{%lu}", object, page->index); | ||
877 | |||
878 | spin_unlock(&object->fscache.cookie->lock); | ||
879 | } | ||
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c new file mode 100644 index 000000000000..b5808cdb2232 --- /dev/null +++ b/fs/cachefiles/security.c | |||
@@ -0,0 +1,116 @@ | |||
1 | /* CacheFiles security management | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/cred.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | /* | ||
17 | * determine the security context within which we access the cache from within | ||
18 | * the kernel | ||
19 | */ | ||
20 | int cachefiles_get_security_ID(struct cachefiles_cache *cache) | ||
21 | { | ||
22 | struct cred *new; | ||
23 | int ret; | ||
24 | |||
25 | _enter("{%s}", cache->secctx); | ||
26 | |||
27 | new = prepare_kernel_cred(current); | ||
28 | if (!new) { | ||
29 | ret = -ENOMEM; | ||
30 | goto error; | ||
31 | } | ||
32 | |||
33 | if (cache->secctx) { | ||
34 | ret = set_security_override_from_ctx(new, cache->secctx); | ||
35 | if (ret < 0) { | ||
36 | put_cred(new); | ||
37 | printk(KERN_ERR "CacheFiles:" | ||
38 | " Security denies permission to nominate" | ||
39 | " security context: error %d\n", | ||
40 | ret); | ||
41 | goto error; | ||
42 | } | ||
43 | } | ||
44 | |||
45 | cache->cache_cred = new; | ||
46 | ret = 0; | ||
47 | error: | ||
48 | _leave(" = %d", ret); | ||
49 | return ret; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * see if mkdir and create can be performed in the root directory | ||
54 | */ | ||
55 | static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, | ||
56 | struct dentry *root) | ||
57 | { | ||
58 | int ret; | ||
59 | |||
60 | ret = security_inode_mkdir(root->d_inode, root, 0); | ||
61 | if (ret < 0) { | ||
62 | printk(KERN_ERR "CacheFiles:" | ||
63 | " Security denies permission to make dirs: error %d", | ||
64 | ret); | ||
65 | return ret; | ||
66 | } | ||
67 | |||
68 | ret = security_inode_create(root->d_inode, root, 0); | ||
69 | if (ret < 0) | ||
70 | printk(KERN_ERR "CacheFiles:" | ||
71 | " Security denies permission to create files: error %d", | ||
72 | ret); | ||
73 | |||
74 | return ret; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * check the security details of the on-disk cache | ||
79 | * - must be called with security override in force | ||
80 | */ | ||
81 | int cachefiles_determine_cache_security(struct cachefiles_cache *cache, | ||
82 | struct dentry *root, | ||
83 | const struct cred **_saved_cred) | ||
84 | { | ||
85 | struct cred *new; | ||
86 | int ret; | ||
87 | |||
88 | _enter(""); | ||
89 | |||
90 | /* duplicate the cache creds for COW (the override is currently in | ||
91 | * force, so we can use prepare_creds() to do this) */ | ||
92 | new = prepare_creds(); | ||
93 | if (!new) | ||
94 | return -ENOMEM; | ||
95 | |||
96 | cachefiles_end_secure(cache, *_saved_cred); | ||
97 | |||
98 | /* use the cache root dir's security context as the basis with | ||
99 | * which create files */ | ||
100 | ret = set_create_files_as(new, root->d_inode); | ||
101 | if (ret < 0) { | ||
102 | _leave(" = %d [cfa]", ret); | ||
103 | return ret; | ||
104 | } | ||
105 | |||
106 | put_cred(cache->cache_cred); | ||
107 | cache->cache_cred = new; | ||
108 | |||
109 | cachefiles_begin_secure(cache, _saved_cred); | ||
110 | ret = cachefiles_check_cache_dir(cache, root); | ||
111 | |||
112 | if (ret == -EOPNOTSUPP) | ||
113 | ret = 0; | ||
114 | _leave(" = %d", ret); | ||
115 | return ret; | ||
116 | } | ||
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c new file mode 100644 index 000000000000..f3e7a0bf068b --- /dev/null +++ b/fs/cachefiles/xattr.c | |||
@@ -0,0 +1,291 @@ | |||
1 | /* CacheFiles extended attribute management | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/file.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/fsnotify.h> | ||
17 | #include <linux/quotaops.h> | ||
18 | #include <linux/xattr.h> | ||
19 | #include "internal.h" | ||
20 | |||
21 | static const char cachefiles_xattr_cache[] = | ||
22 | XATTR_USER_PREFIX "CacheFiles.cache"; | ||
23 | |||
24 | /* | ||
25 | * check the type label on an object | ||
26 | * - done using xattrs | ||
27 | */ | ||
28 | int cachefiles_check_object_type(struct cachefiles_object *object) | ||
29 | { | ||
30 | struct dentry *dentry = object->dentry; | ||
31 | char type[3], xtype[3]; | ||
32 | int ret; | ||
33 | |||
34 | ASSERT(dentry); | ||
35 | ASSERT(dentry->d_inode); | ||
36 | |||
37 | if (!object->fscache.cookie) | ||
38 | strcpy(type, "C3"); | ||
39 | else | ||
40 | snprintf(type, 3, "%02x", object->fscache.cookie->def->type); | ||
41 | |||
42 | _enter("%p{%s}", object, type); | ||
43 | |||
44 | /* attempt to install a type label directly */ | ||
45 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2, | ||
46 | XATTR_CREATE); | ||
47 | if (ret == 0) { | ||
48 | _debug("SET"); /* we succeeded */ | ||
49 | goto error; | ||
50 | } | ||
51 | |||
52 | if (ret != -EEXIST) { | ||
53 | kerror("Can't set xattr on %*.*s [%lu] (err %d)", | ||
54 | dentry->d_name.len, dentry->d_name.len, | ||
55 | dentry->d_name.name, dentry->d_inode->i_ino, | ||
56 | -ret); | ||
57 | goto error; | ||
58 | } | ||
59 | |||
60 | /* read the current type label */ | ||
61 | ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3); | ||
62 | if (ret < 0) { | ||
63 | if (ret == -ERANGE) | ||
64 | goto bad_type_length; | ||
65 | |||
66 | kerror("Can't read xattr on %*.*s [%lu] (err %d)", | ||
67 | dentry->d_name.len, dentry->d_name.len, | ||
68 | dentry->d_name.name, dentry->d_inode->i_ino, | ||
69 | -ret); | ||
70 | goto error; | ||
71 | } | ||
72 | |||
73 | /* check the type is what we're expecting */ | ||
74 | if (ret != 2) | ||
75 | goto bad_type_length; | ||
76 | |||
77 | if (xtype[0] != type[0] || xtype[1] != type[1]) | ||
78 | goto bad_type; | ||
79 | |||
80 | ret = 0; | ||
81 | |||
82 | error: | ||
83 | _leave(" = %d", ret); | ||
84 | return ret; | ||
85 | |||
86 | bad_type_length: | ||
87 | kerror("Cache object %lu type xattr length incorrect", | ||
88 | dentry->d_inode->i_ino); | ||
89 | ret = -EIO; | ||
90 | goto error; | ||
91 | |||
92 | bad_type: | ||
93 | xtype[2] = 0; | ||
94 | kerror("Cache object %*.*s [%lu] type %s not %s", | ||
95 | dentry->d_name.len, dentry->d_name.len, | ||
96 | dentry->d_name.name, dentry->d_inode->i_ino, | ||
97 | xtype, type); | ||
98 | ret = -EIO; | ||
99 | goto error; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * set the state xattr on a cache file | ||
104 | */ | ||
105 | int cachefiles_set_object_xattr(struct cachefiles_object *object, | ||
106 | struct cachefiles_xattr *auxdata) | ||
107 | { | ||
108 | struct dentry *dentry = object->dentry; | ||
109 | int ret; | ||
110 | |||
111 | ASSERT(object->fscache.cookie); | ||
112 | ASSERT(dentry); | ||
113 | |||
114 | _enter("%p,#%d", object, auxdata->len); | ||
115 | |||
116 | /* attempt to install the cache metadata directly */ | ||
117 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | ||
118 | |||
119 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | ||
120 | &auxdata->type, auxdata->len, | ||
121 | XATTR_CREATE); | ||
122 | if (ret < 0 && ret != -ENOMEM) | ||
123 | cachefiles_io_error_obj( | ||
124 | object, | ||
125 | "Failed to set xattr with error %d", ret); | ||
126 | |||
127 | _leave(" = %d", ret); | ||
128 | return ret; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * update the state xattr on a cache file | ||
133 | */ | ||
134 | int cachefiles_update_object_xattr(struct cachefiles_object *object, | ||
135 | struct cachefiles_xattr *auxdata) | ||
136 | { | ||
137 | struct dentry *dentry = object->dentry; | ||
138 | int ret; | ||
139 | |||
140 | ASSERT(object->fscache.cookie); | ||
141 | ASSERT(dentry); | ||
142 | |||
143 | _enter("%p,#%d", object, auxdata->len); | ||
144 | |||
145 | /* attempt to install the cache metadata directly */ | ||
146 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | ||
147 | |||
148 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | ||
149 | &auxdata->type, auxdata->len, | ||
150 | XATTR_REPLACE); | ||
151 | if (ret < 0 && ret != -ENOMEM) | ||
152 | cachefiles_io_error_obj( | ||
153 | object, | ||
154 | "Failed to update xattr with error %d", ret); | ||
155 | |||
156 | _leave(" = %d", ret); | ||
157 | return ret; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * check the state xattr on a cache file | ||
162 | * - return -ESTALE if the object should be deleted | ||
163 | */ | ||
164 | int cachefiles_check_object_xattr(struct cachefiles_object *object, | ||
165 | struct cachefiles_xattr *auxdata) | ||
166 | { | ||
167 | struct cachefiles_xattr *auxbuf; | ||
168 | struct dentry *dentry = object->dentry; | ||
169 | int ret; | ||
170 | |||
171 | _enter("%p,#%d", object, auxdata->len); | ||
172 | |||
173 | ASSERT(dentry); | ||
174 | ASSERT(dentry->d_inode); | ||
175 | |||
176 | auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); | ||
177 | if (!auxbuf) { | ||
178 | _leave(" = -ENOMEM"); | ||
179 | return -ENOMEM; | ||
180 | } | ||
181 | |||
182 | /* read the current type label */ | ||
183 | ret = vfs_getxattr(dentry, cachefiles_xattr_cache, | ||
184 | &auxbuf->type, 512 + 1); | ||
185 | if (ret < 0) { | ||
186 | if (ret == -ENODATA) | ||
187 | goto stale; /* no attribute - power went off | ||
188 | * mid-cull? */ | ||
189 | |||
190 | if (ret == -ERANGE) | ||
191 | goto bad_type_length; | ||
192 | |||
193 | cachefiles_io_error_obj(object, | ||
194 | "Can't read xattr on %lu (err %d)", | ||
195 | dentry->d_inode->i_ino, -ret); | ||
196 | goto error; | ||
197 | } | ||
198 | |||
199 | /* check the on-disk object */ | ||
200 | if (ret < 1) | ||
201 | goto bad_type_length; | ||
202 | |||
203 | if (auxbuf->type != auxdata->type) | ||
204 | goto stale; | ||
205 | |||
206 | auxbuf->len = ret; | ||
207 | |||
208 | /* consult the netfs */ | ||
209 | if (object->fscache.cookie->def->check_aux) { | ||
210 | enum fscache_checkaux result; | ||
211 | unsigned int dlen; | ||
212 | |||
213 | dlen = auxbuf->len - 1; | ||
214 | |||
215 | _debug("checkaux %s #%u", | ||
216 | object->fscache.cookie->def->name, dlen); | ||
217 | |||
218 | result = fscache_check_aux(&object->fscache, | ||
219 | &auxbuf->data, dlen); | ||
220 | |||
221 | switch (result) { | ||
222 | /* entry okay as is */ | ||
223 | case FSCACHE_CHECKAUX_OKAY: | ||
224 | goto okay; | ||
225 | |||
226 | /* entry requires update */ | ||
227 | case FSCACHE_CHECKAUX_NEEDS_UPDATE: | ||
228 | break; | ||
229 | |||
230 | /* entry requires deletion */ | ||
231 | case FSCACHE_CHECKAUX_OBSOLETE: | ||
232 | goto stale; | ||
233 | |||
234 | default: | ||
235 | BUG(); | ||
236 | } | ||
237 | |||
238 | /* update the current label */ | ||
239 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | ||
240 | &auxdata->type, auxdata->len, | ||
241 | XATTR_REPLACE); | ||
242 | if (ret < 0) { | ||
243 | cachefiles_io_error_obj(object, | ||
244 | "Can't update xattr on %lu" | ||
245 | " (error %d)", | ||
246 | dentry->d_inode->i_ino, -ret); | ||
247 | goto error; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | okay: | ||
252 | ret = 0; | ||
253 | |||
254 | error: | ||
255 | kfree(auxbuf); | ||
256 | _leave(" = %d", ret); | ||
257 | return ret; | ||
258 | |||
259 | bad_type_length: | ||
260 | kerror("Cache object %lu xattr length incorrect", | ||
261 | dentry->d_inode->i_ino); | ||
262 | ret = -EIO; | ||
263 | goto error; | ||
264 | |||
265 | stale: | ||
266 | ret = -ESTALE; | ||
267 | goto error; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * remove the object's xattr to mark it stale | ||
272 | */ | ||
273 | int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, | ||
274 | struct dentry *dentry) | ||
275 | { | ||
276 | int ret; | ||
277 | |||
278 | ret = vfs_removexattr(dentry, cachefiles_xattr_cache); | ||
279 | if (ret < 0) { | ||
280 | if (ret == -ENOENT || ret == -ENODATA) | ||
281 | ret = 0; | ||
282 | else if (ret != -ENOMEM) | ||
283 | cachefiles_io_error(cache, | ||
284 | "Can't remove xattr from %lu" | ||
285 | " (error %d)", | ||
286 | dentry->d_inode->i_ino, -ret); | ||
287 | } | ||
288 | |||
289 | _leave(" = %d", ret); | ||
290 | return ret; | ||
291 | } | ||
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2f35cccfcd8d..54dce78fbb73 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -254,7 +254,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
254 | return -ENOMEM; | 254 | return -ENOMEM; |
255 | } | 255 | } |
256 | 256 | ||
257 | mode &= ~current->fs->umask; | 257 | mode &= ~current_umask(); |
258 | if (oplockEnabled) | 258 | if (oplockEnabled) |
259 | oplock = REQ_OPLOCK; | 259 | oplock = REQ_OPLOCK; |
260 | 260 | ||
@@ -479,7 +479,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
479 | rc = -ENOMEM; | 479 | rc = -ENOMEM; |
480 | else if (pTcon->unix_ext) { | 480 | else if (pTcon->unix_ext) { |
481 | struct cifs_unix_set_info_args args = { | 481 | struct cifs_unix_set_info_args args = { |
482 | .mode = mode & ~current->fs->umask, | 482 | .mode = mode & ~current_umask(), |
483 | .ctime = NO_CHANGE_64, | 483 | .ctime = NO_CHANGE_64, |
484 | .atime = NO_CHANGE_64, | 484 | .atime = NO_CHANGE_64, |
485 | .mtime = NO_CHANGE_64, | 485 | .mtime = NO_CHANGE_64, |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8797cc60805..f121a80fdd6f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -1125,7 +1125,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
1125 | goto mkdir_out; | 1125 | goto mkdir_out; |
1126 | } | 1126 | } |
1127 | 1127 | ||
1128 | mode &= ~current->fs->umask; | 1128 | mode &= ~current_umask(); |
1129 | rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, | 1129 | rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, |
1130 | mode, NULL /* netfid */, pInfo, &oplock, | 1130 | mode, NULL /* netfid */, pInfo, &oplock, |
1131 | full_path, cifs_sb->local_nls, | 1131 | full_path, cifs_sb->local_nls, |
@@ -1204,7 +1204,7 @@ mkdir_get_info: | |||
1204 | if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) | 1204 | if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) |
1205 | direntry->d_inode->i_nlink = 2; | 1205 | direntry->d_inode->i_nlink = 2; |
1206 | 1206 | ||
1207 | mode &= ~current->fs->umask; | 1207 | mode &= ~current_umask(); |
1208 | /* must turn on setgid bit if parent dir has it */ | 1208 | /* must turn on setgid bit if parent dir has it */ |
1209 | if (inode->i_mode & S_ISGID) | 1209 | if (inode->i_mode & S_ISGID) |
1210 | mode |= S_ISGID; | 1210 | mode |= S_ISGID; |
diff --git a/fs/compat.c b/fs/compat.c index 55efdfebdf5a..3f84d5f15889 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/poll.h> | 51 | #include <linux/poll.h> |
52 | #include <linux/mm.h> | 52 | #include <linux/mm.h> |
53 | #include <linux/eventpoll.h> | 53 | #include <linux/eventpoll.h> |
54 | #include <linux/fs_struct.h> | ||
54 | 55 | ||
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/mmu_context.h> | 57 | #include <asm/mmu_context.h> |
@@ -1195,16 +1196,12 @@ out: | |||
1195 | return ret; | 1196 | return ret; |
1196 | } | 1197 | } |
1197 | 1198 | ||
1198 | asmlinkage ssize_t | 1199 | static size_t compat_readv(struct file *file, |
1199 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) | 1200 | const struct compat_iovec __user *vec, |
1201 | unsigned long vlen, loff_t *pos) | ||
1200 | { | 1202 | { |
1201 | struct file *file; | ||
1202 | ssize_t ret = -EBADF; | 1203 | ssize_t ret = -EBADF; |
1203 | 1204 | ||
1204 | file = fget(fd); | ||
1205 | if (!file) | ||
1206 | return -EBADF; | ||
1207 | |||
1208 | if (!(file->f_mode & FMODE_READ)) | 1205 | if (!(file->f_mode & FMODE_READ)) |
1209 | goto out; | 1206 | goto out; |
1210 | 1207 | ||
@@ -1212,25 +1209,56 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsign | |||
1212 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) | 1209 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) |
1213 | goto out; | 1210 | goto out; |
1214 | 1211 | ||
1215 | ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos); | 1212 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); |
1216 | 1213 | ||
1217 | out: | 1214 | out: |
1218 | if (ret > 0) | 1215 | if (ret > 0) |
1219 | add_rchar(current, ret); | 1216 | add_rchar(current, ret); |
1220 | inc_syscr(current); | 1217 | inc_syscr(current); |
1221 | fput(file); | ||
1222 | return ret; | 1218 | return ret; |
1223 | } | 1219 | } |
1224 | 1220 | ||
1225 | asmlinkage ssize_t | 1221 | asmlinkage ssize_t |
1226 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) | 1222 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, |
1223 | unsigned long vlen) | ||
1227 | { | 1224 | { |
1228 | struct file *file; | 1225 | struct file *file; |
1229 | ssize_t ret = -EBADF; | 1226 | int fput_needed; |
1227 | ssize_t ret; | ||
1230 | 1228 | ||
1231 | file = fget(fd); | 1229 | file = fget_light(fd, &fput_needed); |
1232 | if (!file) | 1230 | if (!file) |
1233 | return -EBADF; | 1231 | return -EBADF; |
1232 | ret = compat_readv(file, vec, vlen, &file->f_pos); | ||
1233 | fput_light(file, fput_needed); | ||
1234 | return ret; | ||
1235 | } | ||
1236 | |||
1237 | asmlinkage ssize_t | ||
1238 | compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, | ||
1239 | unsigned long vlen, u32 pos_low, u32 pos_high) | ||
1240 | { | ||
1241 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1242 | struct file *file; | ||
1243 | int fput_needed; | ||
1244 | ssize_t ret; | ||
1245 | |||
1246 | if (pos < 0) | ||
1247 | return -EINVAL; | ||
1248 | file = fget_light(fd, &fput_needed); | ||
1249 | if (!file) | ||
1250 | return -EBADF; | ||
1251 | ret = compat_readv(file, vec, vlen, &pos); | ||
1252 | fput_light(file, fput_needed); | ||
1253 | return ret; | ||
1254 | } | ||
1255 | |||
1256 | static size_t compat_writev(struct file *file, | ||
1257 | const struct compat_iovec __user *vec, | ||
1258 | unsigned long vlen, loff_t *pos) | ||
1259 | { | ||
1260 | ssize_t ret = -EBADF; | ||
1261 | |||
1234 | if (!(file->f_mode & FMODE_WRITE)) | 1262 | if (!(file->f_mode & FMODE_WRITE)) |
1235 | goto out; | 1263 | goto out; |
1236 | 1264 | ||
@@ -1238,13 +1266,47 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsig | |||
1238 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) | 1266 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) |
1239 | goto out; | 1267 | goto out; |
1240 | 1268 | ||
1241 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos); | 1269 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); |
1242 | 1270 | ||
1243 | out: | 1271 | out: |
1244 | if (ret > 0) | 1272 | if (ret > 0) |
1245 | add_wchar(current, ret); | 1273 | add_wchar(current, ret); |
1246 | inc_syscw(current); | 1274 | inc_syscw(current); |
1247 | fput(file); | 1275 | return ret; |
1276 | } | ||
1277 | |||
1278 | asmlinkage ssize_t | ||
1279 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, | ||
1280 | unsigned long vlen) | ||
1281 | { | ||
1282 | struct file *file; | ||
1283 | int fput_needed; | ||
1284 | ssize_t ret; | ||
1285 | |||
1286 | file = fget_light(fd, &fput_needed); | ||
1287 | if (!file) | ||
1288 | return -EBADF; | ||
1289 | ret = compat_writev(file, vec, vlen, &file->f_pos); | ||
1290 | fput_light(file, fput_needed); | ||
1291 | return ret; | ||
1292 | } | ||
1293 | |||
1294 | asmlinkage ssize_t | ||
1295 | compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, | ||
1296 | unsigned long vlen, u32 pos_low, u32 pos_high) | ||
1297 | { | ||
1298 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1299 | struct file *file; | ||
1300 | int fput_needed; | ||
1301 | ssize_t ret; | ||
1302 | |||
1303 | if (pos < 0) | ||
1304 | return -EINVAL; | ||
1305 | file = fget_light(fd, &fput_needed); | ||
1306 | if (!file) | ||
1307 | return -EBADF; | ||
1308 | ret = compat_writev(file, vec, vlen, &pos); | ||
1309 | fput_light(file, fput_needed); | ||
1248 | return ret; | 1310 | return ret; |
1249 | } | 1311 | } |
1250 | 1312 | ||
@@ -1441,12 +1503,15 @@ int compat_do_execve(char * filename, | |||
1441 | bprm->cred = prepare_exec_creds(); | 1503 | bprm->cred = prepare_exec_creds(); |
1442 | if (!bprm->cred) | 1504 | if (!bprm->cred) |
1443 | goto out_unlock; | 1505 | goto out_unlock; |
1444 | check_unsafe_exec(bprm); | 1506 | |
1507 | retval = check_unsafe_exec(bprm); | ||
1508 | if (retval) | ||
1509 | goto out_unlock; | ||
1445 | 1510 | ||
1446 | file = open_exec(filename); | 1511 | file = open_exec(filename); |
1447 | retval = PTR_ERR(file); | 1512 | retval = PTR_ERR(file); |
1448 | if (IS_ERR(file)) | 1513 | if (IS_ERR(file)) |
1449 | goto out_unlock; | 1514 | goto out_unmark; |
1450 | 1515 | ||
1451 | sched_exec(); | 1516 | sched_exec(); |
1452 | 1517 | ||
@@ -1488,6 +1553,9 @@ int compat_do_execve(char * filename, | |||
1488 | goto out; | 1553 | goto out; |
1489 | 1554 | ||
1490 | /* execve succeeded */ | 1555 | /* execve succeeded */ |
1556 | write_lock(¤t->fs->lock); | ||
1557 | current->fs->in_exec = 0; | ||
1558 | write_unlock(¤t->fs->lock); | ||
1491 | current->in_execve = 0; | 1559 | current->in_execve = 0; |
1492 | mutex_unlock(¤t->cred_exec_mutex); | 1560 | mutex_unlock(¤t->cred_exec_mutex); |
1493 | acct_update_integrals(current); | 1561 | acct_update_integrals(current); |
@@ -1506,6 +1574,11 @@ out_file: | |||
1506 | fput(bprm->file); | 1574 | fput(bprm->file); |
1507 | } | 1575 | } |
1508 | 1576 | ||
1577 | out_unmark: | ||
1578 | write_lock(¤t->fs->lock); | ||
1579 | current->fs->in_exec = 0; | ||
1580 | write_unlock(¤t->fs->lock); | ||
1581 | |||
1509 | out_unlock: | 1582 | out_unlock: |
1510 | current->in_execve = 0; | 1583 | current->in_execve = 0; |
1511 | mutex_unlock(¤t->cred_exec_mutex); | 1584 | mutex_unlock(¤t->cred_exec_mutex); |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ff786687e93b..3e87ce443ea2 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/if.h> | 23 | #include <linux/if.h> |
24 | #include <linux/if_bridge.h> | 24 | #include <linux/if_bridge.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/raid/md.h> | 26 | #include <linux/raid/md_u.h> |
27 | #include <linux/kd.h> | 27 | #include <linux/kd.h> |
28 | #include <linux/route.h> | 28 | #include <linux/route.h> |
29 | #include <linux/in6.h> | 29 | #include <linux/in6.h> |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a07338d2d140..dd3634e4c967 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -318,6 +318,7 @@ out: | |||
318 | static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 318 | static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
319 | { | 319 | { |
320 | struct super_block *sb = dentry->d_sb; | 320 | struct super_block *sb = dentry->d_sb; |
321 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
321 | 322 | ||
322 | buf->f_type = CRAMFS_MAGIC; | 323 | buf->f_type = CRAMFS_MAGIC; |
323 | buf->f_bsize = PAGE_CACHE_SIZE; | 324 | buf->f_bsize = PAGE_CACHE_SIZE; |
@@ -326,6 +327,8 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
326 | buf->f_bavail = 0; | 327 | buf->f_bavail = 0; |
327 | buf->f_files = CRAMFS_SB(sb)->files; | 328 | buf->f_files = CRAMFS_SB(sb)->files; |
328 | buf->f_ffree = 0; | 329 | buf->f_ffree = 0; |
330 | buf->f_fsid.val[0] = (u32)id; | ||
331 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
329 | buf->f_namelen = CRAMFS_MAXPATHLEN; | 332 | buf->f_namelen = CRAMFS_MAXPATHLEN; |
330 | return 0; | 333 | return 0; |
331 | } | 334 | } |
@@ -459,11 +462,14 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s | |||
459 | static int cramfs_readpage(struct file *file, struct page * page) | 462 | static int cramfs_readpage(struct file *file, struct page * page) |
460 | { | 463 | { |
461 | struct inode *inode = page->mapping->host; | 464 | struct inode *inode = page->mapping->host; |
462 | u32 maxblock, bytes_filled; | 465 | u32 maxblock; |
466 | int bytes_filled; | ||
463 | void *pgdata; | 467 | void *pgdata; |
464 | 468 | ||
465 | maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 469 | maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
466 | bytes_filled = 0; | 470 | bytes_filled = 0; |
471 | pgdata = kmap(page); | ||
472 | |||
467 | if (page->index < maxblock) { | 473 | if (page->index < maxblock) { |
468 | struct super_block *sb = inode->i_sb; | 474 | struct super_block *sb = inode->i_sb; |
469 | u32 blkptr_offset = OFFSET(inode) + page->index*4; | 475 | u32 blkptr_offset = OFFSET(inode) + page->index*4; |
@@ -472,30 +478,43 @@ static int cramfs_readpage(struct file *file, struct page * page) | |||
472 | start_offset = OFFSET(inode) + maxblock*4; | 478 | start_offset = OFFSET(inode) + maxblock*4; |
473 | mutex_lock(&read_mutex); | 479 | mutex_lock(&read_mutex); |
474 | if (page->index) | 480 | if (page->index) |
475 | start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4, 4); | 481 | start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4, |
476 | compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) - start_offset); | 482 | 4); |
483 | compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) - | ||
484 | start_offset); | ||
477 | mutex_unlock(&read_mutex); | 485 | mutex_unlock(&read_mutex); |
478 | pgdata = kmap(page); | 486 | |
479 | if (compr_len == 0) | 487 | if (compr_len == 0) |
480 | ; /* hole */ | 488 | ; /* hole */ |
481 | else if (compr_len > (PAGE_CACHE_SIZE << 1)) | 489 | else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) { |
482 | printk(KERN_ERR "cramfs: bad compressed blocksize %u\n", compr_len); | 490 | pr_err("cramfs: bad compressed blocksize %u\n", |
483 | else { | 491 | compr_len); |
492 | goto err; | ||
493 | } else { | ||
484 | mutex_lock(&read_mutex); | 494 | mutex_lock(&read_mutex); |
485 | bytes_filled = cramfs_uncompress_block(pgdata, | 495 | bytes_filled = cramfs_uncompress_block(pgdata, |
486 | PAGE_CACHE_SIZE, | 496 | PAGE_CACHE_SIZE, |
487 | cramfs_read(sb, start_offset, compr_len), | 497 | cramfs_read(sb, start_offset, compr_len), |
488 | compr_len); | 498 | compr_len); |
489 | mutex_unlock(&read_mutex); | 499 | mutex_unlock(&read_mutex); |
500 | if (unlikely(bytes_filled < 0)) | ||
501 | goto err; | ||
490 | } | 502 | } |
491 | } else | 503 | } |
492 | pgdata = kmap(page); | 504 | |
493 | memset(pgdata + bytes_filled, 0, PAGE_CACHE_SIZE - bytes_filled); | 505 | memset(pgdata + bytes_filled, 0, PAGE_CACHE_SIZE - bytes_filled); |
494 | kunmap(page); | ||
495 | flush_dcache_page(page); | 506 | flush_dcache_page(page); |
507 | kunmap(page); | ||
496 | SetPageUptodate(page); | 508 | SetPageUptodate(page); |
497 | unlock_page(page); | 509 | unlock_page(page); |
498 | return 0; | 510 | return 0; |
511 | |||
512 | err: | ||
513 | kunmap(page); | ||
514 | ClearPageUptodate(page); | ||
515 | SetPageError(page); | ||
516 | unlock_page(page); | ||
517 | return 0; | ||
499 | } | 518 | } |
500 | 519 | ||
501 | static const struct address_space_operations cramfs_aops = { | 520 | static const struct address_space_operations cramfs_aops = { |
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index fc3ccb74626f..023329800d2e 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c | |||
@@ -50,7 +50,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen) | |||
50 | err: | 50 | err: |
51 | printk("Error %d while decompressing!\n", err); | 51 | printk("Error %d while decompressing!\n", err); |
52 | printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen); | 52 | printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen); |
53 | return 0; | 53 | return -EIO; |
54 | } | 54 | } |
55 | 55 | ||
56 | int cramfs_uncompress_init(void) | 56 | int cramfs_uncompress_init(void) |
diff --git a/fs/dcache.c b/fs/dcache.c index 90bbd7e1b116..761d30be2683 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/fdtable.h> | ||
21 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
22 | #include <linux/fsnotify.h> | 21 | #include <linux/fsnotify.h> |
23 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
@@ -32,6 +31,7 @@ | |||
32 | #include <linux/seqlock.h> | 31 | #include <linux/seqlock.h> |
33 | #include <linux/swap.h> | 32 | #include <linux/swap.h> |
34 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
34 | #include <linux/fs_struct.h> | ||
35 | #include "internal.h" | 35 | #include "internal.h" |
36 | 36 | ||
37 | int sysctl_vfs_cache_pressure __read_mostly = 100; | 37 | int sysctl_vfs_cache_pressure __read_mostly = 100; |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 44d725f612cf..b6a719a909f8 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb) | |||
18 | 18 | ||
19 | spin_lock(&inode_lock); | 19 | spin_lock(&inode_lock); |
20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
21 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 21 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) |
22 | continue; | 22 | continue; |
23 | if (inode->i_mapping->nrpages == 0) | 23 | if (inode->i_mapping->nrpages == 0) |
24 | continue; | 24 | continue; |
diff --git a/fs/efs/super.c b/fs/efs/super.c index 73b19cfc91fc..f04942810818 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -329,18 +329,22 @@ out_no_fs: | |||
329 | } | 329 | } |
330 | 330 | ||
331 | static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { | 331 | static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { |
332 | struct efs_sb_info *sb = SUPER_INFO(dentry->d_sb); | 332 | struct super_block *sb = dentry->d_sb; |
333 | struct efs_sb_info *sbi = SUPER_INFO(sb); | ||
334 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
333 | 335 | ||
334 | buf->f_type = EFS_SUPER_MAGIC; /* efs magic number */ | 336 | buf->f_type = EFS_SUPER_MAGIC; /* efs magic number */ |
335 | buf->f_bsize = EFS_BLOCKSIZE; /* blocksize */ | 337 | buf->f_bsize = EFS_BLOCKSIZE; /* blocksize */ |
336 | buf->f_blocks = sb->total_groups * /* total data blocks */ | 338 | buf->f_blocks = sbi->total_groups * /* total data blocks */ |
337 | (sb->group_size - sb->inode_blocks); | 339 | (sbi->group_size - sbi->inode_blocks); |
338 | buf->f_bfree = sb->data_free; /* free data blocks */ | 340 | buf->f_bfree = sbi->data_free; /* free data blocks */ |
339 | buf->f_bavail = sb->data_free; /* free blocks for non-root */ | 341 | buf->f_bavail = sbi->data_free; /* free blocks for non-root */ |
340 | buf->f_files = sb->total_groups * /* total inodes */ | 342 | buf->f_files = sbi->total_groups * /* total inodes */ |
341 | sb->inode_blocks * | 343 | sbi->inode_blocks * |
342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); | 344 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); |
343 | buf->f_ffree = sb->inode_free; /* free inodes */ | 345 | buf->f_ffree = sbi->inode_free; /* free inodes */ |
346 | buf->f_fsid.val[0] = (u32)id; | ||
347 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
344 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ | 348 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ |
345 | 349 | ||
346 | return 0; | 350 | return 0; |
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/tracehook.h> | 53 | #include <linux/tracehook.h> |
54 | #include <linux/kmod.h> | 54 | #include <linux/kmod.h> |
55 | #include <linux/fsnotify.h> | 55 | #include <linux/fsnotify.h> |
56 | #include <linux/fs_struct.h> | ||
56 | 57 | ||
57 | #include <asm/uaccess.h> | 58 | #include <asm/uaccess.h> |
58 | #include <asm/mmu_context.h> | 59 | #include <asm/mmu_context.h> |
@@ -1056,28 +1057,35 @@ EXPORT_SYMBOL(install_exec_creds); | |||
1056 | * - the caller must hold current->cred_exec_mutex to protect against | 1057 | * - the caller must hold current->cred_exec_mutex to protect against |
1057 | * PTRACE_ATTACH | 1058 | * PTRACE_ATTACH |
1058 | */ | 1059 | */ |
1059 | void check_unsafe_exec(struct linux_binprm *bprm) | 1060 | int check_unsafe_exec(struct linux_binprm *bprm) |
1060 | { | 1061 | { |
1061 | struct task_struct *p = current, *t; | 1062 | struct task_struct *p = current, *t; |
1062 | unsigned long flags; | 1063 | unsigned long flags; |
1063 | unsigned n_fs, n_sighand; | 1064 | unsigned n_fs; |
1065 | int res = 0; | ||
1064 | 1066 | ||
1065 | bprm->unsafe = tracehook_unsafe_exec(p); | 1067 | bprm->unsafe = tracehook_unsafe_exec(p); |
1066 | 1068 | ||
1067 | n_fs = 1; | 1069 | n_fs = 1; |
1068 | n_sighand = 1; | 1070 | write_lock(&p->fs->lock); |
1069 | lock_task_sighand(p, &flags); | 1071 | lock_task_sighand(p, &flags); |
1070 | for (t = next_thread(p); t != p; t = next_thread(t)) { | 1072 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
1071 | if (t->fs == p->fs) | 1073 | if (t->fs == p->fs) |
1072 | n_fs++; | 1074 | n_fs++; |
1073 | n_sighand++; | ||
1074 | } | 1075 | } |
1075 | 1076 | ||
1076 | if (atomic_read(&p->fs->count) > n_fs || | 1077 | if (p->fs->users > n_fs) { |
1077 | atomic_read(&p->sighand->count) > n_sighand) | ||
1078 | bprm->unsafe |= LSM_UNSAFE_SHARE; | 1078 | bprm->unsafe |= LSM_UNSAFE_SHARE; |
1079 | } else { | ||
1080 | if (p->fs->in_exec) | ||
1081 | res = -EAGAIN; | ||
1082 | p->fs->in_exec = 1; | ||
1083 | } | ||
1079 | 1084 | ||
1080 | unlock_task_sighand(p, &flags); | 1085 | unlock_task_sighand(p, &flags); |
1086 | write_unlock(&p->fs->lock); | ||
1087 | |||
1088 | return res; | ||
1081 | } | 1089 | } |
1082 | 1090 | ||
1083 | /* | 1091 | /* |
@@ -1296,12 +1304,15 @@ int do_execve(char * filename, | |||
1296 | bprm->cred = prepare_exec_creds(); | 1304 | bprm->cred = prepare_exec_creds(); |
1297 | if (!bprm->cred) | 1305 | if (!bprm->cred) |
1298 | goto out_unlock; | 1306 | goto out_unlock; |
1299 | check_unsafe_exec(bprm); | 1307 | |
1308 | retval = check_unsafe_exec(bprm); | ||
1309 | if (retval) | ||
1310 | goto out_unlock; | ||
1300 | 1311 | ||
1301 | file = open_exec(filename); | 1312 | file = open_exec(filename); |
1302 | retval = PTR_ERR(file); | 1313 | retval = PTR_ERR(file); |
1303 | if (IS_ERR(file)) | 1314 | if (IS_ERR(file)) |
1304 | goto out_unlock; | 1315 | goto out_unmark; |
1305 | 1316 | ||
1306 | sched_exec(); | 1317 | sched_exec(); |
1307 | 1318 | ||
@@ -1344,6 +1355,9 @@ int do_execve(char * filename, | |||
1344 | goto out; | 1355 | goto out; |
1345 | 1356 | ||
1346 | /* execve succeeded */ | 1357 | /* execve succeeded */ |
1358 | write_lock(¤t->fs->lock); | ||
1359 | current->fs->in_exec = 0; | ||
1360 | write_unlock(¤t->fs->lock); | ||
1347 | current->in_execve = 0; | 1361 | current->in_execve = 0; |
1348 | mutex_unlock(¤t->cred_exec_mutex); | 1362 | mutex_unlock(¤t->cred_exec_mutex); |
1349 | acct_update_integrals(current); | 1363 | acct_update_integrals(current); |
@@ -1362,6 +1376,11 @@ out_file: | |||
1362 | fput(bprm->file); | 1376 | fput(bprm->file); |
1363 | } | 1377 | } |
1364 | 1378 | ||
1379 | out_unmark: | ||
1380 | write_lock(¤t->fs->lock); | ||
1381 | current->fs->in_exec = 0; | ||
1382 | write_unlock(¤t->fs->lock); | ||
1383 | |||
1365 | out_unlock: | 1384 | out_unlock: |
1366 | current->in_execve = 0; | 1385 | current->in_execve = 0; |
1367 | mutex_unlock(¤t->cred_exec_mutex); | 1386 | mutex_unlock(¤t->cred_exec_mutex); |
diff --git a/fs/exofs/BUGS b/fs/exofs/BUGS new file mode 100644 index 000000000000..1b2d4c63a579 --- /dev/null +++ b/fs/exofs/BUGS | |||
@@ -0,0 +1,3 @@ | |||
1 | - Out-of-space may cause a severe problem if the object (and directory entry) | ||
2 | were written, but the inode attributes failed. Then if the filesystem was | ||
3 | unmounted and mounted the kernel can get into an endless loop doing a readdir. | ||
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild new file mode 100644 index 000000000000..cc2d22db119c --- /dev/null +++ b/fs/exofs/Kbuild | |||
@@ -0,0 +1,16 @@ | |||
1 | # | ||
2 | # Kbuild for the EXOFS module | ||
3 | # | ||
4 | # Copyright (C) 2008 Panasas Inc. All rights reserved. | ||
5 | # | ||
6 | # Authors: | ||
7 | # Boaz Harrosh <bharrosh@panasas.com> | ||
8 | # | ||
9 | # This program is free software; you can redistribute it and/or modify | ||
10 | # it under the terms of the GNU General Public License version 2 | ||
11 | # | ||
12 | # Kbuild - Gets included from the Kernels Makefile and build system | ||
13 | # | ||
14 | |||
15 | exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o | ||
16 | obj-$(CONFIG_EXOFS_FS) += exofs.o | ||
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig new file mode 100644 index 000000000000..86194b2f799d --- /dev/null +++ b/fs/exofs/Kconfig | |||
@@ -0,0 +1,13 @@ | |||
1 | config EXOFS_FS | ||
2 | tristate "exofs: OSD based file system support" | ||
3 | depends on SCSI_OSD_ULD | ||
4 | help | ||
5 | EXOFS is a file system that uses an OSD storage device, | ||
6 | as its backing storage. | ||
7 | |||
8 | # Debugging-related stuff | ||
9 | config EXOFS_DEBUG | ||
10 | bool "Enable debugging" | ||
11 | depends on EXOFS_FS | ||
12 | help | ||
13 | This option enables EXOFS debug prints. | ||
diff --git a/fs/exofs/common.h b/fs/exofs/common.h new file mode 100644 index 000000000000..b1512c4bb8c7 --- /dev/null +++ b/fs/exofs/common.h | |||
@@ -0,0 +1,184 @@ | |||
1 | /* | ||
2 | * common.h - Common definitions for both Kernel and user-mode utilities | ||
3 | * | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
6 | * Copyright (C) 2005, 2006 | ||
7 | * International Business Machines | ||
8 | * Copyright (C) 2008, 2009 | ||
9 | * Boaz Harrosh <bharrosh@panasas.com> | ||
10 | * | ||
11 | * Copyrights for code taken from ext2: | ||
12 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
13 | * Remy Card (card@masi.ibp.fr) | ||
14 | * Laboratoire MASI - Institut Blaise Pascal | ||
15 | * Universite Pierre et Marie Curie (Paris VI) | ||
16 | * from | ||
17 | * linux/fs/minix/inode.c | ||
18 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
19 | * | ||
20 | * This file is part of exofs. | ||
21 | * | ||
22 | * exofs is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
25 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
26 | * version of GPL for exofs is version 2. | ||
27 | * | ||
28 | * exofs is distributed in the hope that it will be useful, | ||
29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
31 | * GNU General Public License for more details. | ||
32 | * | ||
33 | * You should have received a copy of the GNU General Public License | ||
34 | * along with exofs; if not, write to the Free Software | ||
35 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
36 | */ | ||
37 | |||
38 | #ifndef __EXOFS_COM_H__ | ||
39 | #define __EXOFS_COM_H__ | ||
40 | |||
41 | #include <linux/types.h> | ||
42 | |||
43 | #include <scsi/osd_attributes.h> | ||
44 | #include <scsi/osd_initiator.h> | ||
45 | #include <scsi/osd_sec.h> | ||
46 | |||
47 | /**************************************************************************** | ||
48 | * Object ID related defines | ||
49 | * NOTE: inode# = object ID - EXOFS_OBJ_OFF | ||
50 | ****************************************************************************/ | ||
51 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ | ||
52 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ | ||
53 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ | ||
54 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ | ||
55 | |||
56 | /* exofs Application specific page/attribute */ | ||
57 | # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) | ||
58 | # define EXOFS_ATTR_INODE_DATA 1 | ||
59 | |||
60 | /* | ||
61 | * The maximum number of files we can have is limited by the size of the | ||
62 | * inode number. This is the largest object ID that the file system supports. | ||
63 | * Object IDs 0, 1, and 2 are always in use (see above defines). | ||
64 | */ | ||
65 | enum { | ||
66 | EXOFS_MAX_INO_ID = (sizeof(ino_t) * 8 == 64) ? ULLONG_MAX : | ||
67 | (1ULL << (sizeof(ino_t) * 8ULL - 1ULL)), | ||
68 | EXOFS_MAX_ID = (EXOFS_MAX_INO_ID - 1 - EXOFS_OBJ_OFF), | ||
69 | }; | ||
70 | |||
71 | /**************************************************************************** | ||
72 | * Misc. | ||
73 | ****************************************************************************/ | ||
74 | #define EXOFS_BLKSHIFT 12 | ||
75 | #define EXOFS_BLKSIZE (1UL << EXOFS_BLKSHIFT) | ||
76 | |||
77 | /**************************************************************************** | ||
78 | * superblock-related things | ||
79 | ****************************************************************************/ | ||
80 | #define EXOFS_SUPER_MAGIC 0x5DF5 | ||
81 | |||
82 | /* | ||
83 | * The file system control block - stored in an object's data (mainly, the one | ||
84 | * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored | ||
85 | * on disk. Right now it just has a magic value, which is basically a sanity | ||
86 | * check on our ability to communicate with the object store. | ||
87 | */ | ||
88 | struct exofs_fscb { | ||
89 | __le64 s_nextid; /* Highest object ID used */ | ||
90 | __le32 s_numfiles; /* Number of files on fs */ | ||
91 | __le16 s_magic; /* Magic signature */ | ||
92 | __le16 s_newfs; /* Non-zero if this is a new fs */ | ||
93 | }; | ||
94 | |||
95 | /**************************************************************************** | ||
96 | * inode-related things | ||
97 | ****************************************************************************/ | ||
98 | #define EXOFS_IDATA 5 | ||
99 | |||
100 | /* | ||
101 | * The file control block - stored in an object's attributes. This is where | ||
102 | * the in-memory inode is stored on disk. | ||
103 | */ | ||
104 | struct exofs_fcb { | ||
105 | __le64 i_size; /* Size of the file */ | ||
106 | __le16 i_mode; /* File mode */ | ||
107 | __le16 i_links_count; /* Links count */ | ||
108 | __le32 i_uid; /* Owner Uid */ | ||
109 | __le32 i_gid; /* Group Id */ | ||
110 | __le32 i_atime; /* Access time */ | ||
111 | __le32 i_ctime; /* Creation time */ | ||
112 | __le32 i_mtime; /* Modification time */ | ||
113 | __le32 i_flags; /* File flags (unused for now)*/ | ||
114 | __le32 i_generation; /* File version (for NFS) */ | ||
115 | __le32 i_data[EXOFS_IDATA]; /* Short symlink names and device #s */ | ||
116 | }; | ||
117 | |||
118 | #define EXOFS_INO_ATTR_SIZE sizeof(struct exofs_fcb) | ||
119 | |||
120 | /* This is the Attribute the fcb is stored in */ | ||
121 | static const struct __weak osd_attr g_attr_inode_data = ATTR_DEF( | ||
122 | EXOFS_APAGE_FS_DATA, | ||
123 | EXOFS_ATTR_INODE_DATA, | ||
124 | EXOFS_INO_ATTR_SIZE); | ||
125 | |||
126 | /**************************************************************************** | ||
127 | * dentry-related things | ||
128 | ****************************************************************************/ | ||
129 | #define EXOFS_NAME_LEN 255 | ||
130 | |||
131 | /* | ||
132 | * The on-disk directory entry | ||
133 | */ | ||
134 | struct exofs_dir_entry { | ||
135 | __le64 inode_no; /* inode number */ | ||
136 | __le16 rec_len; /* directory entry length */ | ||
137 | u8 name_len; /* name length */ | ||
138 | u8 file_type; /* umm...file type */ | ||
139 | char name[EXOFS_NAME_LEN]; /* file name */ | ||
140 | }; | ||
141 | |||
142 | enum { | ||
143 | EXOFS_FT_UNKNOWN, | ||
144 | EXOFS_FT_REG_FILE, | ||
145 | EXOFS_FT_DIR, | ||
146 | EXOFS_FT_CHRDEV, | ||
147 | EXOFS_FT_BLKDEV, | ||
148 | EXOFS_FT_FIFO, | ||
149 | EXOFS_FT_SOCK, | ||
150 | EXOFS_FT_SYMLINK, | ||
151 | EXOFS_FT_MAX | ||
152 | }; | ||
153 | |||
154 | #define EXOFS_DIR_PAD 4 | ||
155 | #define EXOFS_DIR_ROUND (EXOFS_DIR_PAD - 1) | ||
156 | #define EXOFS_DIR_REC_LEN(name_len) \ | ||
157 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ | ||
158 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) | ||
159 | |||
160 | /************************* | ||
161 | * function declarations * | ||
162 | *************************/ | ||
163 | /* osd.c */ | ||
164 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
165 | const struct osd_obj_id *obj); | ||
166 | |||
167 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); | ||
168 | static inline int exofs_check_ok(struct osd_request *or) | ||
169 | { | ||
170 | return exofs_check_ok_resid(or, NULL, NULL); | ||
171 | } | ||
172 | int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); | ||
173 | int exofs_async_op(struct osd_request *or, | ||
174 | osd_req_done_fn *async_done, void *caller_context, u8 *cred); | ||
175 | |||
176 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | ||
177 | |||
178 | int osd_req_read_kern(struct osd_request *or, | ||
179 | const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); | ||
180 | |||
181 | int osd_req_write_kern(struct osd_request *or, | ||
182 | const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); | ||
183 | |||
184 | #endif /*ifndef __EXOFS_COM_H__*/ | ||
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c new file mode 100644 index 000000000000..65b0c8c776a1 --- /dev/null +++ b/fs/exofs/dir.c | |||
@@ -0,0 +1,672 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include "exofs.h" | ||
37 | |||
38 | static inline unsigned exofs_chunk_size(struct inode *inode) | ||
39 | { | ||
40 | return inode->i_sb->s_blocksize; | ||
41 | } | ||
42 | |||
/*
 * Release a directory page obtained from exofs_get_page(): undo the
 * kmap() done there, then drop the page-cache reference.
 */
static inline void exofs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
48 | |||
49 | /* Accesses dir's inode->i_size must be called under inode lock */ | ||
50 | static inline unsigned long dir_pages(struct inode *inode) | ||
51 | { | ||
52 | return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
53 | } | ||
54 | |||
/*
 * Return the number of valid bytes in page @page_nr of directory
 * @inode: a full PAGE_CACHE_SIZE for interior pages, the i_size
 * remainder for the last page.
 */
static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
{
	loff_t last_byte = inode->i_size;

	last_byte -= page_nr << PAGE_CACHE_SHIFT;
	if (last_byte > PAGE_CACHE_SIZE)
		last_byte = PAGE_CACHE_SIZE;
	return last_byte;
}
64 | |||
/*
 * Finish a modification of [pos, pos+len) in a locked directory page:
 * bump i_version (invalidates cached readdir positions), grow i_size if
 * the write extended the directory, and mark the page dirty.  For
 * DIRSYNC directories the page is written out synchronously (which also
 * unlocks it); otherwise it is just unlocked.  Returns 0 or a writeback
 * error.
 */
static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
{
	struct address_space *mapping = page->mapping;
	struct inode *dir = mapping->host;
	int err = 0;

	dir->i_version++;

	if (!PageUptodate(page))
		SetPageUptodate(page);

	if (pos+len > dir->i_size) {
		i_size_write(dir, pos+len);
		mark_inode_dirty(dir);
	}
	set_page_dirty(page);

	if (IS_DIRSYNC(dir))
		err = write_one_page(page, 1);	/* wait for the write */
	else
		unlock_page(page);

	return err;
}
89 | |||
/*
 * Validate every directory entry in @page: minimum record length,
 * 4-byte alignment, rec_len large enough for name_len, no entry
 * crossing a chunk boundary, and the entries tiling the page exactly.
 * On success the page is marked Checked; on failure it is additionally
 * marked Error so exofs_get_page() refuses to hand it out.
 */
static void exofs_check_page(struct page *page)
{
	struct inode *dir = page->mapping->host;
	unsigned chunk_size = exofs_chunk_size(dir);
	char *kaddr = page_address(page);
	unsigned offs, rec_len;
	unsigned limit = PAGE_CACHE_SIZE;
	struct exofs_dir_entry *p;
	char *error;

	/* if the page is the last one in the directory */
	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
		limit = dir->i_size & ~PAGE_CACHE_MASK;
		/* a directory must be a whole number of chunks */
		if (limit & (chunk_size - 1))
			goto Ebadsize;
		if (!limit)
			goto out;
	}
	/* walk the entries; stop while a minimal entry could still fit */
	for (offs = 0; offs <= limit - EXOFS_DIR_REC_LEN(1); offs += rec_len) {
		p = (struct exofs_dir_entry *)(kaddr + offs);
		rec_len = le16_to_cpu(p->rec_len);

		if (rec_len < EXOFS_DIR_REC_LEN(1))
			goto Eshort;
		if (rec_len & 3)
			goto Ealign;
		if (rec_len < EXOFS_DIR_REC_LEN(p->name_len))
			goto Enamelen;
		/* entry must not straddle a chunk boundary */
		if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
			goto Espan;
	}
	if (offs != limit)
		goto Eend;
out:
	SetPageChecked(page);
	return;

	/* error exits: log, then poison the page below */
Ebadsize:
	EXOFS_ERR("ERROR [exofs_check_page]: "
		"size of directory #%lu is not a multiple of chunk size",
		dir->i_ino
	);
	goto fail;
Eshort:
	error = "rec_len is smaller than minimal";
	goto bad_entry;
Ealign:
	error = "unaligned directory entry";
	goto bad_entry;
Enamelen:
	error = "rec_len is too small for name_len";
	goto bad_entry;
Espan:
	error = "directory entry across blocks";
	goto bad_entry;
bad_entry:
	EXOFS_ERR(
		"ERROR [exofs_check_page]: bad entry in directory #%lu: %s - "
		"offset=%lu, inode=%llu, rec_len=%d, name_len=%d",
		dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
		_LLU(le64_to_cpu(p->inode_no)),
		rec_len, p->name_len);
	goto fail;
Eend:
	p = (struct exofs_dir_entry *)(kaddr + offs);
	EXOFS_ERR("ERROR [exofs_check_page]: "
		"entry in directory #%lu spans the page boundary"
		"offset=%lu, inode=%llu",
		dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
		_LLU(le64_to_cpu(p->inode_no)));
fail:
	SetPageChecked(page);
	SetPageError(page);
}
164 | |||
/*
 * Read directory page @n into the page cache, kmap() it, and validate
 * its entries (once per page; the Checked flag caches the result).
 * Returns the mapped page, or ERR_PTR(-EIO) if validation has flagged
 * it.  The caller must release it with exofs_put_page().
 */
static struct page *exofs_get_page(struct inode *dir, unsigned long n)
{
	struct address_space *mapping = dir->i_mapping;
	struct page *page = read_mapping_page(mapping, n, NULL);

	if (!IS_ERR(page)) {
		kmap(page);
		if (!PageChecked(page))
			exofs_check_page(page);
		if (PageError(page))
			goto fail;
	}
	return page;

fail:
	exofs_put_page(page);
	return ERR_PTR(-EIO);
}
183 | |||
184 | static inline int exofs_match(int len, const unsigned char *name, | ||
185 | struct exofs_dir_entry *de) | ||
186 | { | ||
187 | if (len != de->name_len) | ||
188 | return 0; | ||
189 | if (!de->inode_no) | ||
190 | return 0; | ||
191 | return !memcmp(name, de->name, len); | ||
192 | } | ||
193 | |||
/*
 * Advance to the next directory entry by the current entry's on-disk
 * record length.  Caller must have ruled out rec_len == 0 first, or
 * this loops forever.
 */
static inline
struct exofs_dir_entry *exofs_next_entry(struct exofs_dir_entry *p)
{
	return (struct exofs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
}
199 | |||
/*
 * Re-derive a safe readdir offset after the directory changed under us
 * (i_version mismatch): walk entries from the start of the chunk
 * containing @offset and return the offset of the first entry at or
 * beyond it, so f_pos lands on a real entry boundary.
 */
static inline unsigned
exofs_validate_entry(char *base, unsigned offset, unsigned mask)
{
	struct exofs_dir_entry *de = (struct exofs_dir_entry *)(base + offset);
	struct exofs_dir_entry *p =
			(struct exofs_dir_entry *)(base + (offset&mask));
	while ((char *)p < (char *)de) {
		if (p->rec_len == 0)
			break;	/* corrupt entry; stop rather than spin */
		p = exofs_next_entry(p);
	}
	return (char *)p - base;
}
213 | |||
/* On-disk EXOFS_FT_* code -> readdir DT_* d_type code. */
static unsigned char exofs_filetype_table[EXOFS_FT_MAX] = {
	[EXOFS_FT_UNKNOWN]	= DT_UNKNOWN,
	[EXOFS_FT_REG_FILE]	= DT_REG,
	[EXOFS_FT_DIR]		= DT_DIR,
	[EXOFS_FT_CHRDEV]	= DT_CHR,
	[EXOFS_FT_BLKDEV]	= DT_BLK,
	[EXOFS_FT_FIFO]		= DT_FIFO,
	[EXOFS_FT_SOCK]		= DT_SOCK,
	[EXOFS_FT_SYMLINK]	= DT_LNK,
};

/* i_mode S_IFMT nibble (shifted down by S_SHIFT) -> EXOFS_FT_* code. */
#define S_SHIFT 12
static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= EXOFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= EXOFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= EXOFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= EXOFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= EXOFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= EXOFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= EXOFS_FT_SYMLINK,
};
235 | |||
236 | static inline | ||
237 | void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) | ||
238 | { | ||
239 | mode_t mode = inode->i_mode; | ||
240 | de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | ||
241 | } | ||
242 | |||
/*
 * readdir() for exofs: walk the directory's page-cache pages from
 * f_pos onward, feeding each live entry to @filldir.  f_pos encodes
 * (page_index << PAGE_CACHE_SHIFT) | byte-offset-within-page.  If the
 * directory changed since our last call (i_version mismatch) the
 * offset is re-validated onto an entry boundary first.
 */
static int
exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	loff_t pos = filp->f_pos;
	struct inode *inode = filp->f_path.dentry->d_inode;
	unsigned int offset = pos & ~PAGE_CACHE_MASK;
	unsigned long n = pos >> PAGE_CACHE_SHIFT;
	unsigned long npages = dir_pages(inode);
	unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
	unsigned char *types = NULL;
	int need_revalidate = (filp->f_version != inode->i_version);

	/* past the last spot a minimal entry could start: nothing to emit */
	if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
		return 0;

	types = exofs_filetype_table;

	for ( ; n < npages; n++, offset = 0) {
		char *kaddr, *limit;
		struct exofs_dir_entry *de;
		struct page *page = exofs_get_page(inode, n);

		if (IS_ERR(page)) {
			EXOFS_ERR("ERROR: "
				   "bad page in #%lu",
				   inode->i_ino);
			/* skip past the bad page so we don't re-read it */
			filp->f_pos += PAGE_CACHE_SIZE - offset;
			return PTR_ERR(page);
		}
		kaddr = page_address(page);
		if (unlikely(need_revalidate)) {
			if (offset) {
				/* snap f_pos back onto an entry boundary */
				offset = exofs_validate_entry(kaddr, offset,
								chunk_mask);
				filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset;
			}
			filp->f_version = inode->i_version;
			need_revalidate = 0;
		}
		de = (struct exofs_dir_entry *)(kaddr + offset);
		limit = kaddr + exofs_last_byte(inode, n) -
							EXOFS_DIR_REC_LEN(1);
		for (; (char *)de <= limit; de = exofs_next_entry(de)) {
			if (de->rec_len == 0) {
				EXOFS_ERR("ERROR: "
					"zero-length directory entry");
				exofs_put_page(page);
				return -EIO;
			}
			if (de->inode_no) {
				int over;
				unsigned char d_type = DT_UNKNOWN;

				if (types && de->file_type < EXOFS_FT_MAX)
					d_type = types[de->file_type];

				offset = (char *)de - kaddr;
				over = filldir(dirent, de->name, de->name_len,
						(n<<PAGE_CACHE_SHIFT) | offset,
						le64_to_cpu(de->inode_no),
						d_type);
				if (over) {
					/* caller's buffer full; resume later */
					exofs_put_page(page);
					return 0;
				}
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		exofs_put_page(page);
	}

	return 0;
}
316 | |||
/*
 * Find the directory entry for @dentry in @dir.  On success, returns
 * the entry and sets *res_page to its (kmapped) page, which the caller
 * must release with exofs_put_page().  Returns NULL when not found;
 * callers must check the return value before touching *res_page.
 * The search starts at the page cached in oi->i_dir_start_lookup and
 * wraps around, which makes repeated lookups in big dirs cheaper.
 */
struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
			struct dentry *dentry, struct page **res_page)
{
	const unsigned char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
	unsigned long start, n;
	unsigned long npages = dir_pages(dir);
	struct page *page = NULL;
	struct exofs_i_info *oi = exofs_i(dir);
	struct exofs_dir_entry *de;

	if (npages == 0)
		goto out;

	*res_page = NULL;

	start = oi->i_dir_start_lookup;
	if (start >= npages)
		start = 0;
	n = start;
	do {
		char *kaddr;
		page = exofs_get_page(dir, n);
		if (!IS_ERR(page)) {
			kaddr = page_address(page);
			de = (struct exofs_dir_entry *) kaddr;
			/* last position a record of @reclen could start at */
			kaddr += exofs_last_byte(dir, n) - reclen;
			while ((char *) de <= kaddr) {
				if (de->rec_len == 0) {
					EXOFS_ERR(
						"ERROR: exofs_find_entry: "
						"zero-length directory entry");
					exofs_put_page(page);
					goto out;
				}
				if (exofs_match(namelen, name, de))
					goto found;
				de = exofs_next_entry(de);
			}
			exofs_put_page(page);
		}
		if (++n >= npages)
			n = 0;
	} while (n != start);
out:
	return NULL;

found:
	*res_page = page;
	/* remember where we found it, to speed up the next lookup */
	oi->i_dir_start_lookup = n;
	return de;
}
370 | |||
371 | struct exofs_dir_entry *exofs_dotdot(struct inode *dir, struct page **p) | ||
372 | { | ||
373 | struct page *page = exofs_get_page(dir, 0); | ||
374 | struct exofs_dir_entry *de = NULL; | ||
375 | |||
376 | if (!IS_ERR(page)) { | ||
377 | de = exofs_next_entry( | ||
378 | (struct exofs_dir_entry *)page_address(page)); | ||
379 | *p = page; | ||
380 | } | ||
381 | return de; | ||
382 | } | ||
383 | |||
384 | ino_t exofs_parent_ino(struct dentry *child) | ||
385 | { | ||
386 | struct page *page; | ||
387 | struct exofs_dir_entry *de; | ||
388 | ino_t ino; | ||
389 | |||
390 | de = exofs_dotdot(child->d_inode, &page); | ||
391 | if (!de) | ||
392 | return 0; | ||
393 | |||
394 | ino = le64_to_cpu(de->inode_no); | ||
395 | exofs_put_page(page); | ||
396 | return ino; | ||
397 | } | ||
398 | |||
399 | ino_t exofs_inode_by_name(struct inode *dir, struct dentry *dentry) | ||
400 | { | ||
401 | ino_t res = 0; | ||
402 | struct exofs_dir_entry *de; | ||
403 | struct page *page; | ||
404 | |||
405 | de = exofs_find_entry(dir, dentry, &page); | ||
406 | if (de) { | ||
407 | res = le64_to_cpu(de->inode_no); | ||
408 | exofs_put_page(page); | ||
409 | } | ||
410 | return res; | ||
411 | } | ||
412 | |||
/*
 * Repoint an existing directory entry @de (living in @page of @dir) at
 * @inode, updating its file-type byte too.  Used by rename.  Consumes
 * the page reference via exofs_put_page().  Returns 0 or a negative
 * errno from write_begin/commit.
 */
int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
			struct page *page, struct inode *inode)
{
	loff_t pos = page_offset(page) +
			(char *) de - (char *) page_address(page);
	unsigned len = le16_to_cpu(de->rec_len);
	int err;

	lock_page(page);
	err = exofs_write_begin(NULL, page->mapping, pos, len,
				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
	if (err)
		EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n",
			  err);

	de->inode_no = cpu_to_le64(inode->i_ino);
	exofs_set_de_type(de, inode);
	if (likely(!err))
		err = exofs_commit_chunk(page, pos, len);
	exofs_put_page(page);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	mark_inode_dirty(dir);
	return err;
}
437 | |||
/*
 * Add a directory entry for (@dentry, @inode) in @dentry's parent.
 * Scans existing pages for either a hole (inode_no == 0) big enough,
 * or a live entry with enough slack after its name to be split in two;
 * iterating to n == npages lets us append a brand-new chunk at EOF.
 * Returns 0, -EEXIST if the name is already present, or another
 * negative errno.
 */
int exofs_add_link(struct dentry *dentry, struct inode *inode)
{
	struct inode *dir = dentry->d_parent->d_inode;
	const unsigned char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned chunk_size = exofs_chunk_size(dir);
	unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
	unsigned short rec_len, name_len;
	struct page *page = NULL;
	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
	struct exofs_dir_entry *de;
	unsigned long npages = dir_pages(dir);
	unsigned long n;
	char *kaddr;
	loff_t pos;
	int err;

	/* <= npages: one extra iteration to start a fresh page past EOF */
	for (n = 0; n <= npages; n++) {
		char *dir_end;

		page = exofs_get_page(dir, n);
		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto out;
		lock_page(page);
		kaddr = page_address(page);
		dir_end = kaddr + exofs_last_byte(dir, n);
		de = (struct exofs_dir_entry *)kaddr;
		kaddr += PAGE_CACHE_SIZE - reclen;
		while ((char *)de <= kaddr) {
			if ((char *)de == dir_end) {
				/* at EOF: claim a whole new empty chunk */
				name_len = 0;
				rec_len = chunk_size;
				de->rec_len = cpu_to_le16(chunk_size);
				de->inode_no = 0;
				goto got_it;
			}
			if (de->rec_len == 0) {
				EXOFS_ERR("ERROR: exofs_add_link: "
					"zero-length directory entry");
				err = -EIO;
				goto out_unlock;
			}
			err = -EEXIST;
			if (exofs_match(namelen, name, de))
				goto out_unlock;
			/* name_len = space the existing entry actually needs */
			name_len = EXOFS_DIR_REC_LEN(de->name_len);
			rec_len = le16_to_cpu(de->rec_len);
			/* reuse a hole, or split a live entry's slack */
			if (!de->inode_no && rec_len >= reclen)
				goto got_it;
			if (rec_len >= name_len + reclen)
				goto got_it;
			de = (struct exofs_dir_entry *) ((char *) de + rec_len);
		}
		unlock_page(page);
		exofs_put_page(page);
	}

	EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode);
	return -EINVAL;

got_it:
	pos = page_offset(page) +
		(char *)de - (char *)page_address(page);
	err = exofs_write_begin(NULL, page->mapping, pos, rec_len, 0,
							&page, NULL);
	if (err)
		goto out_unlock;
	if (de->inode_no) {
		/* split: shrink the live entry, new entry takes its slack */
		struct exofs_dir_entry *de1 =
			(struct exofs_dir_entry *)((char *)de + name_len);
		de1->rec_len = cpu_to_le16(rec_len - name_len);
		de->rec_len = cpu_to_le16(name_len);
		de = de1;
	}
	de->name_len = namelen;
	memcpy(de->name, name, namelen);
	de->inode_no = cpu_to_le64(inode->i_ino);
	exofs_set_de_type(de, inode);
	err = exofs_commit_chunk(page, pos, rec_len);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	mark_inode_dirty(dir);
	sbi->s_numfiles++;

out_put:
	exofs_put_page(page);
out:
	return err;
out_unlock:
	unlock_page(page);
	goto out_put;
}
530 | |||
/*
 * Remove directory entry @dir (note: here "dir" is the entry, not the
 * directory inode) from @page.  Instead of shifting data, the previous
 * entry in the same chunk is widened to absorb the deleted record; a
 * chunk-leading entry is simply marked free (inode_no = 0).  Consumes
 * the page reference.  Returns 0 or a negative errno.
 */
int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
	char *kaddr = page_address(page);
	/* [from, to) initially spans chunk start .. end of doomed entry */
	unsigned from = ((char *)dir - kaddr) & ~(exofs_chunk_size(inode)-1);
	unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
	loff_t pos;
	struct exofs_dir_entry *pde = NULL;
	struct exofs_dir_entry *de = (struct exofs_dir_entry *) (kaddr + from);
	int err;

	/* find the entry immediately preceding the one being removed */
	while (de < dir) {
		if (de->rec_len == 0) {
			EXOFS_ERR("ERROR: exofs_delete_entry:"
				"zero-length directory entry");
			err = -EIO;
			goto out;
		}
		pde = de;
		de = exofs_next_entry(de);
	}
	if (pde)
		from = (char *)pde - (char *)page_address(page);
	pos = page_offset(page) + from;
	lock_page(page);
	err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0,
							&page, NULL);
	if (err)
		EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n",
			  err);
	if (pde)
		/* predecessor swallows the deleted record */
		pde->rec_len = cpu_to_le16(to - from);
	dir->inode_no = 0;
	if (likely(!err))
		err = exofs_commit_chunk(page, pos, to - from);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	mark_inode_dirty(inode);
	sbi->s_numfiles--;
out:
	exofs_put_page(page);
	return err;
}
575 | |||
576 | /* kept aligned on 4 bytes */ | ||
577 | #define THIS_DIR ".\0\0" | ||
578 | #define PARENT_DIR "..\0" | ||
579 | |||
580 | int exofs_make_empty(struct inode *inode, struct inode *parent) | ||
581 | { | ||
582 | struct address_space *mapping = inode->i_mapping; | ||
583 | struct page *page = grab_cache_page(mapping, 0); | ||
584 | unsigned chunk_size = exofs_chunk_size(inode); | ||
585 | struct exofs_dir_entry *de; | ||
586 | int err; | ||
587 | void *kaddr; | ||
588 | |||
589 | if (!page) | ||
590 | return -ENOMEM; | ||
591 | |||
592 | err = exofs_write_begin(NULL, page->mapping, 0, chunk_size, 0, | ||
593 | &page, NULL); | ||
594 | if (err) { | ||
595 | unlock_page(page); | ||
596 | goto fail; | ||
597 | } | ||
598 | |||
599 | kaddr = kmap_atomic(page, KM_USER0); | ||
600 | de = (struct exofs_dir_entry *)kaddr; | ||
601 | de->name_len = 1; | ||
602 | de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); | ||
603 | memcpy(de->name, THIS_DIR, sizeof(THIS_DIR)); | ||
604 | de->inode_no = cpu_to_le64(inode->i_ino); | ||
605 | exofs_set_de_type(de, inode); | ||
606 | |||
607 | de = (struct exofs_dir_entry *)(kaddr + EXOFS_DIR_REC_LEN(1)); | ||
608 | de->name_len = 2; | ||
609 | de->rec_len = cpu_to_le16(chunk_size - EXOFS_DIR_REC_LEN(1)); | ||
610 | de->inode_no = cpu_to_le64(parent->i_ino); | ||
611 | memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); | ||
612 | exofs_set_de_type(de, inode); | ||
613 | kunmap_atomic(page, KM_USER0); | ||
614 | err = exofs_commit_chunk(page, 0, chunk_size); | ||
615 | fail: | ||
616 | page_cache_release(page); | ||
617 | return err; | ||
618 | } | ||
619 | |||
/*
 * Return 1 if @inode contains only its "." and ".." entries (i.e. may
 * be rmdir'ed), 0 otherwise.  Any live entry other than "." / ".." —
 * or a "." whose inode number doesn't match — makes the directory
 * non-empty.  A page that fails to read is skipped; a zero-length
 * entry is treated as non-empty (corruption: refuse to remove).
 */
int exofs_empty_dir(struct inode *inode)
{
	struct page *page = NULL;
	unsigned long i, npages = dir_pages(inode);

	for (i = 0; i < npages; i++) {
		char *kaddr;
		struct exofs_dir_entry *de;
		page = exofs_get_page(inode, i);

		if (IS_ERR(page))
			continue;

		kaddr = page_address(page);
		de = (struct exofs_dir_entry *)kaddr;
		kaddr += exofs_last_byte(inode, i) - EXOFS_DIR_REC_LEN(1);

		while ((char *)de <= kaddr) {
			if (de->rec_len == 0) {
				EXOFS_ERR("ERROR: exofs_empty_dir: "
					  "zero-length directory entry"
					  "kaddr=%p, de=%p\n", kaddr, de);
				goto not_empty;
			}
			if (de->inode_no != 0) {
				/* check for . and .. */
				if (de->name[0] != '.')
					goto not_empty;
				if (de->name_len > 2)
					goto not_empty;
				if (de->name_len < 2) {
					/* "." must point at this very inode */
					if (le64_to_cpu(de->inode_no) !=
					    inode->i_ino)
						goto not_empty;
				} else if (de->name[1] != '.')
					goto not_empty;
			}
			de = exofs_next_entry(de);
		}
		exofs_put_page(page);
	}
	return 1;

not_empty:
	exofs_put_page(page);
	return 0;
}
667 | |||
/* file_operations for exofs directories: seek/read stubs + readdir. */
const struct file_operations exofs_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= exofs_readdir,
};
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h new file mode 100644 index 000000000000..0fd4c7859679 --- /dev/null +++ b/fs/exofs/exofs.h | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
#include <linux/fs.h>
#include <linux/time.h>
#include "common.h"

#ifndef __EXOFS_H__
#define __EXOFS_H__

/* Unconditional error logging with an "exofs:" prefix. */
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)

#ifdef CONFIG_EXOFS_DEBUG
#define EXOFS_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "exofs @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
/* the if (0) keeps the args type-checked even when debugging is off */
#define EXOFS_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif

/* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x)
58 | */ | ||
59 | struct exofs_sb_info { | ||
60 | struct osd_dev *s_dev; /* returned by get_osd_dev */ | ||
61 | osd_id s_pid; /* partition ID of file system*/ | ||
62 | int s_timeout; /* timeout for OSD operations */ | ||
63 | uint64_t s_nextid; /* highest object ID used */ | ||
64 | uint32_t s_numfiles; /* number of files on fs */ | ||
65 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ | ||
66 | u32 s_next_generation; /* next gen # to use */ | ||
67 | atomic_t s_curr_pending; /* number of pending commands */ | ||
68 | uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ | ||
69 | }; | ||
70 | |||
71 | /* | ||
72 | * our extension to the in-memory inode | ||
73 | */ | ||
/*
 * our extension to the in-memory inode - embeds the VFS inode so
 * container_of() (see exofs_i() below) recovers it from a struct inode.
 */
struct exofs_i_info {
	unsigned long  i_flags;		/* various atomic flags (OBJ_*)       */
	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
	wait_queue_head_t i_wq;		/* wait queue for inode               */
	uint64_t       i_commit_size;	/* the object's written length        */
	uint8_t        i_cred[OSD_CAP_LEN];/* all-powerful credential         */
	struct inode   vfs_inode;	/* normal in-memory inode             */
};
83 | |||
/*
 * our inode flags - bit numbers in exofs_i_info::i_flags, manipulated
 * atomically via test_bit/set_bit.
 */
#define OBJ_2BCREATED	0	/* object will be created soon*/
#define OBJ_CREATED	1	/* object has been created on the osd*/

static inline int obj_2bcreated(struct exofs_i_info *oi)
{
	return test_bit(OBJ_2BCREATED, &oi->i_flags);
}

static inline void set_obj_2bcreated(struct exofs_i_info *oi)
{
	set_bit(OBJ_2BCREATED, &oi->i_flags);
}

static inline int obj_created(struct exofs_i_info *oi)
{
	return test_bit(OBJ_CREATED, &oi->i_flags);
}

static inline void set_obj_created(struct exofs_i_info *oi)
{
	set_bit(OBJ_CREATED, &oi->i_flags);
}

int __exofs_wait_obj_created(struct exofs_i_info *oi);
/*
 * Block until the inode's OSD object exists; fast-path out when it
 * already does (the common case), slow path sleeps in osd.c.
 */
static inline int wait_obj_created(struct exofs_i_info *oi)
{
	if (likely(obj_created(oi)))
		return 0;

	return __exofs_wait_obj_created(oi);
}
118 | |||
/*
 * get to our inode from the vfs inode (inverse of the vfs_inode
 * embedding in struct exofs_i_info)
 */
static inline struct exofs_i_info *exofs_i(struct inode *inode)
{
	return container_of(inode, struct exofs_i_info, vfs_inode);
}
126 | |||
/*
 * Maximum count of links to a file
 */
#define EXOFS_LINK_MAX           32000

/*************************
 * function declarations *
 *************************/
/* inode.c */
void exofs_truncate(struct inode *inode);
int exofs_setattr(struct dentry *, struct iattr *);
int exofs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata);
extern struct inode *exofs_iget(struct super_block *, unsigned long);
struct inode *exofs_new_inode(struct inode *, int);
extern int exofs_write_inode(struct inode *, int);
extern void exofs_delete_inode(struct inode *);

/* dir.c: */
int exofs_add_link(struct dentry *, struct inode *);
ino_t exofs_inode_by_name(struct inode *, struct dentry *);
int exofs_delete_entry(struct exofs_dir_entry *, struct page *);
int exofs_make_empty(struct inode *, struct inode *);
struct exofs_dir_entry *exofs_find_entry(struct inode *, struct dentry *,
					 struct page **);
int exofs_empty_dir(struct inode *);
struct exofs_dir_entry *exofs_dotdot(struct inode *, struct page **);
ino_t exofs_parent_ino(struct dentry *child);
int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
		    struct inode *);

/*********************
 * operation vectors *
 *********************/
/* dir.c: */
extern const struct file_operations exofs_dir_operations;

/* file.c */
extern const struct inode_operations exofs_file_inode_operations;
extern const struct file_operations exofs_file_operations;

/* inode.c */
extern const struct address_space_operations exofs_aops;

/* namei.c */
extern const struct inode_operations exofs_dir_inode_operations;
extern const struct inode_operations exofs_special_inode_operations;

/* symlink.c */
extern const struct inode_operations exofs_symlink_inode_operations;
extern const struct inode_operations exofs_fast_symlink_inode_operations;

#endif
diff --git a/fs/exofs/file.c b/fs/exofs/file.c new file mode 100644 index 000000000000..6ed7fe484752 --- /dev/null +++ b/fs/exofs/file.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/buffer_head.h> | ||
37 | |||
38 | #include "exofs.h" | ||
39 | |||
/*
 * ->release hook: exofs keeps no per-open state, so there is nothing
 * to tear down when the last reference to an open file goes away.
 */
static int exofs_release_file(struct inode *inode, struct file *filp)
{
	return 0;
}
44 | |||
45 | static int exofs_file_fsync(struct file *filp, struct dentry *dentry, | ||
46 | int datasync) | ||
47 | { | ||
48 | int ret; | ||
49 | struct address_space *mapping = filp->f_mapping; | ||
50 | |||
51 | ret = filemap_write_and_wait(mapping); | ||
52 | if (ret) | ||
53 | return ret; | ||
54 | |||
55 | /*Note: file_fsync below also calles sync_blockdev, which is a no-op | ||
56 | * for exofs, but other then that it does sync_inode and | ||
57 | * sync_superblock which is what we need here. | ||
58 | */ | ||
59 | return file_fsync(filp, dentry, datasync); | ||
60 | } | ||
61 | |||
62 | static int exofs_flush(struct file *file, fl_owner_t id) | ||
63 | { | ||
64 | exofs_file_fsync(file, file->f_path.dentry, 1); | ||
65 | /* TODO: Flush the OSD target */ | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | const struct file_operations exofs_file_operations = { | ||
70 | .llseek = generic_file_llseek, | ||
71 | .read = do_sync_read, | ||
72 | .write = do_sync_write, | ||
73 | .aio_read = generic_file_aio_read, | ||
74 | .aio_write = generic_file_aio_write, | ||
75 | .mmap = generic_file_mmap, | ||
76 | .open = generic_file_open, | ||
77 | .release = exofs_release_file, | ||
78 | .fsync = exofs_file_fsync, | ||
79 | .flush = exofs_flush, | ||
80 | .splice_read = generic_file_splice_read, | ||
81 | .splice_write = generic_file_splice_write, | ||
82 | }; | ||
83 | |||
84 | const struct inode_operations exofs_file_inode_operations = { | ||
85 | .truncate = exofs_truncate, | ||
86 | .setattr = exofs_setattr, | ||
87 | }; | ||
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c new file mode 100644 index 000000000000..ba8d9fab4693 --- /dev/null +++ b/fs/exofs/inode.c | |||
@@ -0,0 +1,1303 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/writeback.h> | ||
37 | #include <linux/buffer_head.h> | ||
38 | #include <scsi/scsi_device.h> | ||
39 | |||
40 | #include "exofs.h" | ||
41 | |||
42 | #ifdef CONFIG_EXOFS_DEBUG | ||
43 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | ||
44 | #endif | ||
45 | |||
/* A run of contiguous page-cache pages being assembled into a single
 * OSD read or write request.
 */
struct page_collect {
	struct exofs_sb_info *sbi;	/* fs info of the owning super block */
	struct request_queue *req_q;	/* queue bio pages are added against */
	struct inode *inode;		/* inode the pages belong to */
	unsigned expected_pages;	/* pages the caller still expects */

	struct bio *bio;		/* bio carrying the collected pages */
	unsigned nr_pages;		/* pages collected so far */
	unsigned long length;		/* bytes collected so far */
	loff_t pg_first; /* keep 64bit also in 32-arches */
};
57 | |||
58 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | ||
59 | struct inode *inode) | ||
60 | { | ||
61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
62 | struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; | ||
63 | |||
64 | pcol->sbi = sbi; | ||
65 | pcol->req_q = req_q; | ||
66 | pcol->inode = inode; | ||
67 | pcol->expected_pages = expected_pages; | ||
68 | |||
69 | pcol->bio = NULL; | ||
70 | pcol->nr_pages = 0; | ||
71 | pcol->length = 0; | ||
72 | pcol->pg_first = -1; | ||
73 | |||
74 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
75 | expected_pages); | ||
76 | } | ||
77 | |||
78 | static void _pcol_reset(struct page_collect *pcol) | ||
79 | { | ||
80 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); | ||
81 | |||
82 | pcol->bio = NULL; | ||
83 | pcol->nr_pages = 0; | ||
84 | pcol->length = 0; | ||
85 | pcol->pg_first = -1; | ||
86 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | ||
87 | pcol->inode->i_ino, pcol->expected_pages); | ||
88 | |||
89 | /* this is probably the end of the loop but in writes | ||
90 | * it might not end here. don't be left with nothing | ||
91 | */ | ||
92 | if (!pcol->expected_pages) | ||
93 | pcol->expected_pages = 128; | ||
94 | } | ||
95 | |||
96 | static int pcol_try_alloc(struct page_collect *pcol) | ||
97 | { | ||
98 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | ||
99 | |||
100 | for (; pages; pages >>= 1) { | ||
101 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | ||
102 | if (likely(pcol->bio)) | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | ||
107 | pcol->expected_pages); | ||
108 | return -ENOMEM; | ||
109 | } | ||
110 | |||
111 | static void pcol_free(struct page_collect *pcol) | ||
112 | { | ||
113 | bio_put(pcol->bio); | ||
114 | pcol->bio = NULL; | ||
115 | } | ||
116 | |||
117 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | ||
118 | unsigned len) | ||
119 | { | ||
120 | int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); | ||
121 | if (unlikely(len != added_len)) | ||
122 | return -ENOMEM; | ||
123 | |||
124 | ++pcol->nr_pages; | ||
125 | pcol->length += len; | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static int update_read_page(struct page *page, int ret) | ||
130 | { | ||
131 | if (ret == 0) { | ||
132 | /* Everything is OK */ | ||
133 | SetPageUptodate(page); | ||
134 | if (PageError(page)) | ||
135 | ClearPageError(page); | ||
136 | } else if (ret == -EFAULT) { | ||
137 | /* In this case we were trying to read something that wasn't on | ||
138 | * disk yet - return a page full of zeroes. This should be OK, | ||
139 | * because the object should be empty (if there was a write | ||
140 | * before this read, the read would be waiting with the page | ||
141 | * locked */ | ||
142 | clear_highpage(page); | ||
143 | |||
144 | SetPageUptodate(page); | ||
145 | if (PageError(page)) | ||
146 | ClearPageError(page); | ||
147 | ret = 0; /* recovered error */ | ||
148 | EXOFS_DBGMSG("recovered read error\n"); | ||
149 | } else /* Error */ | ||
150 | SetPageError(page); | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | static void update_write_page(struct page *page, int ret) | ||
156 | { | ||
157 | if (ret) { | ||
158 | mapping_set_error(page->mapping, ret); | ||
159 | SetPageError(page); | ||
160 | } | ||
161 | end_page_writeback(page); | ||
162 | } | ||
163 | |||
/* Called at the end of reads, to optionally unlock pages and update their
 * status.
 */
static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
			    bool do_unlock)
{
	struct bio_vec *bvec;
	int i;
	u64 resid;
	u64 good_bytes;
	u64 length = 0;
	int ret = exofs_check_ok_resid(or, &resid, NULL);

	osd_end_request(or);

	/* ret==0: everything arrived; error with resid==0: nothing usable;
	 * otherwise only the leading (length - resid) bytes are good.
	 */
	if (likely(!ret))
		good_bytes = pcol->length;
	else if (!resid)
		good_bytes = 0;
	else
		good_bytes = pcol->length - resid;

	EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
		     " length=0x%lx nr_pages=%u\n",
		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
		     pcol->nr_pages);

	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
		struct page *page = bvec->bv_page;
		struct inode *inode = page->mapping->host;
		int page_stat;

		if (inode != pcol->inode)
			continue; /* osd might add more pages at end */

		/* pages entirely below good_bytes read successfully */
		if (likely(length < good_bytes))
			page_stat = 0;
		else
			page_stat = ret;

		EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n",
			     inode->i_ino, page->index,
			     page_stat ? "bad_bytes" : "good_bytes");

		ret = update_read_page(page, page_stat);
		if (do_unlock)
			unlock_page(page);
		length += bvec->bv_len;
	}

	pcol_free(pcol);
	EXOFS_DBGMSG("readpages_done END\n");
	return ret;
}
218 | |||
219 | /* callback of async reads */ | ||
220 | static void readpages_done(struct osd_request *or, void *p) | ||
221 | { | ||
222 | struct page_collect *pcol = p; | ||
223 | |||
224 | __readpages_done(or, pcol, true); | ||
225 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
226 | kfree(p); | ||
227 | } | ||
228 | |||
229 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | ||
230 | { | ||
231 | struct bio_vec *bvec; | ||
232 | int i; | ||
233 | |||
234 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
235 | struct page *page = bvec->bv_page; | ||
236 | |||
237 | if (rw == READ) | ||
238 | update_read_page(page, ret); | ||
239 | else | ||
240 | update_write_page(page, ret); | ||
241 | |||
242 | unlock_page(page); | ||
243 | } | ||
244 | pcol_free(pcol); | ||
245 | } | ||
246 | |||
/* Submit the pages collected in @pcol as one OSD read request.
 *
 * Async (@is_sync == false): page ownership moves to a heap copy of
 * @pcol consumed by readpages_done().  Sync: completes inline through
 * __readpages_done() with the single page left locked for the caller.
 */
static int read_exec(struct page_collect *pcol, bool is_sync)
{
	struct exofs_i_info *oi = exofs_i(pcol->inode);
	struct osd_obj_id obj = {pcol->sbi->s_pid,
				 pcol->inode->i_ino + EXOFS_OBJ_OFF};
	struct osd_request *or = NULL;
	struct page_collect *pcol_copy = NULL;
	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
	int ret;

	if (!pcol->bio)
		return 0;	/* nothing collected, nothing to submit */

	/* see comment in _readpage() about sync reads */
	WARN_ON(is_sync && (pcol->nr_pages != 1));

	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		ret = -ENOMEM;
		goto err;
	}

	osd_req_read(or, &obj, pcol->bio, i_start);

	if (is_sync) {
		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
		return __readpages_done(or, pcol, false);
	}

	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
	if (!pcol_copy) {
		ret = -ENOMEM;
		goto err;
	}

	*pcol_copy = *pcol;
	ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
	if (unlikely(ret))
		goto err;

	atomic_inc(&pcol->sbi->s_curr_pending);

	EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
		     obj.id, _LLU(i_start), pcol->length);

	/* pages ownership was passed to pcol_copy */
	_pcol_reset(pcol);
	return 0;

err:
	/* sync callers handle their single, still-locked page themselves */
	if (!is_sync)
		_unlock_pcol_pages(pcol, ret, READ);
	kfree(pcol_copy);
	if (or)
		osd_end_request(or);
	return ret;
}
304 | |||
/* readpage_strip is called either directly from readpage() or by the VFS from
 * within read_cache_pages(), to add one more page to be read. It will try to
 * collect as many contiguous pages as posible. If a discontinuity is
 * encountered, or it runs out of resources, it will submit the previous segment
 * and will start a new collection. Eventually caller must submit the last
 * segment if present.
 */
static int readpage_strip(void *data, struct page *page)
{
	struct page_collect *pcol = data;
	struct inode *inode = pcol->inode;
	struct exofs_i_info *oi = exofs_i(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	size_t len;
	int ret;

	/* FIXME: Just for debugging, will be removed */
	if (PageUptodate(page))
		EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
			  page->index);

	/* how many bytes of this page fall inside i_size? */
	if (page->index < end_index)
		len = PAGE_CACHE_SIZE;
	else if (page->index == end_index)
		len = i_size & ~PAGE_CACHE_MASK;
	else
		len = 0;

	if (!len || !obj_created(oi)) {
		/* this will be out of bounds, or doesn't exist yet.
		 * Current page is cleared and the request is split
		 */
		clear_highpage(page);

		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);

		unlock_page(page);
		EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
			     " splitting\n", inode->i_ino, page->index);

		return read_exec(pcol, false);
	}

try_again:

	if (unlikely(pcol->pg_first == -1)) {
		/* first page of a new collection */
		pcol->pg_first = page->index;
	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
		   page->index)) {
		/* Discontinuity detected, split the request */
		ret = read_exec(pcol, false);
		if (unlikely(ret))
			goto fail;
		goto try_again;
	}

	if (!pcol->bio) {
		ret = pcol_try_alloc(pcol);
		if (unlikely(ret))
			goto fail;
	}

	/* zero the tail of a partial last page so stale data never shows */
	if (len != PAGE_CACHE_SIZE)
		zero_user(page, len, PAGE_CACHE_SIZE - len);

	EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
		     inode->i_ino, page->index, len);

	ret = pcol_add_page(pcol, page, len);
	if (ret) {
		EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p "
			     "this_len=0x%zx nr_pages=%u length=0x%lx\n",
			     page, len, pcol->nr_pages, pcol->length);

		/* split the request, and start again with current page */
		ret = read_exec(pcol, false);
		if (unlikely(ret))
			goto fail;

		goto try_again;
	}

	return 0;

fail:
	/* SetPageError(page); ??? */
	unlock_page(page);
	return ret;
}
397 | |||
398 | static int exofs_readpages(struct file *file, struct address_space *mapping, | ||
399 | struct list_head *pages, unsigned nr_pages) | ||
400 | { | ||
401 | struct page_collect pcol; | ||
402 | int ret; | ||
403 | |||
404 | _pcol_init(&pcol, nr_pages, mapping->host); | ||
405 | |||
406 | ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); | ||
407 | if (ret) { | ||
408 | EXOFS_ERR("read_cache_pages => %d\n", ret); | ||
409 | return ret; | ||
410 | } | ||
411 | |||
412 | return read_exec(&pcol, false); | ||
413 | } | ||
414 | |||
415 | static int _readpage(struct page *page, bool is_sync) | ||
416 | { | ||
417 | struct page_collect pcol; | ||
418 | int ret; | ||
419 | |||
420 | _pcol_init(&pcol, 1, page->mapping->host); | ||
421 | |||
422 | /* readpage_strip might call read_exec(,async) inside at several places | ||
423 | * but this is safe for is_async=0 since read_exec will not do anything | ||
424 | * when we have a single page. | ||
425 | */ | ||
426 | ret = readpage_strip(&pcol, page); | ||
427 | if (ret) { | ||
428 | EXOFS_ERR("_readpage => %d\n", ret); | ||
429 | return ret; | ||
430 | } | ||
431 | |||
432 | return read_exec(&pcol, is_sync); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * We don't need the file | ||
437 | */ | ||
438 | static int exofs_readpage(struct file *file, struct page *page) | ||
439 | { | ||
440 | return _readpage(page, false); | ||
441 | } | ||
442 | |||
/* Callback for osd_write. All writes are asynchronous */
static void writepages_done(struct osd_request *or, void *p)
{
	struct page_collect *pcol = p;
	struct bio_vec *bvec;
	int i;
	u64 resid;
	u64 good_bytes;
	u64 length = 0;

	int ret = exofs_check_ok_resid(or, NULL, &resid);

	osd_end_request(or);
	atomic_dec(&pcol->sbi->s_curr_pending);

	/* ret==0: everything written; error with resid==0: nothing made it;
	 * otherwise only the leading (length - resid) bytes were written.
	 */
	if (likely(!ret))
		good_bytes = pcol->length;
	else if (!resid)
		good_bytes = 0;
	else
		good_bytes = pcol->length - resid;

	EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
		     " length=0x%lx nr_pages=%u\n",
		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
		     pcol->nr_pages);

	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
		struct page *page = bvec->bv_page;
		struct inode *inode = page->mapping->host;
		int page_stat;

		if (inode != pcol->inode)
			continue; /* osd might add more pages to a bio */

		/* pages entirely below good_bytes were written OK */
		if (likely(length < good_bytes))
			page_stat = 0;
		else
			page_stat = ret;

		update_write_page(page, page_stat);
		unlock_page(page);
		EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n",
			     inode->i_ino, page->index, page_stat);

		length += bvec->bv_len;
	}

	/* this pcol (and its bio) were handed over by write_exec() */
	pcol_free(pcol);
	kfree(pcol);
	EXOFS_DBGMSG("writepages_done END\n");
}
495 | |||
/* Submit the pages collected in @pcol as one asynchronous OSD write.
 * On success, page ownership moves to a heap copy of @pcol consumed by
 * writepages_done(); on failure all collected pages are failed and
 * unlocked here.
 */
static int write_exec(struct page_collect *pcol)
{
	struct exofs_i_info *oi = exofs_i(pcol->inode);
	struct osd_obj_id obj = {pcol->sbi->s_pid,
				 pcol->inode->i_ino + EXOFS_OBJ_OFF};
	struct osd_request *or = NULL;
	struct page_collect *pcol_copy = NULL;
	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
	int ret;

	if (!pcol->bio)
		return 0;	/* nothing collected, nothing to submit */

	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
		ret = -ENOMEM;
		goto err;
	}

	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
	if (!pcol_copy) {
		EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
		ret = -ENOMEM;
		goto err;
	}

	*pcol_copy = *pcol;

	osd_req_write(or, &obj, pcol_copy->bio, i_start);
	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
	if (unlikely(ret)) {
		EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
		goto err;
	}

	atomic_inc(&pcol->sbi->s_curr_pending);
	EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
		     pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
		     pcol->length);
	/* pages ownership was passed to pcol_copy */
	_pcol_reset(pcol);
	return 0;

err:
	_unlock_pcol_pages(pcol, ret, WRITE);
	kfree(pcol_copy);
	if (or)
		osd_end_request(or);
	return ret;
}
547 | |||
/* writepage_strip is called either directly from writepage() or by the VFS from
 * within write_cache_pages(), to add one more page to be written to storage.
 * It will try to collect as many contiguous pages as possible. If a
 * discontinuity is encountered or it runs out of resources it will submit the
 * previous segment and will start a new collection.
 * Eventually caller must submit the last segment if present.
 */
static int writepage_strip(struct page *page,
			   struct writeback_control *wbc_unused, void *data)
{
	struct page_collect *pcol = data;
	struct inode *inode = pcol->inode;
	struct exofs_i_info *oi = exofs_i(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	size_t len;
	int ret;

	BUG_ON(!PageLocked(page));

	/* never write pages of an object that was not created yet */
	ret = wait_obj_created(oi);
	if (unlikely(ret))
		goto fail;

	if (page->index < end_index)
		/* in this case, the page is within the limits of the file */
		len = PAGE_CACHE_SIZE;
	else {
		len = i_size & ~PAGE_CACHE_MASK;

		if (page->index > end_index || !len) {
			/* in this case, the page is outside the limits
			 * (truncate in progress)
			 */
			ret = write_exec(pcol);
			if (unlikely(ret))
				goto fail;
			if (PageError(page))
				ClearPageError(page);
			unlock_page(page);
			return 0;
		}
	}

try_again:

	if (unlikely(pcol->pg_first == -1)) {
		/* first page of a new collection */
		pcol->pg_first = page->index;
	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
		   page->index)) {
		/* Discontinuity detected, split the request */
		ret = write_exec(pcol);
		if (unlikely(ret))
			goto fail;
		goto try_again;
	}

	if (!pcol->bio) {
		ret = pcol_try_alloc(pcol);
		if (unlikely(ret))
			goto fail;
	}

	EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
		     inode->i_ino, page->index, len);

	ret = pcol_add_page(pcol, page, len);
	if (unlikely(ret)) {
		EXOFS_DBGMSG("Failed pcol_add_page "
			     "nr_pages=%u total_length=0x%lx\n",
			     pcol->nr_pages, pcol->length);

		/* split the request, next loop will start again */
		ret = write_exec(pcol);
		if (unlikely(ret)) {
			EXOFS_DBGMSG("write_exec faild => %d", ret);
			goto fail;
		}

		goto try_again;
	}

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	return 0;

fail:
	set_bit(AS_EIO, &page->mapping->flags);
	unlock_page(page);
	return ret;
}
640 | |||
641 | static int exofs_writepages(struct address_space *mapping, | ||
642 | struct writeback_control *wbc) | ||
643 | { | ||
644 | struct page_collect pcol; | ||
645 | long start, end, expected_pages; | ||
646 | int ret; | ||
647 | |||
648 | start = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
649 | end = (wbc->range_end == LLONG_MAX) ? | ||
650 | start + mapping->nrpages : | ||
651 | wbc->range_end >> PAGE_CACHE_SHIFT; | ||
652 | |||
653 | if (start || end) | ||
654 | expected_pages = min(end - start + 1, 32L); | ||
655 | else | ||
656 | expected_pages = mapping->nrpages; | ||
657 | |||
658 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | ||
659 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | ||
660 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | ||
661 | mapping->nrpages, start, end); | ||
662 | |||
663 | _pcol_init(&pcol, expected_pages, mapping->host); | ||
664 | |||
665 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); | ||
666 | if (ret) { | ||
667 | EXOFS_ERR("write_cache_pages => %d\n", ret); | ||
668 | return ret; | ||
669 | } | ||
670 | |||
671 | return write_exec(&pcol); | ||
672 | } | ||
673 | |||
674 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | ||
675 | { | ||
676 | struct page_collect pcol; | ||
677 | int ret; | ||
678 | |||
679 | _pcol_init(&pcol, 1, page->mapping->host); | ||
680 | |||
681 | ret = writepage_strip(page, NULL, &pcol); | ||
682 | if (ret) { | ||
683 | EXOFS_ERR("exofs_writepage => %d\n", ret); | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | return write_exec(&pcol); | ||
688 | } | ||
689 | |||
690 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
691 | loff_t pos, unsigned len, unsigned flags, | ||
692 | struct page **pagep, void **fsdata) | ||
693 | { | ||
694 | int ret = 0; | ||
695 | struct page *page; | ||
696 | |||
697 | page = *pagep; | ||
698 | if (page == NULL) { | ||
699 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, | ||
700 | fsdata); | ||
701 | if (ret) { | ||
702 | EXOFS_DBGMSG("simple_write_begin faild\n"); | ||
703 | return ret; | ||
704 | } | ||
705 | |||
706 | page = *pagep; | ||
707 | } | ||
708 | |||
709 | /* read modify write */ | ||
710 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { | ||
711 | ret = _readpage(page, true); | ||
712 | if (ret) { | ||
713 | /*SetPageError was done by _readpage. Is it ok?*/ | ||
714 | unlock_page(page); | ||
715 | EXOFS_DBGMSG("__readpage_filler faild\n"); | ||
716 | } | ||
717 | } | ||
718 | |||
719 | return ret; | ||
720 | } | ||
721 | |||
722 | static int exofs_write_begin_export(struct file *file, | ||
723 | struct address_space *mapping, | ||
724 | loff_t pos, unsigned len, unsigned flags, | ||
725 | struct page **pagep, void **fsdata) | ||
726 | { | ||
727 | *pagep = NULL; | ||
728 | |||
729 | return exofs_write_begin(file, mapping, pos, len, flags, pagep, | ||
730 | fsdata); | ||
731 | } | ||
732 | |||
733 | const struct address_space_operations exofs_aops = { | ||
734 | .readpage = exofs_readpage, | ||
735 | .readpages = exofs_readpages, | ||
736 | .writepage = exofs_writepage, | ||
737 | .writepages = exofs_writepages, | ||
738 | .write_begin = exofs_write_begin_export, | ||
739 | .write_end = simple_write_end, | ||
740 | }; | ||
741 | |||
742 | /****************************************************************************** | ||
743 | * INODE OPERATIONS | ||
744 | *****************************************************************************/ | ||
745 | |||
746 | /* | ||
747 | * Test whether an inode is a fast symlink. | ||
748 | */ | ||
749 | static inline int exofs_inode_is_fast_symlink(struct inode *inode) | ||
750 | { | ||
751 | struct exofs_i_info *oi = exofs_i(inode); | ||
752 | |||
753 | return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * get_block_t - Fill in a buffer_head | ||
758 | * An OSD takes care of block allocation so we just fake an allocation by | ||
759 | * putting in the inode's sector_t in the buffer_head. | ||
760 | * TODO: What about the case of create==0 and @iblock does not exist in the | ||
761 | * object? | ||
762 | */ | ||
763 | static int exofs_get_block(struct inode *inode, sector_t iblock, | ||
764 | struct buffer_head *bh_result, int create) | ||
765 | { | ||
766 | map_bh(bh_result, inode->i_sb, iblock); | ||
767 | return 0; | ||
768 | } | ||
769 | |||
/* 8-byte OSD attribute holding an object's logical length; used to get
 * and set the on-OSD size (see exofs_truncate below).
 */
const struct osd_attr g_attr_logical_length = ATTR_DEF(
	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
772 | |||
773 | /* | ||
774 | * Truncate a file to the specified size - all we have to do is set the size | ||
775 | * attribute. We make sure the object exists first. | ||
776 | */ | ||
777 | void exofs_truncate(struct inode *inode) | ||
778 | { | ||
779 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
780 | struct exofs_i_info *oi = exofs_i(inode); | ||
781 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
782 | struct osd_request *or; | ||
783 | struct osd_attr attr; | ||
784 | loff_t isize = i_size_read(inode); | ||
785 | __be64 newsize; | ||
786 | int ret; | ||
787 | |||
788 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | ||
789 | || S_ISLNK(inode->i_mode))) | ||
790 | return; | ||
791 | if (exofs_inode_is_fast_symlink(inode)) | ||
792 | return; | ||
793 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
794 | return; | ||
795 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
796 | |||
797 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
798 | |||
799 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
800 | if (unlikely(!or)) { | ||
801 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
802 | goto fail; | ||
803 | } | ||
804 | |||
805 | osd_req_set_attributes(or, &obj); | ||
806 | |||
807 | newsize = cpu_to_be64((u64)isize); | ||
808 | attr = g_attr_logical_length; | ||
809 | attr.val_ptr = &newsize; | ||
810 | osd_req_add_set_attr_list(or, &attr, 1); | ||
811 | |||
812 | /* if we are about to truncate an object, and it hasn't been | ||
813 | * created yet, wait | ||
814 | */ | ||
815 | if (unlikely(wait_obj_created(oi))) | ||
816 | goto fail; | ||
817 | |||
818 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
819 | osd_end_request(or); | ||
820 | if (ret) | ||
821 | goto fail; | ||
822 | |||
823 | out: | ||
824 | mark_inode_dirty(inode); | ||
825 | return; | ||
826 | fail: | ||
827 | make_bad_inode(inode); | ||
828 | goto out; | ||
829 | } | ||
830 | |||
831 | /* | ||
832 | * Set inode attributes - just call generic functions. | ||
833 | */ | ||
834 | int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
835 | { | ||
836 | struct inode *inode = dentry->d_inode; | ||
837 | int error; | ||
838 | |||
839 | error = inode_change_ok(inode, iattr); | ||
840 | if (error) | ||
841 | return error; | ||
842 | |||
843 | error = inode_setattr(inode, iattr); | ||
844 | return error; | ||
845 | } | ||
846 | |||
/*
 * Read an inode from the OSD, and return it as is. We also return the size
 * attribute in the 'sanity' argument if we got compiled with debugging turned
 * on.
 *
 * @sb:     the superblock of the mounted exofs
 * @oi:     in-core inode; only its i_ino and i_cred are used/filled here
 * @inode:  out - receives the raw on-disk inode image (exofs_fcb)
 * @sanity: out (debug builds only) - the object's logical length
 *
 * Returns 0 on success or a negative errno from the OSD round-trip or
 * attribute extraction.
 */
static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
		    struct exofs_fcb *inode, uint64_t *sanity)
{
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct osd_request *or;
	struct osd_attr attr;
	/* OSD object id = inode number + fixed offset, in the fs partition */
	struct osd_obj_id obj = {sbi->s_pid,
				 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
	int ret;

	exofs_make_credential(oi->i_cred, &obj);

	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
		return -ENOMEM;
	}
	osd_req_get_attributes(or, &obj);

	/* we need the inode attribute */
	osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);

#ifdef EXOFS_DEBUG_OBJ_ISIZE
	/* we get the size attributes to do a sanity check */
	osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
#endif

	/* synchronous round-trip to the OSD */
	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
	if (ret)
		goto out;

	/* locate the inode-data attribute in the reply and copy it out */
	attr = g_attr_inode_data;
	ret = extract_attr_from_req(or, &attr);
	if (ret) {
		EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
		goto out;
	}

	WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
	memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);

#ifdef EXOFS_DEBUG_OBJ_ISIZE
	/* pull the logical length too, for the sanity check in exofs_iget() */
	attr = g_attr_logical_length;
	ret = extract_attr_from_req(or, &attr);
	if (ret) {
		EXOFS_ERR("ERROR: extract attr from or failed\n");
		goto out;
	}
	*sanity = get_unaligned_be64(attr.val_ptr);
#endif

out:
	osd_end_request(or);
	return ret;
}
907 | |||
/*
 * Fill in an inode read from the OSD and set it up for use
 *
 * Returns the (possibly cached) inode on success, or ERR_PTR(-errno) on
 * allocation failure, OSD read failure, or a stale (deleted) inode.
 */
struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
{
	struct exofs_i_info *oi;
	struct exofs_fcb fcb;
	struct inode *inode;
	uint64_t uninitialized_var(sanity);
	int ret;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	/* already in the inode cache and initialized - nothing to do */
	if (!(inode->i_state & I_NEW))
		return inode;
	oi = exofs_i(inode);

	/* read the inode from the osd */
	ret = exofs_get_inode(sb, oi, &fcb, &sanity);
	if (ret)
		goto bad_inode;

	/* the object was read back, so it certainly exists on the OSD;
	 * nobody will have to sleep on it */
	init_waitqueue_head(&oi->i_wq);
	set_obj_created(oi);

	/* copy stuff from on-disk struct to in-memory struct */
	inode->i_mode = le16_to_cpu(fcb.i_mode);
	inode->i_uid = le32_to_cpu(fcb.i_uid);
	inode->i_gid = le32_to_cpu(fcb.i_gid);
	inode->i_nlink = le16_to_cpu(fcb.i_links_count);
	inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
	inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
	inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
	inode->i_ctime.tv_nsec =
		inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
	oi->i_commit_size = le64_to_cpu(fcb.i_size);
	i_size_write(inode, oi->i_commit_size);
	inode->i_blkbits = EXOFS_BLKSHIFT;
	inode->i_generation = le32_to_cpu(fcb.i_generation);

#ifdef EXOFS_DEBUG_OBJ_ISIZE
	/* fast symlinks keep their data in i_data, so their object length
	 * legitimately differs from i_size - skip the warning for them */
	if ((inode->i_size != sanity) &&
		(!exofs_inode_is_fast_symlink(inode))) {
		EXOFS_ERR("WARNING: Size of object from inode and "
			  "attributes differ (%lld != %llu)\n",
			  inode->i_size, _LLU(sanity));
	}
#endif

	oi->i_dir_start_lookup = 0;

	/* nlink==0 && mode==0 means the inode was deleted */
	if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
		ret = -ESTALE;
		goto bad_inode;
	}

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		/* ext2-style device number encoding: old format in
		 * i_data[0], new format in i_data[1] */
		if (fcb.i_data[0])
			inode->i_rdev =
				old_decode_dev(le32_to_cpu(fcb.i_data[0]));
		else
			inode->i_rdev =
				new_decode_dev(le32_to_cpu(fcb.i_data[1]));
	} else {
		memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
	}

	/* dispatch on file type to install the proper operations */
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &exofs_file_inode_operations;
		inode->i_fop = &exofs_file_operations;
		inode->i_mapping->a_ops = &exofs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &exofs_dir_inode_operations;
		inode->i_fop = &exofs_dir_operations;
		inode->i_mapping->a_ops = &exofs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		if (exofs_inode_is_fast_symlink(inode))
			inode->i_op = &exofs_fast_symlink_inode_operations;
		else {
			inode->i_op = &exofs_symlink_inode_operations;
			inode->i_mapping->a_ops = &exofs_aops;
		}
	} else {
		inode->i_op = &exofs_special_inode_operations;
		if (fcb.i_data[0])
			init_special_inode(inode, inode->i_mode,
			   old_decode_dev(le32_to_cpu(fcb.i_data[0])));
		else
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(fcb.i_data[1])));
	}

	unlock_new_inode(inode);
	return inode;

bad_inode:
	iget_failed(inode);
	return ERR_PTR(ret);
}
1008 | |||
1009 | int __exofs_wait_obj_created(struct exofs_i_info *oi) | ||
1010 | { | ||
1011 | if (!obj_created(oi)) { | ||
1012 | BUG_ON(!obj_2bcreated(oi)); | ||
1013 | wait_event(oi->i_wq, obj_created(oi)); | ||
1014 | } | ||
1015 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; | ||
1016 | } | ||
1017 | /* | ||
1018 | * Callback function from exofs_new_inode(). The important thing is that we | ||
1019 | * set the obj_created flag so that other methods know that the object exists on | ||
1020 | * the OSD. | ||
1021 | */ | ||
1022 | static void create_done(struct osd_request *or, void *p) | ||
1023 | { | ||
1024 | struct inode *inode = p; | ||
1025 | struct exofs_i_info *oi = exofs_i(inode); | ||
1026 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
1027 | int ret; | ||
1028 | |||
1029 | ret = exofs_check_ok(or); | ||
1030 | osd_end_request(or); | ||
1031 | atomic_dec(&sbi->s_curr_pending); | ||
1032 | |||
1033 | if (unlikely(ret)) { | ||
1034 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | ||
1035 | _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | ||
1036 | make_bad_inode(inode); | ||
1037 | } else | ||
1038 | set_obj_created(oi); | ||
1039 | |||
1040 | atomic_dec(&inode->i_count); | ||
1041 | wake_up(&oi->i_wq); | ||
1042 | } | ||
1043 | |||
/*
 * Set up a new inode and create an object for it on the OSD
 *
 * The object creation is issued asynchronously; the inode is returned
 * immediately with obj_2bcreated set, and create_done() flips it to
 * obj_created when the OSD acknowledges.  Users that need the object to
 * exist must call wait_obj_created() first.
 *
 * Returns the new inode or ERR_PTR(-errno).
 */
struct inode *exofs_new_inode(struct inode *dir, int mode)
{
	struct super_block *sb;
	struct inode *inode;
	struct exofs_i_info *oi;
	struct exofs_sb_info *sbi;
	struct osd_request *or;
	struct osd_obj_id obj;
	int ret;

	sb = dir->i_sb;
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	oi = exofs_i(inode);

	/* object does not exist yet - waiters will sleep on i_wq */
	init_waitqueue_head(&oi->i_wq);
	set_obj_2bcreated(oi);

	sbi = sb->s_fs_info;

	sb->s_dirt = 1;
	inode->i_uid = current->cred->fsuid;
	/* honor setgid directories, as ext2 does */
	if (dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else {
		inode->i_gid = current->cred->fsgid;
	}
	inode->i_mode = mode;

	/* inode numbers are handed out from a simple per-sb counter */
	inode->i_ino = sbi->s_nextid++;
	inode->i_blkbits = EXOFS_BLKSHIFT;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	oi->i_commit_size = inode->i_size = 0;
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
	insert_inode_hash(inode);

	mark_inode_dirty(inode);

	obj.partition = sbi->s_pid;
	obj.id = inode->i_ino + EXOFS_OBJ_OFF;
	exofs_make_credential(oi->i_cred, &obj);

	/* NOTE(review): on the two error returns below the freshly
	 * allocated and hashed inode is returned as ERR_PTR without an
	 * iput() - looks like an inode leak; confirm against later
	 * upstream exofs. */
	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
		return ERR_PTR(-ENOMEM);
	}

	osd_req_create_object(or, &obj);

	/* increment the refcount so that the inode will still be around when we
	 * reach the callback
	 */
	atomic_inc(&inode->i_count);

	ret = exofs_async_op(or, create_done, inode, oi->i_cred);
	if (ret) {
		atomic_dec(&inode->i_count);
		osd_end_request(or);
		return ERR_PTR(-EIO);
	}
	atomic_inc(&sbi->s_curr_pending);

	return inode;
}
1118 | |||
/*
 * struct to pass two arguments to update_inode's callback
 */
struct updatei_args {
	struct exofs_sb_info *sbi;	/* for decrementing s_curr_pending */
	struct exofs_fcb fcb;		/* on-disk inode image sent to the OSD;
					 * lives on the heap so it survives
					 * until the async callback runs */
};
1126 | |||
1127 | /* | ||
1128 | * Callback function from exofs_update_inode(). | ||
1129 | */ | ||
1130 | static void updatei_done(struct osd_request *or, void *p) | ||
1131 | { | ||
1132 | struct updatei_args *args = p; | ||
1133 | |||
1134 | osd_end_request(or); | ||
1135 | |||
1136 | atomic_dec(&args->sbi->s_curr_pending); | ||
1137 | |||
1138 | kfree(args); | ||
1139 | } | ||
1140 | |||
/*
 * Write the inode to the OSD. Just fill up the struct, and set the attribute
 * synchronously or asynchronously depending on the do_sync flag.
 *
 * The fcb is built inside a heap-allocated updatei_args so that, on the
 * async path, the buffer stays alive until updatei_done() frees it.
 *
 * Returns 0 on success (or successful async submission), negative errno
 * otherwise.
 */
static int exofs_update_inode(struct inode *inode, int do_sync)
{
	struct exofs_i_info *oi = exofs_i(inode);
	struct super_block *sb = inode->i_sb;
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
	struct osd_request *or;
	struct osd_attr attr;
	struct exofs_fcb *fcb;
	struct updatei_args *args;
	int ret;

	args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!args)
		return -ENOMEM;

	fcb = &args->fcb;

	/* serialize the in-core inode into the little-endian on-disk form */
	fcb->i_mode = cpu_to_le16(inode->i_mode);
	fcb->i_uid = cpu_to_le32(inode->i_uid);
	fcb->i_gid = cpu_to_le32(inode->i_gid);
	fcb->i_links_count = cpu_to_le16(inode->i_nlink);
	fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
	fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
	fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
	oi->i_commit_size = i_size_read(inode);
	fcb->i_size = cpu_to_le64(oi->i_commit_size);
	fcb->i_generation = cpu_to_le32(inode->i_generation);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		/* ext2-style device number encoding: old format in
		 * i_data[0], new format in i_data[1] */
		if (old_valid_dev(inode->i_rdev)) {
			fcb->i_data[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			fcb->i_data[1] = 0;
		} else {
			fcb->i_data[0] = 0;
			fcb->i_data[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			fcb->i_data[2] = 0;
		}
	} else
		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));

	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
		ret = -ENOMEM;
		goto free_args;
	}

	osd_req_set_attributes(or, &obj);

	/* ship the whole fcb as the inode-data attribute */
	attr = g_attr_inode_data;
	attr.val_ptr = fcb;
	osd_req_add_set_attr_list(or, &attr, 1);

	/* can't set attributes on an object that doesn't exist yet */
	if (!obj_created(oi)) {
		EXOFS_DBGMSG("!obj_created\n");
		BUG_ON(!obj_2bcreated(oi));
		wait_event(oi->i_wq, obj_created(oi));
		EXOFS_DBGMSG("wait_event done\n");
	}

	if (do_sync) {
		ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
		osd_end_request(or);
		goto free_args;
	} else {
		args->sbi = sbi;

		ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
		if (ret) {
			osd_end_request(or);
			goto free_args;
		}
		atomic_inc(&sbi->s_curr_pending);
		goto out; /* deallocation in updatei_done */
	}

free_args:
	kfree(args);
out:
	EXOFS_DBGMSG("ret=>%d\n", ret);
	return ret;
}
1230 | |||
/*
 * VFS ->write_inode entry point: flush the in-core inode to the OSD,
 * synchronously when @wait is non-zero.
 */
int exofs_write_inode(struct inode *inode, int wait)
{
	int ret = exofs_update_inode(inode, wait);

	return ret;
}
1235 | |||
1236 | /* | ||
1237 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | ||
1238 | * do. | ||
1239 | */ | ||
1240 | static void delete_done(struct osd_request *or, void *p) | ||
1241 | { | ||
1242 | struct exofs_sb_info *sbi; | ||
1243 | osd_end_request(or); | ||
1244 | sbi = p; | ||
1245 | atomic_dec(&sbi->s_curr_pending); | ||
1246 | } | ||
1247 | |||
/*
 * Called when the refcount of an inode reaches zero. We remove the object
 * from the OSD here. We make sure the object was created before we try and
 * delete it.
 *
 * The removal itself is submitted asynchronously; delete_done() cleans up.
 */
void exofs_delete_inode(struct inode *inode)
{
	struct exofs_i_info *oi = exofs_i(inode);
	struct super_block *sb = inode->i_sb;
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
	struct osd_request *or;
	int ret;

	truncate_inode_pages(&inode->i_data, 0);

	if (is_bad_inode(inode))
		goto no_delete;

	/* push the final attributes before the object disappears */
	mark_inode_dirty(inode);
	exofs_update_inode(inode, inode_needs_sync(inode));

	inode->i_size = 0;
	if (inode->i_blocks)
		exofs_truncate(inode);

	clear_inode(inode);

	/* NOTE(review): from here on, failures just return - the object
	 * is left allocated on the OSD (space leak on the device, but the
	 * VFS side is already torn down); confirm this is intentional
	 * best-effort behavior. */
	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
		return;
	}

	osd_req_remove_object(or, &obj);

	/* if we are deleting an obj that hasn't been created yet, wait */
	if (!obj_created(oi)) {
		BUG_ON(!obj_2bcreated(oi));
		wait_event(oi->i_wq, obj_created(oi));
	}

	ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
	if (ret) {
		EXOFS_ERR(
		       "ERROR: @exofs_delete_inode exofs_async_op failed\n");
		osd_end_request(or);
		return;
	}
	atomic_inc(&sbi->s_curr_pending);

	return;

no_delete:
	/* bad inode: nothing to write back, just detach it from the VFS */
	clear_inode(inode);
}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c new file mode 100644 index 000000000000..77fdd765e76d --- /dev/null +++ b/fs/exofs/namei.c | |||
@@ -0,0 +1,342 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include "exofs.h" | ||
37 | |||
/*
 * Add a directory entry for @inode under @dentry and instantiate the
 * dentry.  On failure the new link count is rolled back and the inode
 * reference released.  Returns 0 or a negative errno.
 */
static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = exofs_add_link(dentry, inode);

	if (err) {
		inode_dec_link_count(inode);
		iput(inode);
		return err;
	}

	d_instantiate(dentry, inode);
	return 0;
}
49 | |||
50 | static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, | ||
51 | struct nameidata *nd) | ||
52 | { | ||
53 | struct inode *inode; | ||
54 | ino_t ino; | ||
55 | |||
56 | if (dentry->d_name.len > EXOFS_NAME_LEN) | ||
57 | return ERR_PTR(-ENAMETOOLONG); | ||
58 | |||
59 | ino = exofs_inode_by_name(dir, dentry); | ||
60 | inode = NULL; | ||
61 | if (ino) { | ||
62 | inode = exofs_iget(dir->i_sb, ino); | ||
63 | if (IS_ERR(inode)) | ||
64 | return ERR_CAST(inode); | ||
65 | } | ||
66 | return d_splice_alias(inode, dentry); | ||
67 | } | ||
68 | |||
69 | static int exofs_create(struct inode *dir, struct dentry *dentry, int mode, | ||
70 | struct nameidata *nd) | ||
71 | { | ||
72 | struct inode *inode = exofs_new_inode(dir, mode); | ||
73 | int err = PTR_ERR(inode); | ||
74 | if (!IS_ERR(inode)) { | ||
75 | inode->i_op = &exofs_file_inode_operations; | ||
76 | inode->i_fop = &exofs_file_operations; | ||
77 | inode->i_mapping->a_ops = &exofs_aops; | ||
78 | mark_inode_dirty(inode); | ||
79 | err = exofs_add_nondir(dentry, inode); | ||
80 | } | ||
81 | return err; | ||
82 | } | ||
83 | |||
84 | static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode, | ||
85 | dev_t rdev) | ||
86 | { | ||
87 | struct inode *inode; | ||
88 | int err; | ||
89 | |||
90 | if (!new_valid_dev(rdev)) | ||
91 | return -EINVAL; | ||
92 | |||
93 | inode = exofs_new_inode(dir, mode); | ||
94 | err = PTR_ERR(inode); | ||
95 | if (!IS_ERR(inode)) { | ||
96 | init_special_inode(inode, inode->i_mode, rdev); | ||
97 | mark_inode_dirty(inode); | ||
98 | err = exofs_add_nondir(dentry, inode); | ||
99 | } | ||
100 | return err; | ||
101 | } | ||
102 | |||
/*
 * VFS ->symlink: create a symbolic link to @symname.
 *
 * Short targets (fitting in the inode's i_data) become "fast" symlinks
 * stored inline; longer ones become page-cache-backed "slow" symlinks
 * written via page_symlink().
 */
static int exofs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct super_block *sb = dir->i_sb;
	int err = -ENAMETOOLONG;
	unsigned l = strlen(symname)+1;	/* include the NUL terminator */
	struct inode *inode;
	struct exofs_i_info *oi;

	if (l > sb->s_blocksize)
		goto out;

	inode = exofs_new_inode(dir, S_IFLNK | S_IRWXUGO);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out;

	oi = exofs_i(inode);
	if (l > sizeof(oi->i_data)) {
		/* slow symlink */
		inode->i_op = &exofs_symlink_inode_operations;
		inode->i_mapping->a_ops = &exofs_aops;
		memset(oi->i_data, 0, sizeof(oi->i_data));

		err = page_symlink(inode, symname, l);
		if (err)
			goto out_fail;
	} else {
		/* fast symlink */
		inode->i_op = &exofs_fast_symlink_inode_operations;
		memcpy(oi->i_data, symname, l);
		inode->i_size = l-1;	/* i_size excludes the NUL */
	}
	mark_inode_dirty(inode);

	err = exofs_add_nondir(dentry, inode);
out:
	return err;

out_fail:
	/* undo the new inode; exofs_add_nondir does this itself on failure */
	inode_dec_link_count(inode);
	iput(inode);
	goto out;
}
147 | |||
148 | static int exofs_link(struct dentry *old_dentry, struct inode *dir, | ||
149 | struct dentry *dentry) | ||
150 | { | ||
151 | struct inode *inode = old_dentry->d_inode; | ||
152 | |||
153 | if (inode->i_nlink >= EXOFS_LINK_MAX) | ||
154 | return -EMLINK; | ||
155 | |||
156 | inode->i_ctime = CURRENT_TIME; | ||
157 | inode_inc_link_count(inode); | ||
158 | atomic_inc(&inode->i_count); | ||
159 | |||
160 | return exofs_add_nondir(dentry, inode); | ||
161 | } | ||
162 | |||
/*
 * VFS ->mkdir: create a directory.
 *
 * Link-count bookkeeping: the parent gains a link for the new "..", and
 * the new directory starts at nlink==2 ("." plus its entry in the
 * parent), hence the extra inode_inc_link_count() calls - and the
 * matching double decrement on the failure path.
 */
static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct inode *inode;
	int err = -EMLINK;

	if (dir->i_nlink >= EXOFS_LINK_MAX)
		goto out;

	/* account for the ".." entry the new directory will hold */
	inode_inc_link_count(dir);

	inode = exofs_new_inode(dir, S_IFDIR | mode);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out_dir;

	inode->i_op = &exofs_dir_inode_operations;
	inode->i_fop = &exofs_dir_operations;
	inode->i_mapping->a_ops = &exofs_aops;

	/* account for the "." entry */
	inode_inc_link_count(inode);

	/* write the "." and ".." entries */
	err = exofs_make_empty(inode, dir);
	if (err)
		goto out_fail;

	err = exofs_add_link(dentry, inode);
	if (err)
		goto out_fail;

	d_instantiate(dentry, inode);
out:
	return err;

out_fail:
	/* drop both the "." link and the base link before releasing */
	inode_dec_link_count(inode);
	inode_dec_link_count(inode);
	iput(inode);
out_dir:
	inode_dec_link_count(dir);
	goto out;
}
204 | |||
205 | static int exofs_unlink(struct inode *dir, struct dentry *dentry) | ||
206 | { | ||
207 | struct inode *inode = dentry->d_inode; | ||
208 | struct exofs_dir_entry *de; | ||
209 | struct page *page; | ||
210 | int err = -ENOENT; | ||
211 | |||
212 | de = exofs_find_entry(dir, dentry, &page); | ||
213 | if (!de) | ||
214 | goto out; | ||
215 | |||
216 | err = exofs_delete_entry(de, page); | ||
217 | if (err) | ||
218 | goto out; | ||
219 | |||
220 | inode->i_ctime = dir->i_ctime; | ||
221 | inode_dec_link_count(inode); | ||
222 | err = 0; | ||
223 | out: | ||
224 | return err; | ||
225 | } | ||
226 | |||
227 | static int exofs_rmdir(struct inode *dir, struct dentry *dentry) | ||
228 | { | ||
229 | struct inode *inode = dentry->d_inode; | ||
230 | int err = -ENOTEMPTY; | ||
231 | |||
232 | if (exofs_empty_dir(inode)) { | ||
233 | err = exofs_unlink(dir, dentry); | ||
234 | if (!err) { | ||
235 | inode->i_size = 0; | ||
236 | inode_dec_link_count(inode); | ||
237 | inode_dec_link_count(dir); | ||
238 | } | ||
239 | } | ||
240 | return err; | ||
241 | } | ||
242 | |||
/*
 * VFS ->rename, following the classic ext2 scheme:
 *  - if the target exists, repoint its directory entry at old_inode;
 *  - otherwise add a fresh entry in new_dir;
 *  - then delete the old entry, and for directories repoint ".." and
 *    fix up the parents' link counts.
 */
static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct page *dir_page = NULL;
	struct exofs_dir_entry *dir_de = NULL;	/* ".." of a moved directory */
	struct page *old_page;
	struct exofs_dir_entry *old_de;
	int err = -ENOENT;

	old_de = exofs_find_entry(old_dir, old_dentry, &old_page);
	if (!old_de)
		goto out;

	if (S_ISDIR(old_inode->i_mode)) {
		err = -EIO;
		dir_de = exofs_dotdot(old_inode, &dir_page);
		if (!dir_de)
			goto out_old;
	}

	if (new_inode) {
		/* target exists: overwrite its entry */
		struct page *new_page;
		struct exofs_dir_entry *new_de;

		/* a directory can only replace an empty directory */
		err = -ENOTEMPTY;
		if (dir_de && !exofs_empty_dir(new_inode))
			goto out_dir;

		err = -ENOENT;
		new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
		if (!new_de)
			goto out_dir;
		inode_inc_link_count(old_inode);
		err = exofs_set_link(new_dir, new_de, new_page, old_inode);
		/* NOTE(review): ctime/nlink of new_inode are updated even
		 * when exofs_set_link() failed - mirrors the ext2 code of
		 * this era; confirm this is the intended semantic. */
		new_inode->i_ctime = CURRENT_TIME;
		if (dir_de)
			drop_nlink(new_inode);	/* its ".." went away */
		inode_dec_link_count(new_inode);
		if (err)
			goto out_dir;
	} else {
		/* target does not exist: add a new entry in new_dir */
		if (dir_de) {
			err = -EMLINK;
			if (new_dir->i_nlink >= EXOFS_LINK_MAX)
				goto out_dir;
		}
		inode_inc_link_count(old_inode);
		err = exofs_add_link(new_dentry, old_inode);
		if (err) {
			inode_dec_link_count(old_inode);
			goto out_dir;
		}
		if (dir_de)
			inode_inc_link_count(new_dir);	/* new ".." link */
	}

	old_inode->i_ctime = CURRENT_TIME;

	/* remove the old name; drops the temporary link taken above */
	exofs_delete_entry(old_de, old_page);
	inode_dec_link_count(old_inode);

	if (dir_de) {
		/* repoint the moved directory's ".." at its new parent */
		err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
		inode_dec_link_count(old_dir);
		if (err)
			goto out_dir;
	}
	return 0;


out_dir:
	if (dir_de) {
		kunmap(dir_page);
		page_cache_release(dir_page);
	}
out_old:
	kunmap(old_page);
	page_cache_release(old_page);
out:
	return err;
}
326 | |||
/* inode operations for directories: the full set of namespace methods */
const struct inode_operations exofs_dir_inode_operations = {
	.create 	= exofs_create,
	.lookup 	= exofs_lookup,
	.link   	= exofs_link,
	.unlink 	= exofs_unlink,
	.symlink	= exofs_symlink,
	.mkdir  	= exofs_mkdir,
	.rmdir  	= exofs_rmdir,
	.mknod  	= exofs_mknod,
	.rename 	= exofs_rename,
	.setattr	= exofs_setattr,
};

/* inode operations for special files (devices, fifos, sockets):
 * only attribute changes are meaningful */
const struct inode_operations exofs_special_inode_operations = {
	.setattr	= exofs_setattr,
};
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c new file mode 100644 index 000000000000..b249ae97fb15 --- /dev/null +++ b/fs/exofs/osd.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * This file is part of exofs. | ||
10 | * | ||
11 | * exofs is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
14 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
15 | * version of GPL for exofs is version 2. | ||
16 | * | ||
17 | * exofs is distributed in the hope that it will be useful, | ||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | * GNU General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with exofs; if not, write to the Free Software | ||
24 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
25 | */ | ||
26 | |||
27 | #include <scsi/scsi_device.h> | ||
28 | #include <scsi/osd_sense.h> | ||
29 | |||
30 | #include "exofs.h" | ||
31 | |||
32 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | ||
33 | { | ||
34 | struct osd_sense_info osi; | ||
35 | int ret = osd_req_decode_sense(or, &osi); | ||
36 | |||
37 | if (ret) { /* translate to Linux codes */ | ||
38 | if (osi.additional_code == scsi_invalid_field_in_cdb) { | ||
39 | if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | ||
40 | ret = -EFAULT; | ||
41 | if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | ||
42 | ret = -ENOENT; | ||
43 | else | ||
44 | ret = -EINVAL; | ||
45 | } else if (osi.additional_code == osd_quota_error) | ||
46 | ret = -ENOSPC; | ||
47 | else | ||
48 | ret = -EIO; | ||
49 | } | ||
50 | |||
51 | /* FIXME: should be include in osd_sense_info */ | ||
52 | if (in_resid) | ||
53 | *in_resid = or->in.req ? or->in.req->data_len : 0; | ||
54 | |||
55 | if (out_resid) | ||
56 | *out_resid = or->out.req ? or->out.req->data_len : 0; | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
/*
 * Initialize an all-capabilities, no-security credential for @obj in
 * @cred_a, as used with every exofs OSD request.
 */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
{
	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
}
65 | |||
66 | /* | ||
67 | * Perform a synchronous OSD operation. | ||
68 | */ | ||
69 | int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
70 | { | ||
71 | int ret; | ||
72 | |||
73 | or->timeout = timeout; | ||
74 | ret = osd_finalize_request(or, 0, credential, NULL); | ||
75 | if (ret) { | ||
76 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
77 | return ret; | ||
78 | } | ||
79 | |||
80 | ret = osd_execute_request(or); | ||
81 | |||
82 | if (ret) | ||
83 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
84 | /* osd_req_decode_sense(or, ret); */ | ||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Perform an asynchronous OSD operation. | ||
90 | */ | ||
91 | int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
92 | void *caller_context, u8 *cred) | ||
93 | { | ||
94 | int ret; | ||
95 | |||
96 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
97 | if (ret) { | ||
98 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | ret = osd_execute_request_async(or, async_done, caller_context); | ||
103 | |||
104 | if (ret) | ||
105 | EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | ||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | ||
110 | { | ||
111 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
112 | void *iter = NULL; | ||
113 | int nelem; | ||
114 | |||
115 | do { | ||
116 | nelem = 1; | ||
117 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | ||
118 | if ((cur_attr.attr_page == attr->attr_page) && | ||
119 | (cur_attr.attr_id == attr->attr_id)) { | ||
120 | attr->len = cur_attr.len; | ||
121 | attr->val_ptr = cur_attr.val_ptr; | ||
122 | return 0; | ||
123 | } | ||
124 | } while (iter); | ||
125 | |||
126 | return -EIO; | ||
127 | } | ||
128 | |||
129 | int osd_req_read_kern(struct osd_request *or, | ||
130 | const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) | ||
131 | { | ||
132 | struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; | ||
133 | struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); | ||
134 | |||
135 | if (!bio) | ||
136 | return -ENOMEM; | ||
137 | |||
138 | osd_req_read(or, obj, bio, offset); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | int osd_req_write_kern(struct osd_request *or, | ||
143 | const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) | ||
144 | { | ||
145 | struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; | ||
146 | struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); | ||
147 | |||
148 | if (!bio) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | osd_req_write(or, obj, bio, offset); | ||
152 | return 0; | ||
153 | } | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c new file mode 100644 index 000000000000..9f1985e857e2 --- /dev/null +++ b/fs/exofs/super.c | |||
@@ -0,0 +1,584 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/string.h> | ||
37 | #include <linux/parser.h> | ||
38 | #include <linux/vfs.h> | ||
39 | #include <linux/random.h> | ||
40 | #include <linux/exportfs.h> | ||
41 | |||
42 | #include "exofs.h" | ||
43 | |||
44 | /****************************************************************************** | ||
45 | * MOUNT OPTIONS | ||
46 | *****************************************************************************/ | ||
47 | |||
/*
 * Parsed mount options: the OSD device path, the target partition id,
 * and the request timeout (scaled to jiffies by parse_options()).
 */
struct exofs_mountopt {
	const char *dev_name;
	uint64_t pid;
	int timeout;
};

/*
 * exofs-specific mount-time option tokens.
 */
enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };

/*
 * Option match table.  pid should ideally be 64-bit unsigned, but the
 * kernel's match_int() only handles 32 bits, so parse_options()
 * re-parses the matched string with simple_strtoull() instead.
 */
static match_table_t tokens = {
	{Opt_pid, "pid=%u"},
	{Opt_to, "to=%u"},
	{Opt_err, NULL}
};
72 | |||
73 | /* | ||
74 | * The main option parsing method. Also makes sure that all of the mandatory | ||
75 | * mount options were set. | ||
76 | */ | ||
77 | static int parse_options(char *options, struct exofs_mountopt *opts) | ||
78 | { | ||
79 | char *p; | ||
80 | substring_t args[MAX_OPT_ARGS]; | ||
81 | int option; | ||
82 | bool s_pid = false; | ||
83 | |||
84 | EXOFS_DBGMSG("parse_options %s\n", options); | ||
85 | /* defaults */ | ||
86 | memset(opts, 0, sizeof(*opts)); | ||
87 | opts->timeout = BLK_DEFAULT_SG_TIMEOUT; | ||
88 | |||
89 | while ((p = strsep(&options, ",")) != NULL) { | ||
90 | int token; | ||
91 | char str[32]; | ||
92 | |||
93 | if (!*p) | ||
94 | continue; | ||
95 | |||
96 | token = match_token(p, tokens, args); | ||
97 | switch (token) { | ||
98 | case Opt_pid: | ||
99 | if (0 == match_strlcpy(str, &args[0], sizeof(str))) | ||
100 | return -EINVAL; | ||
101 | opts->pid = simple_strtoull(str, NULL, 0); | ||
102 | if (opts->pid < EXOFS_MIN_PID) { | ||
103 | EXOFS_ERR("Partition ID must be >= %u", | ||
104 | EXOFS_MIN_PID); | ||
105 | return -EINVAL; | ||
106 | } | ||
107 | s_pid = 1; | ||
108 | break; | ||
109 | case Opt_to: | ||
110 | if (match_int(&args[0], &option)) | ||
111 | return -EINVAL; | ||
112 | if (option <= 0) { | ||
113 | EXOFS_ERR("Timout must be > 0"); | ||
114 | return -EINVAL; | ||
115 | } | ||
116 | opts->timeout = option * HZ; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (!s_pid) { | ||
122 | EXOFS_ERR("Need to specify the following options:\n"); | ||
123 | EXOFS_ERR(" -o pid=pid_no_to_use\n"); | ||
124 | return -EINVAL; | ||
125 | } | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | /****************************************************************************** | ||
131 | * INODE CACHE | ||
132 | *****************************************************************************/ | ||
133 | |||
/*
 * Slab cache for struct exofs_i_info (the in-memory exofs inode,
 * which embeds the VFS inode).
 */
static struct kmem_cache *exofs_inode_cachep;
138 | |||
139 | /* | ||
140 | * Allocate an inode in the cache | ||
141 | */ | ||
142 | static struct inode *exofs_alloc_inode(struct super_block *sb) | ||
143 | { | ||
144 | struct exofs_i_info *oi; | ||
145 | |||
146 | oi = kmem_cache_alloc(exofs_inode_cachep, GFP_KERNEL); | ||
147 | if (!oi) | ||
148 | return NULL; | ||
149 | |||
150 | oi->vfs_inode.i_version = 1; | ||
151 | return &oi->vfs_inode; | ||
152 | } | ||
153 | |||
/*
 * ->destroy_inode: return the exofs-private inode (container of the
 * VFS inode) to the slab cache.
 */
static void exofs_destroy_inode(struct inode *inode)
{
	kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
}
161 | |||
/*
 * Slab constructor: runs once per object when the cache grows, so only
 * once-only initialization (the embedded VFS inode) belongs here.
 */
static void exofs_init_once(void *foo)
{
	struct exofs_i_info *oi = foo;

	inode_init_once(&oi->vfs_inode);
}
171 | |||
172 | /* | ||
173 | * Create and initialize the inode cache | ||
174 | */ | ||
175 | static int init_inodecache(void) | ||
176 | { | ||
177 | exofs_inode_cachep = kmem_cache_create("exofs_inode_cache", | ||
178 | sizeof(struct exofs_i_info), 0, | ||
179 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
180 | exofs_init_once); | ||
181 | if (exofs_inode_cachep == NULL) | ||
182 | return -ENOMEM; | ||
183 | return 0; | ||
184 | } | ||
185 | |||
/*
 * Tear down the inode slab cache (module exit, or failed module init).
 */
static void destroy_inodecache(void)
{
	kmem_cache_destroy(exofs_inode_cachep);
}
193 | |||
194 | /****************************************************************************** | ||
195 | * SUPERBLOCK FUNCTIONS | ||
196 | *****************************************************************************/ | ||
/* defined at the bottom of this file; referenced by exofs_fill_super() */
static const struct super_operations exofs_sops;
static const struct export_operations exofs_export_ops;
199 | |||
/*
 * ->write_super: write the in-memory superblock summary (next inode id,
 * file count, magic) synchronously to the EXOFS_SUPER_ID object on the
 * OSD, clearing sb->s_dirt on success.
 */
static void exofs_write_super(struct super_block *sb)
{
	struct exofs_sb_info *sbi;
	struct exofs_fscb *fscb;
	struct osd_request *or;
	struct osd_obj_id obj;
	int ret;

	/* heap buffer: it must remain valid while the OSD request maps it */
	fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
	if (!fscb) {
		EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
		return;
	}

	/* BKL serializes concurrent superblock writers */
	lock_kernel();
	sbi = sb->s_fs_info;
	fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
	fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
	fscb->s_magic = cpu_to_le16(sb->s_magic);
	fscb->s_newfs = 0;

	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		/* NOTE: 'or' is NULL here, so the out: path skips
		 * osd_end_request(); sb->s_dirt stays set for a retry */
		EXOFS_ERR("exofs_write_super: osd_start_request failed.\n");
		goto out;
	}

	obj.partition = sbi->s_pid;
	obj.id = EXOFS_SUPER_ID;
	ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
	if (unlikely(ret)) {
		EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n");
		goto out;
	}

	/* synchronous execution; only on success do we mark sb clean */
	ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
	if (unlikely(ret)) {
		EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
		goto out;
	}
	sb->s_dirt = 0;

out:
	if (or)
		osd_end_request(or);
	unlock_kernel();
	kfree(fscb);
}
251 | |||
/*
 * ->put_super: the VFS is freeing the superblock; release our private
 * part.  Waits for all in-flight OSD commands to drain before dropping
 * the device reference.
 */
static void exofs_put_super(struct super_block *sb)
{
	int num_pend;
	struct exofs_sb_info *sbi = sb->s_fs_info;

	/* make sure there are no pending commands.  Nothing ever wakes
	 * this throwaway waitqueue, so each wait_event_timeout() is in
	 * effect a 100ms poll of s_curr_pending */
	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
	     num_pend = atomic_read(&sbi->s_curr_pending)) {
		wait_queue_head_t wq;
		init_waitqueue_head(&wq);
		wait_event_timeout(wq,
				   (atomic_read(&sbi->s_curr_pending) == 0),
				   msecs_to_jiffies(100));
	}

	osduld_put_device(sbi->s_dev);
	kfree(sb->s_fs_info);
	sb->s_fs_info = NULL;
}
275 | |||
/*
 * Fill in the VFS superblock from the on-OSD superblock object.
 *
 * Looks up the OSD device named in the mount options, reads the
 * EXOFS_SUPER_ID object from the requested partition, validates the
 * magic, and instantiates the root inode.
 *
 * Error handling: free_sbi: releases the device and sbi, then falls
 * through to out: which ends the osd request (NULL-safe) and returns.
 */
static int exofs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *root;
	struct exofs_mountopt *opts = data;
	struct exofs_sb_info *sbi;	/* our private, extended sb info */
	struct exofs_fscb fscb;		/* on-disk superblock, read here */
	struct osd_request *or = NULL;
	struct osd_obj_id obj;
	int ret;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	sb->s_fs_info = sbi;
	/* NOTE(review): error paths kfree(sbi) but leave sb->s_fs_info
	 * pointing at it; harmless only while ->put_super is never run
	 * on a superblock without s_root — confirm */

	/* use mount options to fill superblock */
	sbi->s_dev = osduld_path_lookup(opts->dev_name);
	if (IS_ERR(sbi->s_dev)) {
		ret = PTR_ERR(sbi->s_dev);
		sbi->s_dev = NULL;	/* so free_sbi: won't put it */
		goto free_sbi;
	}

	sbi->s_pid = opts->pid;
	sbi->s_timeout = opts->timeout;

	/* fill in some other data by hand */
	memset(sb->s_id, 0, sizeof(sb->s_id));
	strcpy(sb->s_id, "exofs");
	sb->s_blocksize = EXOFS_BLKSIZE;
	sb->s_blocksize_bits = EXOFS_BLKSHIFT;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	atomic_set(&sbi->s_curr_pending, 0);
	sb->s_bdev = NULL;	/* no block device behind this fs */
	sb->s_dev = 0;

	/* read data from on-disk superblock object */
	obj.partition = sbi->s_pid;
	obj.id = EXOFS_SUPER_ID;
	exofs_make_credential(sbi->s_cred, &obj);

	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		if (!silent)
			EXOFS_ERR(
			       "exofs_fill_super: osd_start_request failed.\n");
		ret = -ENOMEM;
		goto free_sbi;
	}
	ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
	if (unlikely(ret)) {
		if (!silent)
			EXOFS_ERR(
			       "exofs_fill_super: osd_req_read_kern failed.\n");
		/* NOTE(review): the original error code is overwritten
		 * with -ENOMEM here — confirm that is intended */
		ret = -ENOMEM;
		goto free_sbi;
	}

	/* execute the read synchronously */
	ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
	if (unlikely(ret)) {
		if (!silent)
			EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
		ret = -EIO;
		goto free_sbi;
	}

	sb->s_magic = le16_to_cpu(fscb.s_magic);
	sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
	sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);

	/* make sure what we read from the object store is correct */
	if (sb->s_magic != EXOFS_SUPER_MAGIC) {
		if (!silent)
			EXOFS_ERR("ERROR: Bad magic value\n");
		ret = -EINVAL;
		goto free_sbi;
	}

	/* start generation numbers from a random point */
	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
	spin_lock_init(&sbi->s_next_gen_lock);

	/* set up operation vectors */
	sb->s_op = &exofs_sops;
	sb->s_export_op = &exofs_export_ops;
	/* object ids are offset from inode numbers by EXOFS_OBJ_OFF */
	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
	if (IS_ERR(root)) {
		EXOFS_ERR("ERROR: exofs_iget failed\n");
		ret = PTR_ERR(root);
		goto free_sbi;
	}
	sb->s_root = d_alloc_root(root);
	if (!sb->s_root) {
		iput(root);	/* d_alloc_root did not consume our ref */
		EXOFS_ERR("ERROR: get root inode failed\n");
		ret = -ENOMEM;
		goto free_sbi;
	}

	if (!S_ISDIR(root->i_mode)) {
		dput(sb->s_root);	/* drops the root inode too */
		sb->s_root = NULL;
		EXOFS_ERR("ERROR: corrupt root inode (mode = %hd)\n",
		       root->i_mode);
		ret = -EINVAL;
		goto free_sbi;
	}

	ret = 0;
out:
	if (or)
		osd_end_request(or);
	return ret;

free_sbi:
	osduld_put_device(sbi->s_dev);	/* NULL safe */
	kfree(sbi);
	goto out;
}
398 | |||
399 | /* | ||
400 | * Set up the superblock (calls exofs_fill_super eventually) | ||
401 | */ | ||
402 | static int exofs_get_sb(struct file_system_type *type, | ||
403 | int flags, const char *dev_name, | ||
404 | void *data, struct vfsmount *mnt) | ||
405 | { | ||
406 | struct exofs_mountopt opts; | ||
407 | int ret; | ||
408 | |||
409 | ret = parse_options(data, &opts); | ||
410 | if (ret) | ||
411 | return ret; | ||
412 | |||
413 | opts.dev_name = dev_name; | ||
414 | return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); | ||
415 | } | ||
416 | |||
/*
 * ->statfs: report filesystem usage (for df and friends) by querying
 * the partition's capacity-quota and used-capacity attributes from the
 * OSD.  If an attribute cannot be fetched, the corresponding figure
 * falls back to ULLONG_MAX.
 */
static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct osd_obj_id obj = {sbi->s_pid, 0};
	struct osd_attr attrs[] = {
		ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
			OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
		ATTR_DEF(OSD_APAGE_PARTITION_INFORMATION,
			OSD_ATTR_PI_USED_CAPACITY, sizeof(__be64)),
	};
	uint64_t capacity = ULLONG_MAX;
	uint64_t used = ULLONG_MAX;
	struct osd_request *or;
	uint8_t cred_a[OSD_CAP_LEN];
	int ret;

	/* get used/capacity attributes */
	exofs_make_credential(cred_a, &obj);

	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
		return -ENOMEM;
	}

	osd_req_get_attributes(or, &obj);
	osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs));
	ret = exofs_sync_op(or, sbi->s_timeout, cred_a);
	if (unlikely(ret))
		goto out;

	ret = extract_attr_from_req(or, &attrs[0]);
	if (likely(!ret))
		capacity = get_unaligned_be64(attrs[0].val_ptr);
	else
		EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");

	ret = extract_attr_from_req(or, &attrs[1]);
	if (likely(!ret))
		used = get_unaligned_be64(attrs[1].val_ptr);
	else
		EXOFS_DBGMSG("exofs_statfs: get used-space failed.\n");
	/* NOTE(review): if this last extraction fails, buf is still
	 * filled below but ret (-EIO) is what gets returned — confirm
	 * whether statfs should instead succeed with the fallbacks */

	/* fill in the stats buffer */
	buf->f_type = EXOFS_SUPER_MAGIC;
	buf->f_bsize = EXOFS_BLKSIZE;
	buf->f_blocks = (capacity >> EXOFS_BLKSHIFT);
	buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT);
	buf->f_bavail = buf->f_bfree;
	buf->f_files = sbi->s_numfiles;
	buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
	buf->f_namelen = EXOFS_NAME_LEN;

out:
	osd_end_request(or);
	return ret;
}
479 | |||
/* superblock operations handed to the VFS via sb->s_op */
static const struct super_operations exofs_sops = {
	.alloc_inode	= exofs_alloc_inode,
	.destroy_inode	= exofs_destroy_inode,
	.write_inode	= exofs_write_inode,
	.delete_inode	= exofs_delete_inode,
	.put_super	= exofs_put_super,
	.write_super	= exofs_write_super,
	.statfs		= exofs_statfs,
};
489 | |||
490 | /****************************************************************************** | ||
491 | * EXPORT OPERATIONS | ||
492 | *****************************************************************************/ | ||
493 | |||
494 | struct dentry *exofs_get_parent(struct dentry *child) | ||
495 | { | ||
496 | unsigned long ino = exofs_parent_ino(child); | ||
497 | |||
498 | if (!ino) | ||
499 | return NULL; | ||
500 | |||
501 | return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino)); | ||
502 | } | ||
503 | |||
504 | static struct inode *exofs_nfs_get_inode(struct super_block *sb, | ||
505 | u64 ino, u32 generation) | ||
506 | { | ||
507 | struct inode *inode; | ||
508 | |||
509 | inode = exofs_iget(sb, ino); | ||
510 | if (IS_ERR(inode)) | ||
511 | return ERR_CAST(inode); | ||
512 | if (generation && inode->i_generation != generation) { | ||
513 | /* we didn't find the right inode.. */ | ||
514 | iput(inode); | ||
515 | return ERR_PTR(-ESTALE); | ||
516 | } | ||
517 | return inode; | ||
518 | } | ||
519 | |||
/* NFS export: decode a file handle into the object's dentry */
static struct dentry *exofs_fh_to_dentry(struct super_block *sb,
				struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    exofs_nfs_get_inode);
}
526 | |||
/* NFS export: decode a file handle into the parent's dentry */
static struct dentry *exofs_fh_to_parent(struct super_block *sb,
				struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    exofs_nfs_get_inode);
}
533 | |||
/* NFS export operations installed in sb->s_export_op */
static const struct export_operations exofs_export_ops = {
	.fh_to_dentry = exofs_fh_to_dentry,
	.fh_to_parent = exofs_fh_to_parent,
	.get_parent = exofs_get_parent,
};
539 | |||
540 | /****************************************************************************** | ||
541 | * INSMOD/RMMOD | ||
542 | *****************************************************************************/ | ||
543 | |||
/*
 * Filesystem type registration.  No private kill_sb is needed;
 * generic_shutdown_super() plus our ->put_super do all the cleanup.
 */
static struct file_system_type exofs_type = {
	.owner          = THIS_MODULE,
	.name           = "exofs",
	.get_sb         = exofs_get_sb,
	.kill_sb        = generic_shutdown_super,
};
553 | |||
554 | static int __init init_exofs(void) | ||
555 | { | ||
556 | int err; | ||
557 | |||
558 | err = init_inodecache(); | ||
559 | if (err) | ||
560 | goto out; | ||
561 | |||
562 | err = register_filesystem(&exofs_type); | ||
563 | if (err) | ||
564 | goto out_d; | ||
565 | |||
566 | return 0; | ||
567 | out_d: | ||
568 | destroy_inodecache(); | ||
569 | out: | ||
570 | return err; | ||
571 | } | ||
572 | |||
/* Module exit: undo init_exofs() in reverse order. */
static void __exit exit_exofs(void)
{
	unregister_filesystem(&exofs_type);
	destroy_inodecache();
}
578 | |||
579 | MODULE_AUTHOR("Avishay Traeger <avishay@gmail.com>"); | ||
580 | MODULE_DESCRIPTION("exofs"); | ||
581 | MODULE_LICENSE("GPL"); | ||
582 | |||
583 | module_init(init_exofs) | ||
584 | module_exit(exit_exofs) | ||
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c new file mode 100644 index 000000000000..36e2d7bc7f7b --- /dev/null +++ b/fs/exofs/symlink.c | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/namei.h> | ||
37 | |||
38 | #include "exofs.h" | ||
39 | |||
/*
 * ->follow_link for fast (in-inode) symlinks: the target path lives in
 * the exofs inode's i_data area, so just point the nameidata at it.
 * Returns NULL because there is no cookie for ->put_link to release.
 */
static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct exofs_i_info *oi = exofs_i(dentry->d_inode);

	nd_set_link(nd, (char *)oi->i_data);
	return NULL;
}
47 | |||
/* operations for long symlinks, whose target is stored in page cache */
const struct inode_operations exofs_symlink_inode_operations = {
	.readlink = generic_readlink,
	.follow_link = page_follow_link_light,
	.put_link = page_put_link,
};
53 | |||
/* operations for fast symlinks, whose target is stored in the inode */
const struct inode_operations exofs_fast_symlink_inode_operations = {
	.readlink = generic_readlink,
	.follow_link = exofs_follow_link,
};
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index ae8c4f850b27..d46e38cb85c5 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -318,7 +318,7 @@ ext2_init_acl(struct inode *inode, struct inode *dir) | |||
318 | return PTR_ERR(acl); | 318 | return PTR_ERR(acl); |
319 | } | 319 | } |
320 | if (!acl) | 320 | if (!acl) |
321 | inode->i_mode &= ~current->fs->umask; | 321 | inode->i_mode &= ~current_umask(); |
322 | } | 322 | } |
323 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | 323 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { |
324 | struct posix_acl *clone; | 324 | struct posix_acl *clone; |
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index b60bb241880c..d81ef2fdb08e 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -323,7 +323,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
323 | return PTR_ERR(acl); | 323 | return PTR_ERR(acl); |
324 | } | 324 | } |
325 | if (!acl) | 325 | if (!acl) |
326 | inode->i_mode &= ~current->fs->umask; | 326 | inode->i_mode &= ~current_umask(); |
327 | } | 327 | } |
328 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | 328 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { |
329 | struct posix_acl *clone; | 329 | struct posix_acl *clone; |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 5853f4440af4..3d724a95882f 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -42,7 +42,7 @@ const struct file_operations ext3_dir_operations = { | |||
42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
43 | .read = generic_read_dir, | 43 | .read = generic_read_dir, |
44 | .readdir = ext3_readdir, /* we take BKL. needed?*/ | 44 | .readdir = ext3_readdir, /* we take BKL. needed?*/ |
45 | .ioctl = ext3_ioctl, /* BKL held */ | 45 | .unlocked_ioctl = ext3_ioctl, |
46 | #ifdef CONFIG_COMPAT | 46 | #ifdef CONFIG_COMPAT |
47 | .compat_ioctl = ext3_compat_ioctl, | 47 | .compat_ioctl = ext3_compat_ioctl, |
48 | #endif | 48 | #endif |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 3be1e0689c9a..5b49704b231b 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -33,6 +33,10 @@ | |||
33 | */ | 33 | */ |
34 | static int ext3_release_file (struct inode * inode, struct file * filp) | 34 | static int ext3_release_file (struct inode * inode, struct file * filp) |
35 | { | 35 | { |
36 | if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { | ||
37 | filemap_flush(inode->i_mapping); | ||
38 | EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; | ||
39 | } | ||
36 | /* if we are the last writer on the inode, drop the block reservation */ | 40 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | if ((filp->f_mode & FMODE_WRITE) && | 41 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 42 | (atomic_read(&inode->i_writecount) == 1)) |
@@ -112,7 +116,7 @@ const struct file_operations ext3_file_operations = { | |||
112 | .write = do_sync_write, | 116 | .write = do_sync_write, |
113 | .aio_read = generic_file_aio_read, | 117 | .aio_read = generic_file_aio_read, |
114 | .aio_write = ext3_file_write, | 118 | .aio_write = ext3_file_write, |
115 | .ioctl = ext3_ioctl, | 119 | .unlocked_ioctl = ext3_ioctl, |
116 | #ifdef CONFIG_COMPAT | 120 | #ifdef CONFIG_COMPAT |
117 | .compat_ioctl = ext3_compat_ioctl, | 121 | .compat_ioctl = ext3_compat_ioctl, |
118 | #endif | 122 | #endif |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 4a09ff169870..466a332e0bd1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1149,12 +1149,15 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, | |||
1149 | struct page **pagep, void **fsdata) | 1149 | struct page **pagep, void **fsdata) |
1150 | { | 1150 | { |
1151 | struct inode *inode = mapping->host; | 1151 | struct inode *inode = mapping->host; |
1152 | int ret, needed_blocks = ext3_writepage_trans_blocks(inode); | 1152 | int ret; |
1153 | handle_t *handle; | 1153 | handle_t *handle; |
1154 | int retries = 0; | 1154 | int retries = 0; |
1155 | struct page *page; | 1155 | struct page *page; |
1156 | pgoff_t index; | 1156 | pgoff_t index; |
1157 | unsigned from, to; | 1157 | unsigned from, to; |
1158 | /* Reserve one block more for addition to orphan list in case | ||
1159 | * we allocate blocks but write fails for some reason */ | ||
1160 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; | ||
1158 | 1161 | ||
1159 | index = pos >> PAGE_CACHE_SHIFT; | 1162 | index = pos >> PAGE_CACHE_SHIFT; |
1160 | from = pos & (PAGE_CACHE_SIZE - 1); | 1163 | from = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1184,15 +1187,20 @@ retry: | |||
1184 | } | 1187 | } |
1185 | write_begin_failed: | 1188 | write_begin_failed: |
1186 | if (ret) { | 1189 | if (ret) { |
1187 | ext3_journal_stop(handle); | ||
1188 | unlock_page(page); | ||
1189 | page_cache_release(page); | ||
1190 | /* | 1190 | /* |
1191 | * block_write_begin may have instantiated a few blocks | 1191 | * block_write_begin may have instantiated a few blocks |
1192 | * outside i_size. Trim these off again. Don't need | 1192 | * outside i_size. Trim these off again. Don't need |
1193 | * i_size_read because we hold i_mutex. | 1193 | * i_size_read because we hold i_mutex. |
1194 | * | ||
1195 | * Add inode to orphan list in case we crash before truncate | ||
1196 | * finishes. | ||
1194 | */ | 1197 | */ |
1195 | if (pos + len > inode->i_size) | 1198 | if (pos + len > inode->i_size) |
1199 | ext3_orphan_add(handle, inode); | ||
1200 | ext3_journal_stop(handle); | ||
1201 | unlock_page(page); | ||
1202 | page_cache_release(page); | ||
1203 | if (pos + len > inode->i_size) | ||
1196 | vmtruncate(inode, inode->i_size); | 1204 | vmtruncate(inode, inode->i_size); |
1197 | } | 1205 | } |
1198 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1206 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1211,6 +1219,18 @@ int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1211 | return err; | 1219 | return err; |
1212 | } | 1220 | } |
1213 | 1221 | ||
1222 | /* For ordered writepage and write_end functions */ | ||
1223 | static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | ||
1224 | { | ||
1225 | /* | ||
1226 | * Write could have mapped the buffer but it didn't copy the data in | ||
1227 | * yet. So avoid filing such buffer into a transaction. | ||
1228 | */ | ||
1229 | if (buffer_mapped(bh) && buffer_uptodate(bh)) | ||
1230 | return ext3_journal_dirty_data(handle, bh); | ||
1231 | return 0; | ||
1232 | } | ||
1233 | |||
1214 | /* For write_end() in data=journal mode */ | 1234 | /* For write_end() in data=journal mode */ |
1215 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) | 1235 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) |
1216 | { | 1236 | { |
@@ -1221,26 +1241,20 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
1221 | } | 1241 | } |
1222 | 1242 | ||
1223 | /* | 1243 | /* |
1224 | * Generic write_end handler for ordered and writeback ext3 journal modes. | 1244 | * This is nasty and subtle: ext3_write_begin() could have allocated blocks |
1225 | * We can't use generic_write_end, because that unlocks the page and we need to | 1245 | * for the whole page but later we failed to copy the data in. Update inode |
1226 | * unlock the page after ext3_journal_stop, but ext3_journal_stop must run | 1246 | * size according to what we managed to copy. The rest is going to be |
1227 | * after block_write_end. | 1247 | * truncated in write_end function. |
1228 | */ | 1248 | */ |
1229 | static int ext3_generic_write_end(struct file *file, | 1249 | static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) |
1230 | struct address_space *mapping, | ||
1231 | loff_t pos, unsigned len, unsigned copied, | ||
1232 | struct page *page, void *fsdata) | ||
1233 | { | 1250 | { |
1234 | struct inode *inode = file->f_mapping->host; | 1251 | /* What matters to us is i_disksize. We don't write i_size anywhere */ |
1235 | 1252 | if (pos + copied > inode->i_size) | |
1236 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1253 | i_size_write(inode, pos + copied); |
1237 | 1254 | if (pos + copied > EXT3_I(inode)->i_disksize) { | |
1238 | if (pos+copied > inode->i_size) { | 1255 | EXT3_I(inode)->i_disksize = pos + copied; |
1239 | i_size_write(inode, pos+copied); | ||
1240 | mark_inode_dirty(inode); | 1256 | mark_inode_dirty(inode); |
1241 | } | 1257 | } |
1242 | |||
1243 | return copied; | ||
1244 | } | 1258 | } |
1245 | 1259 | ||
1246 | /* | 1260 | /* |
@@ -1260,35 +1274,29 @@ static int ext3_ordered_write_end(struct file *file, | |||
1260 | unsigned from, to; | 1274 | unsigned from, to; |
1261 | int ret = 0, ret2; | 1275 | int ret = 0, ret2; |
1262 | 1276 | ||
1263 | from = pos & (PAGE_CACHE_SIZE - 1); | 1277 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1264 | to = from + len; | ||
1265 | 1278 | ||
1279 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
1280 | to = from + copied; | ||
1266 | ret = walk_page_buffers(handle, page_buffers(page), | 1281 | ret = walk_page_buffers(handle, page_buffers(page), |
1267 | from, to, NULL, ext3_journal_dirty_data); | 1282 | from, to, NULL, journal_dirty_data_fn); |
1268 | 1283 | ||
1269 | if (ret == 0) { | 1284 | if (ret == 0) |
1270 | /* | 1285 | update_file_sizes(inode, pos, copied); |
1271 | * generic_write_end() will run mark_inode_dirty() if i_size | 1286 | /* |
1272 | * changes. So let's piggyback the i_disksize mark_inode_dirty | 1287 | * There may be allocated blocks outside of i_size because |
1273 | * into that. | 1288 | * we failed to copy some data. Prepare for truncate. |
1274 | */ | 1289 | */ |
1275 | loff_t new_i_size; | 1290 | if (pos + len > inode->i_size) |
1276 | 1291 | ext3_orphan_add(handle, inode); | |
1277 | new_i_size = pos + copied; | ||
1278 | if (new_i_size > EXT3_I(inode)->i_disksize) | ||
1279 | EXT3_I(inode)->i_disksize = new_i_size; | ||
1280 | ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, | ||
1281 | page, fsdata); | ||
1282 | copied = ret2; | ||
1283 | if (ret2 < 0) | ||
1284 | ret = ret2; | ||
1285 | } | ||
1286 | ret2 = ext3_journal_stop(handle); | 1292 | ret2 = ext3_journal_stop(handle); |
1287 | if (!ret) | 1293 | if (!ret) |
1288 | ret = ret2; | 1294 | ret = ret2; |
1289 | unlock_page(page); | 1295 | unlock_page(page); |
1290 | page_cache_release(page); | 1296 | page_cache_release(page); |
1291 | 1297 | ||
1298 | if (pos + len > inode->i_size) | ||
1299 | vmtruncate(inode, inode->i_size); | ||
1292 | return ret ? ret : copied; | 1300 | return ret ? ret : copied; |
1293 | } | 1301 | } |
1294 | 1302 | ||
@@ -1299,25 +1307,22 @@ static int ext3_writeback_write_end(struct file *file, | |||
1299 | { | 1307 | { |
1300 | handle_t *handle = ext3_journal_current_handle(); | 1308 | handle_t *handle = ext3_journal_current_handle(); |
1301 | struct inode *inode = file->f_mapping->host; | 1309 | struct inode *inode = file->f_mapping->host; |
1302 | int ret = 0, ret2; | 1310 | int ret; |
1303 | loff_t new_i_size; | ||
1304 | |||
1305 | new_i_size = pos + copied; | ||
1306 | if (new_i_size > EXT3_I(inode)->i_disksize) | ||
1307 | EXT3_I(inode)->i_disksize = new_i_size; | ||
1308 | |||
1309 | ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, | ||
1310 | page, fsdata); | ||
1311 | copied = ret2; | ||
1312 | if (ret2 < 0) | ||
1313 | ret = ret2; | ||
1314 | 1311 | ||
1315 | ret2 = ext3_journal_stop(handle); | 1312 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1316 | if (!ret) | 1313 | update_file_sizes(inode, pos, copied); |
1317 | ret = ret2; | 1314 | /* |
1315 | * There may be allocated blocks outside of i_size because | ||
1316 | * we failed to copy some data. Prepare for truncate. | ||
1317 | */ | ||
1318 | if (pos + len > inode->i_size) | ||
1319 | ext3_orphan_add(handle, inode); | ||
1320 | ret = ext3_journal_stop(handle); | ||
1318 | unlock_page(page); | 1321 | unlock_page(page); |
1319 | page_cache_release(page); | 1322 | page_cache_release(page); |
1320 | 1323 | ||
1324 | if (pos + len > inode->i_size) | ||
1325 | vmtruncate(inode, inode->i_size); | ||
1321 | return ret ? ret : copied; | 1326 | return ret ? ret : copied; |
1322 | } | 1327 | } |
1323 | 1328 | ||
@@ -1338,15 +1343,23 @@ static int ext3_journalled_write_end(struct file *file, | |||
1338 | if (copied < len) { | 1343 | if (copied < len) { |
1339 | if (!PageUptodate(page)) | 1344 | if (!PageUptodate(page)) |
1340 | copied = 0; | 1345 | copied = 0; |
1341 | page_zero_new_buffers(page, from+copied, to); | 1346 | page_zero_new_buffers(page, from + copied, to); |
1347 | to = from + copied; | ||
1342 | } | 1348 | } |
1343 | 1349 | ||
1344 | ret = walk_page_buffers(handle, page_buffers(page), from, | 1350 | ret = walk_page_buffers(handle, page_buffers(page), from, |
1345 | to, &partial, write_end_fn); | 1351 | to, &partial, write_end_fn); |
1346 | if (!partial) | 1352 | if (!partial) |
1347 | SetPageUptodate(page); | 1353 | SetPageUptodate(page); |
1348 | if (pos+copied > inode->i_size) | 1354 | |
1349 | i_size_write(inode, pos+copied); | 1355 | if (pos + copied > inode->i_size) |
1356 | i_size_write(inode, pos + copied); | ||
1357 | /* | ||
1358 | * There may be allocated blocks outside of i_size because | ||
1359 | * we failed to copy some data. Prepare for truncate. | ||
1360 | */ | ||
1361 | if (pos + len > inode->i_size) | ||
1362 | ext3_orphan_add(handle, inode); | ||
1350 | EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; | 1363 | EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; |
1351 | if (inode->i_size > EXT3_I(inode)->i_disksize) { | 1364 | if (inode->i_size > EXT3_I(inode)->i_disksize) { |
1352 | EXT3_I(inode)->i_disksize = inode->i_size; | 1365 | EXT3_I(inode)->i_disksize = inode->i_size; |
@@ -1361,6 +1374,8 @@ static int ext3_journalled_write_end(struct file *file, | |||
1361 | unlock_page(page); | 1374 | unlock_page(page); |
1362 | page_cache_release(page); | 1375 | page_cache_release(page); |
1363 | 1376 | ||
1377 | if (pos + len > inode->i_size) | ||
1378 | vmtruncate(inode, inode->i_size); | ||
1364 | return ret ? ret : copied; | 1379 | return ret ? ret : copied; |
1365 | } | 1380 | } |
1366 | 1381 | ||
@@ -1428,17 +1443,11 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
1428 | return 0; | 1443 | return 0; |
1429 | } | 1444 | } |
1430 | 1445 | ||
1431 | static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | ||
1432 | { | ||
1433 | if (buffer_mapped(bh)) | ||
1434 | return ext3_journal_dirty_data(handle, bh); | ||
1435 | return 0; | ||
1436 | } | ||
1437 | |||
1438 | static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) | 1446 | static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) |
1439 | { | 1447 | { |
1440 | return !buffer_mapped(bh); | 1448 | return !buffer_mapped(bh); |
1441 | } | 1449 | } |
1450 | |||
1442 | /* | 1451 | /* |
1443 | * Note that we always start a transaction even if we're not journalling | 1452 | * Note that we always start a transaction even if we're not journalling |
1444 | * data. This is to preserve ordering: any hole instantiation within | 1453 | * data. This is to preserve ordering: any hole instantiation within |
@@ -2354,6 +2363,9 @@ void ext3_truncate(struct inode *inode) | |||
2354 | if (!ext3_can_truncate(inode)) | 2363 | if (!ext3_can_truncate(inode)) |
2355 | return; | 2364 | return; |
2356 | 2365 | ||
2366 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) | ||
2367 | ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; | ||
2368 | |||
2357 | /* | 2369 | /* |
2358 | * We have to lock the EOF page here, because lock_page() nests | 2370 | * We have to lock the EOF page here, because lock_page() nests |
2359 | * outside journal_start(). | 2371 | * outside journal_start(). |
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 5e86ce9a86e0..88974814783a 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c | |||
@@ -15,12 +15,11 @@ | |||
15 | #include <linux/mount.h> | 15 | #include <linux/mount.h> |
16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
17 | #include <linux/compat.h> | 17 | #include <linux/compat.h> |
18 | #include <linux/smp_lock.h> | ||
19 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
20 | 19 | ||
21 | int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | 20 | long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
22 | unsigned long arg) | ||
23 | { | 21 | { |
22 | struct inode *inode = filp->f_dentry->d_inode; | ||
24 | struct ext3_inode_info *ei = EXT3_I(inode); | 23 | struct ext3_inode_info *ei = EXT3_I(inode); |
25 | unsigned int flags; | 24 | unsigned int flags; |
26 | unsigned short rsv_window_size; | 25 | unsigned short rsv_window_size; |
@@ -39,29 +38,25 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
39 | unsigned int oldflags; | 38 | unsigned int oldflags; |
40 | unsigned int jflag; | 39 | unsigned int jflag; |
41 | 40 | ||
41 | if (!is_owner_or_cap(inode)) | ||
42 | return -EACCES; | ||
43 | |||
44 | if (get_user(flags, (int __user *) arg)) | ||
45 | return -EFAULT; | ||
46 | |||
42 | err = mnt_want_write(filp->f_path.mnt); | 47 | err = mnt_want_write(filp->f_path.mnt); |
43 | if (err) | 48 | if (err) |
44 | return err; | 49 | return err; |
45 | 50 | ||
46 | if (!is_owner_or_cap(inode)) { | ||
47 | err = -EACCES; | ||
48 | goto flags_out; | ||
49 | } | ||
50 | |||
51 | if (get_user(flags, (int __user *) arg)) { | ||
52 | err = -EFAULT; | ||
53 | goto flags_out; | ||
54 | } | ||
55 | |||
56 | flags = ext3_mask_flags(inode->i_mode, flags); | 51 | flags = ext3_mask_flags(inode->i_mode, flags); |
57 | 52 | ||
58 | mutex_lock(&inode->i_mutex); | 53 | mutex_lock(&inode->i_mutex); |
54 | |||
59 | /* Is it quota file? Do not allow user to mess with it */ | 55 | /* Is it quota file? Do not allow user to mess with it */ |
60 | if (IS_NOQUOTA(inode)) { | 56 | err = -EPERM; |
61 | mutex_unlock(&inode->i_mutex); | 57 | if (IS_NOQUOTA(inode)) |
62 | err = -EPERM; | ||
63 | goto flags_out; | 58 | goto flags_out; |
64 | } | 59 | |
65 | oldflags = ei->i_flags; | 60 | oldflags = ei->i_flags; |
66 | 61 | ||
67 | /* The JOURNAL_DATA flag is modifiable only by root */ | 62 | /* The JOURNAL_DATA flag is modifiable only by root */ |
@@ -74,11 +69,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
74 | * This test looks nicer. Thanks to Pauline Middelink | 69 | * This test looks nicer. Thanks to Pauline Middelink |
75 | */ | 70 | */ |
76 | if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { | 71 | if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { |
77 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 72 | if (!capable(CAP_LINUX_IMMUTABLE)) |
78 | mutex_unlock(&inode->i_mutex); | ||
79 | err = -EPERM; | ||
80 | goto flags_out; | 73 | goto flags_out; |
81 | } | ||
82 | } | 74 | } |
83 | 75 | ||
84 | /* | 76 | /* |
@@ -86,17 +78,12 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
86 | * the relevant capability. | 78 | * the relevant capability. |
87 | */ | 79 | */ |
88 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { | 80 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { |
89 | if (!capable(CAP_SYS_RESOURCE)) { | 81 | if (!capable(CAP_SYS_RESOURCE)) |
90 | mutex_unlock(&inode->i_mutex); | ||
91 | err = -EPERM; | ||
92 | goto flags_out; | 82 | goto flags_out; |
93 | } | ||
94 | } | 83 | } |
95 | 84 | ||
96 | |||
97 | handle = ext3_journal_start(inode, 1); | 85 | handle = ext3_journal_start(inode, 1); |
98 | if (IS_ERR(handle)) { | 86 | if (IS_ERR(handle)) { |
99 | mutex_unlock(&inode->i_mutex); | ||
100 | err = PTR_ERR(handle); | 87 | err = PTR_ERR(handle); |
101 | goto flags_out; | 88 | goto flags_out; |
102 | } | 89 | } |
@@ -116,15 +103,13 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
116 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 103 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |
117 | flags_err: | 104 | flags_err: |
118 | ext3_journal_stop(handle); | 105 | ext3_journal_stop(handle); |
119 | if (err) { | 106 | if (err) |
120 | mutex_unlock(&inode->i_mutex); | 107 | goto flags_out; |
121 | return err; | ||
122 | } | ||
123 | 108 | ||
124 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) | 109 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) |
125 | err = ext3_change_inode_journal_flag(inode, jflag); | 110 | err = ext3_change_inode_journal_flag(inode, jflag); |
126 | mutex_unlock(&inode->i_mutex); | ||
127 | flags_out: | 111 | flags_out: |
112 | mutex_unlock(&inode->i_mutex); | ||
128 | mnt_drop_write(filp->f_path.mnt); | 113 | mnt_drop_write(filp->f_path.mnt); |
129 | return err; | 114 | return err; |
130 | } | 115 | } |
@@ -140,6 +125,7 @@ flags_out: | |||
140 | 125 | ||
141 | if (!is_owner_or_cap(inode)) | 126 | if (!is_owner_or_cap(inode)) |
142 | return -EPERM; | 127 | return -EPERM; |
128 | |||
143 | err = mnt_want_write(filp->f_path.mnt); | 129 | err = mnt_want_write(filp->f_path.mnt); |
144 | if (err) | 130 | if (err) |
145 | return err; | 131 | return err; |
@@ -147,6 +133,7 @@ flags_out: | |||
147 | err = -EFAULT; | 133 | err = -EFAULT; |
148 | goto setversion_out; | 134 | goto setversion_out; |
149 | } | 135 | } |
136 | |||
150 | handle = ext3_journal_start(inode, 1); | 137 | handle = ext3_journal_start(inode, 1); |
151 | if (IS_ERR(handle)) { | 138 | if (IS_ERR(handle)) { |
152 | err = PTR_ERR(handle); | 139 | err = PTR_ERR(handle); |
@@ -299,9 +286,6 @@ group_add_out: | |||
299 | #ifdef CONFIG_COMPAT | 286 | #ifdef CONFIG_COMPAT |
300 | long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 287 | long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
301 | { | 288 | { |
302 | struct inode *inode = file->f_path.dentry->d_inode; | ||
303 | int ret; | ||
304 | |||
305 | /* These are just misnamed, they actually get/put from/to user an int */ | 289 | /* These are just misnamed, they actually get/put from/to user an int */ |
306 | switch (cmd) { | 290 | switch (cmd) { |
307 | case EXT3_IOC32_GETFLAGS: | 291 | case EXT3_IOC32_GETFLAGS: |
@@ -341,9 +325,6 @@ long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
341 | default: | 325 | default: |
342 | return -ENOIOCTLCMD; | 326 | return -ENOIOCTLCMD; |
343 | } | 327 | } |
344 | lock_kernel(); | 328 | return ext3_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); |
345 | ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); | ||
346 | unlock_kernel(); | ||
347 | return ret; | ||
348 | } | 329 | } |
349 | #endif | 330 | #endif |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index e2fc63cbba8b..6ff7b9730234 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -161,12 +161,12 @@ static struct dx_frame *dx_probe(struct qstr *entry, | |||
161 | struct dx_frame *frame, | 161 | struct dx_frame *frame, |
162 | int *err); | 162 | int *err); |
163 | static void dx_release (struct dx_frame *frames); | 163 | static void dx_release (struct dx_frame *frames); |
164 | static int dx_make_map (struct ext3_dir_entry_2 *de, int size, | 164 | static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, |
165 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 165 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
166 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 166 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
167 | static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, | 167 | static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, |
168 | struct dx_map_entry *offsets, int count); | 168 | struct dx_map_entry *offsets, int count); |
169 | static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); | 169 | static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize); |
170 | static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); | 170 | static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); |
171 | static int ext3_htree_next_block(struct inode *dir, __u32 hash, | 171 | static int ext3_htree_next_block(struct inode *dir, __u32 hash, |
172 | struct dx_frame *frame, | 172 | struct dx_frame *frame, |
@@ -708,14 +708,14 @@ errout: | |||
708 | * Create map of hash values, offsets, and sizes, stored at end of block. | 708 | * Create map of hash values, offsets, and sizes, stored at end of block. |
709 | * Returns number of entries mapped. | 709 | * Returns number of entries mapped. |
710 | */ | 710 | */ |
711 | static int dx_make_map (struct ext3_dir_entry_2 *de, int size, | 711 | static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, |
712 | struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) | 712 | struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) |
713 | { | 713 | { |
714 | int count = 0; | 714 | int count = 0; |
715 | char *base = (char *) de; | 715 | char *base = (char *) de; |
716 | struct dx_hash_info h = *hinfo; | 716 | struct dx_hash_info h = *hinfo; |
717 | 717 | ||
718 | while ((char *) de < base + size) | 718 | while ((char *) de < base + blocksize) |
719 | { | 719 | { |
720 | if (de->name_len && de->inode) { | 720 | if (de->name_len && de->inode) { |
721 | ext3fs_dirhash(de->name, de->name_len, &h); | 721 | ext3fs_dirhash(de->name, de->name_len, &h); |
@@ -1047,8 +1047,16 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str | |||
1047 | return ERR_PTR(-EIO); | 1047 | return ERR_PTR(-EIO); |
1048 | } | 1048 | } |
1049 | inode = ext3_iget(dir->i_sb, ino); | 1049 | inode = ext3_iget(dir->i_sb, ino); |
1050 | if (IS_ERR(inode)) | 1050 | if (unlikely(IS_ERR(inode))) { |
1051 | return ERR_CAST(inode); | 1051 | if (PTR_ERR(inode) == -ESTALE) { |
1052 | ext3_error(dir->i_sb, __func__, | ||
1053 | "deleted inode referenced: %lu", | ||
1054 | ino); | ||
1055 | return ERR_PTR(-EIO); | ||
1056 | } else { | ||
1057 | return ERR_CAST(inode); | ||
1058 | } | ||
1059 | } | ||
1052 | } | 1060 | } |
1053 | return d_splice_alias(inode, dentry); | 1061 | return d_splice_alias(inode, dentry); |
1054 | } | 1062 | } |
@@ -1120,13 +1128,14 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) | |||
1120 | * Compact each dir entry in the range to the minimal rec_len. | 1128 | * Compact each dir entry in the range to the minimal rec_len. |
1121 | * Returns pointer to last entry in range. | 1129 | * Returns pointer to last entry in range. |
1122 | */ | 1130 | */ |
1123 | static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) | 1131 | static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize) |
1124 | { | 1132 | { |
1125 | struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; | 1133 | struct ext3_dir_entry_2 *next, *to, *prev; |
1134 | struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)base; | ||
1126 | unsigned rec_len = 0; | 1135 | unsigned rec_len = 0; |
1127 | 1136 | ||
1128 | prev = to = de; | 1137 | prev = to = de; |
1129 | while ((char*)de < base + size) { | 1138 | while ((char *)de < base + blocksize) { |
1130 | next = ext3_next_entry(de); | 1139 | next = ext3_next_entry(de); |
1131 | if (de->inode && de->name_len) { | 1140 | if (de->inode && de->name_len) { |
1132 | rec_len = EXT3_DIR_REC_LEN(de->name_len); | 1141 | rec_len = EXT3_DIR_REC_LEN(de->name_len); |
@@ -2265,7 +2274,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2265 | struct inode * old_inode, * new_inode; | 2274 | struct inode * old_inode, * new_inode; |
2266 | struct buffer_head * old_bh, * new_bh, * dir_bh; | 2275 | struct buffer_head * old_bh, * new_bh, * dir_bh; |
2267 | struct ext3_dir_entry_2 * old_de, * new_de; | 2276 | struct ext3_dir_entry_2 * old_de, * new_de; |
2268 | int retval; | 2277 | int retval, flush_file = 0; |
2269 | 2278 | ||
2270 | old_bh = new_bh = dir_bh = NULL; | 2279 | old_bh = new_bh = dir_bh = NULL; |
2271 | 2280 | ||
@@ -2401,6 +2410,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2401 | ext3_mark_inode_dirty(handle, new_inode); | 2410 | ext3_mark_inode_dirty(handle, new_inode); |
2402 | if (!new_inode->i_nlink) | 2411 | if (!new_inode->i_nlink) |
2403 | ext3_orphan_add(handle, new_inode); | 2412 | ext3_orphan_add(handle, new_inode); |
2413 | if (ext3_should_writeback_data(new_inode)) | ||
2414 | flush_file = 1; | ||
2404 | } | 2415 | } |
2405 | retval = 0; | 2416 | retval = 0; |
2406 | 2417 | ||
@@ -2409,6 +2420,8 @@ end_rename: | |||
2409 | brelse (old_bh); | 2420 | brelse (old_bh); |
2410 | brelse (new_bh); | 2421 | brelse (new_bh); |
2411 | ext3_journal_stop(handle); | 2422 | ext3_journal_stop(handle); |
2423 | if (retval == 0 && flush_file) | ||
2424 | filemap_flush(old_inode->i_mapping); | ||
2412 | return retval; | 2425 | return retval; |
2413 | } | 2426 | } |
2414 | 2427 | ||
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 7505482a08fa..418b6f3b0ae8 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -18,7 +18,7 @@ config EXT4_FS | |||
18 | filesystem; while there will be some performance gains from | 18 | filesystem; while there will be some performance gains from |
19 | the delayed allocation and inode table readahead, the best | 19 | the delayed allocation and inode table readahead, the best |
20 | performance gains will require enabling ext4 features in the | 20 | performance gains will require enabling ext4 features in the |
21 | filesystem, or formating a new filesystem as an ext4 | 21 | filesystem, or formatting a new filesystem as an ext4 |
22 | filesystem initially. | 22 | filesystem initially. |
23 | 23 | ||
24 | To compile this file system support as a module, choose M here. The | 24 | To compile this file system support as a module, choose M here. The |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 694ed6fadcc8..647e0d65a284 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -323,7 +323,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
323 | return PTR_ERR(acl); | 323 | return PTR_ERR(acl); |
324 | } | 324 | } |
325 | if (!acl) | 325 | if (!acl) |
326 | inode->i_mode &= ~current->fs->umask; | 326 | inode->i_mode &= ~current_umask(); |
327 | } | 327 | } |
328 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | 328 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { |
329 | struct posix_acl *clone; | 329 | struct posix_acl *clone; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index de0004fe6e00..296785a0dec8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -523,7 +523,9 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) | |||
523 | 523 | ||
524 | static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) | 524 | static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) |
525 | { | 525 | { |
526 | struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); | 526 | struct super_block *sb = dentry->d_sb; |
527 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
528 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
527 | 529 | ||
528 | /* If the count of free cluster is still unknown, counts it here. */ | 530 | /* If the count of free cluster is still unknown, counts it here. */ |
529 | if (sbi->free_clusters == -1 || !sbi->free_clus_valid) { | 531 | if (sbi->free_clusters == -1 || !sbi->free_clus_valid) { |
@@ -537,6 +539,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
537 | buf->f_blocks = sbi->max_cluster - FAT_START_ENT; | 539 | buf->f_blocks = sbi->max_cluster - FAT_START_ENT; |
538 | buf->f_bfree = sbi->free_clusters; | 540 | buf->f_bfree = sbi->free_clusters; |
539 | buf->f_bavail = sbi->free_clusters; | 541 | buf->f_bavail = sbi->free_clusters; |
542 | buf->f_fsid.val[0] = (u32)id; | ||
543 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
540 | buf->f_namelen = sbi->options.isvfat ? 260 : 12; | 544 | buf->f_namelen = sbi->options.isvfat ? 260 : 12; |
541 | 545 | ||
542 | return 0; | 546 | return 0; |
@@ -930,7 +934,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, | |||
930 | 934 | ||
931 | opts->fs_uid = current_uid(); | 935 | opts->fs_uid = current_uid(); |
932 | opts->fs_gid = current_gid(); | 936 | opts->fs_gid = current_gid(); |
933 | opts->fs_fmask = opts->fs_dmask = current->fs->umask; | 937 | opts->fs_fmask = current_umask(); |
934 | opts->allow_utime = -1; | 938 | opts->allow_utime = -1; |
935 | opts->codepage = fat_default_codepage; | 939 | opts->codepage = fat_default_codepage; |
936 | opts->iocharset = fat_default_iocharset; | 940 | opts->iocharset = fat_default_iocharset; |
diff --git a/fs/file_table.c b/fs/file_table.c index b74a8e1da913..54018fe48840 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -169,7 +169,6 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, | |||
169 | fmode_t mode, const struct file_operations *fop) | 169 | fmode_t mode, const struct file_operations *fop) |
170 | { | 170 | { |
171 | struct file *file; | 171 | struct file *file; |
172 | struct path; | ||
173 | 172 | ||
174 | file = get_empty_filp(); | 173 | file = get_empty_filp(); |
175 | if (!file) | 174 | if (!file) |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e3fe9918faaf..91013ff7dd53 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -196,7 +196,7 @@ static void redirty_tail(struct inode *inode) | |||
196 | struct inode *tail_inode; | 196 | struct inode *tail_inode; |
197 | 197 | ||
198 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); | 198 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); |
199 | if (!time_after_eq(inode->dirtied_when, | 199 | if (time_before(inode->dirtied_when, |
200 | tail_inode->dirtied_when)) | 200 | tail_inode->dirtied_when)) |
201 | inode->dirtied_when = jiffies; | 201 | inode->dirtied_when = jiffies; |
202 | } | 202 | } |
@@ -220,6 +220,21 @@ static void inode_sync_complete(struct inode *inode) | |||
220 | wake_up_bit(&inode->i_state, __I_SYNC); | 220 | wake_up_bit(&inode->i_state, __I_SYNC); |
221 | } | 221 | } |
222 | 222 | ||
223 | static bool inode_dirtied_after(struct inode *inode, unsigned long t) | ||
224 | { | ||
225 | bool ret = time_after(inode->dirtied_when, t); | ||
226 | #ifndef CONFIG_64BIT | ||
227 | /* | ||
228 | * For inodes being constantly redirtied, dirtied_when can get stuck. | ||
229 | * It _appears_ to be in the future, but is actually in distant past. | ||
230 | * This test is necessary to prevent such wrapped-around relative times | ||
231 | * from permanently stopping the whole pdflush writeback. | ||
232 | */ | ||
233 | ret = ret && time_before_eq(inode->dirtied_when, jiffies); | ||
234 | #endif | ||
235 | return ret; | ||
236 | } | ||
237 | |||
223 | /* | 238 | /* |
224 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. | 239 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. |
225 | */ | 240 | */ |
@@ -231,7 +246,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
231 | struct inode *inode = list_entry(delaying_queue->prev, | 246 | struct inode *inode = list_entry(delaying_queue->prev, |
232 | struct inode, i_list); | 247 | struct inode, i_list); |
233 | if (older_than_this && | 248 | if (older_than_this && |
234 | time_after(inode->dirtied_when, *older_than_this)) | 249 | inode_dirtied_after(inode, *older_than_this)) |
235 | break; | 250 | break; |
236 | list_move(&inode->i_list, dispatch_queue); | 251 | list_move(&inode->i_list, dispatch_queue); |
237 | } | 252 | } |
@@ -420,7 +435,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
420 | * If older_than_this is non-NULL, then only write out inodes which | 435 | * If older_than_this is non-NULL, then only write out inodes which |
421 | * had their first dirtying at a time earlier than *older_than_this. | 436 | * had their first dirtying at a time earlier than *older_than_this. |
422 | * | 437 | * |
423 | * If we're a pdlfush thread, then implement pdflush collision avoidance | 438 | * If we're a pdflush thread, then implement pdflush collision avoidance |
424 | * against the entire list. | 439 | * against the entire list. |
425 | * | 440 | * |
426 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | 441 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. |
@@ -492,8 +507,11 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
492 | continue; /* blockdev has wrong queue */ | 507 | continue; /* blockdev has wrong queue */ |
493 | } | 508 | } |
494 | 509 | ||
495 | /* Was this inode dirtied after sync_sb_inodes was called? */ | 510 | /* |
496 | if (time_after(inode->dirtied_when, start)) | 511 | * Was this inode dirtied after sync_sb_inodes was called? |
512 | * This keeps sync from extra jobs and livelock. | ||
513 | */ | ||
514 | if (inode_dirtied_after(inode, start)) | ||
497 | break; | 515 | break; |
498 | 516 | ||
499 | /* Is another pdflush already flushing this queue? */ | 517 | /* Is another pdflush already flushing this queue? */ |
@@ -538,7 +556,8 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
538 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 556 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
539 | struct address_space *mapping; | 557 | struct address_space *mapping; |
540 | 558 | ||
541 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 559 | if (inode->i_state & |
560 | (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | ||
542 | continue; | 561 | continue; |
543 | mapping = inode->i_mapping; | 562 | mapping = inode->i_mapping; |
544 | if (mapping->nrpages == 0) | 563 | if (mapping->nrpages == 0) |
diff --git a/fs/fs_struct.c b/fs/fs_struct.c new file mode 100644 index 000000000000..eee059052db5 --- /dev/null +++ b/fs/fs_struct.c | |||
@@ -0,0 +1,177 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/path.h> | ||
5 | #include <linux/slab.h> | ||
6 | #include <linux/fs_struct.h> | ||
7 | |||
8 | /* | ||
9 | * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. | ||
10 | * It can block. | ||
11 | */ | ||
12 | void set_fs_root(struct fs_struct *fs, struct path *path) | ||
13 | { | ||
14 | struct path old_root; | ||
15 | |||
16 | write_lock(&fs->lock); | ||
17 | old_root = fs->root; | ||
18 | fs->root = *path; | ||
19 | path_get(path); | ||
20 | write_unlock(&fs->lock); | ||
21 | if (old_root.dentry) | ||
22 | path_put(&old_root); | ||
23 | } | ||
24 | |||
25 | /* | ||
26 | * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. | ||
27 | * It can block. | ||
28 | */ | ||
29 | void set_fs_pwd(struct fs_struct *fs, struct path *path) | ||
30 | { | ||
31 | struct path old_pwd; | ||
32 | |||
33 | write_lock(&fs->lock); | ||
34 | old_pwd = fs->pwd; | ||
35 | fs->pwd = *path; | ||
36 | path_get(path); | ||
37 | write_unlock(&fs->lock); | ||
38 | |||
39 | if (old_pwd.dentry) | ||
40 | path_put(&old_pwd); | ||
41 | } | ||
42 | |||
/*
 * Repoint every task whose root and/or pwd is exactly *old_root at
 * *new_root.  One reference on new_root is taken per replacement; the
 * matching references on old_root are dropped after the task scan, so
 * the final path_put()s happen outside tasklist_lock.
 */
void chroot_fs_refs(struct path *old_root, struct path *new_root)
{
	struct task_struct *g, *p;
	struct fs_struct *fs;
	int count = 0;	/* number of old_root references to drop afterwards */

	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		/* task_lock stabilises p->fs; fs->lock guards the paths */
		task_lock(p);
		fs = p->fs;
		if (fs) {
			write_lock(&fs->lock);
			if (fs->root.dentry == old_root->dentry
			    && fs->root.mnt == old_root->mnt) {
				path_get(new_root);
				fs->root = *new_root;
				count++;
			}
			if (fs->pwd.dentry == old_root->dentry
			    && fs->pwd.mnt == old_root->mnt) {
				path_get(new_root);
				fs->pwd = *new_root;
				count++;
			}
			write_unlock(&fs->lock);
		}
		task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	/* drop the displaced references now that no locks are held */
	while (count--)
		path_put(old_root);
}
75 | |||
/*
 * Release the root and pwd path references held by an fs_struct and
 * return the structure to its slab cache.  Caller must hold the last
 * reference (fs->users reached zero).
 */
void free_fs_struct(struct fs_struct *fs)
{
	path_put(&fs->root);
	path_put(&fs->pwd);
	kmem_cache_free(fs_cachep, fs);
}
82 | |||
/*
 * Detach the exiting task from its fs_struct, freeing the structure if
 * this was the last user.
 */
void exit_fs(struct task_struct *tsk)
{
	struct fs_struct *fs = tsk->fs;

	if (fs) {
		int kill;
		/* task_lock nests outside fs->lock, as elsewhere in this file */
		task_lock(tsk);
		write_lock(&fs->lock);
		tsk->fs = NULL;
		/* free only if we just dropped the last reference */
		kill = !--fs->users;
		write_unlock(&fs->lock);
		task_unlock(tsk);
		if (kill)
			free_fs_struct(fs);
	}
}
99 | |||
/*
 * Duplicate an fs_struct: the copy starts with a single user, inherits
 * the umask, and takes its own references on root and pwd.  Returns
 * NULL on allocation failure.
 */
struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
	/* the new fs is not yet visible to any other task, so no locking
	 * is needed while filling it in */
	if (fs) {
		fs->users = 1;
		fs->in_exec = 0;
		rwlock_init(&fs->lock);
		fs->umask = old->umask;
		/* hold old's lock so root/pwd are copied and pinned as a
		 * consistent pair */
		read_lock(&old->lock);
		fs->root = old->root;
		path_get(&old->root);
		fs->pwd = old->pwd;
		path_get(&old->pwd);
		read_unlock(&old->lock);
	}
	return fs;
}
118 | |||
/*
 * Give the current task a private copy of its fs_struct.
 *
 * Returns 0 on success or -ENOMEM if the copy could not be allocated.
 * If the task held the last reference to the old fs_struct, it is
 * freed here.
 */
int unshare_fs_struct(void)
{
	struct fs_struct *fs = current->fs;
	struct fs_struct *new_fs = copy_fs_struct(fs);
	int kill;

	if (!new_fs)
		return -ENOMEM;

	/* same lock nesting as exit_fs(): task_lock outside fs->lock */
	task_lock(current);
	write_lock(&fs->lock);
	kill = !--fs->users;
	current->fs = new_fs;
	write_unlock(&fs->lock);
	task_unlock(current);

	if (kill)
		free_fs_struct(fs);

	return 0;
}
EXPORT_SYMBOL_GPL(unshare_fs_struct);
141 | |||
/* Return the file-mode creation mask of the current task. */
int current_umask(void)
{
	return current->fs->umask;
}
EXPORT_SYMBOL(current_umask);
147 | |||
/*
 * The shared fs_struct referenced from INIT_TASK (and adopted by tasks
 * via daemonize_fs_struct() below).  root and pwd are left
 * zero-initialised here.
 */
struct fs_struct init_fs = {
	.users = 1,
	.lock = __RW_LOCK_UNLOCKED(init_fs.lock),
	.umask = 0022,
};
154 | |||
/*
 * Switch the current task over to sharing init_fs, dropping its own
 * fs_struct (and freeing it if this task held the last reference).
 */
void daemonize_fs_struct(void)
{
	struct fs_struct *fs = current->fs;

	if (fs) {
		int kill;

		task_lock(current);

		/* gain a user on init_fs before publishing it as ours */
		write_lock(&init_fs.lock);
		init_fs.users++;
		write_unlock(&init_fs.lock);

		write_lock(&fs->lock);
		current->fs = &init_fs;
		kill = !--fs->users;
		write_unlock(&fs->lock);

		task_unlock(current);
		if (kill)
			free_fs_struct(fs);
	}
}
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig new file mode 100644 index 000000000000..9bbb8ce7bea0 --- /dev/null +++ b/fs/fscache/Kconfig | |||
@@ -0,0 +1,56 @@ | |||
1 | |||
2 | config FSCACHE | ||
3 | tristate "General filesystem local caching manager" | ||
4 | depends on EXPERIMENTAL | ||
5 | select SLOW_WORK | ||
6 | help | ||
7 | This option enables a generic filesystem caching manager that can be | ||
8 | used by various network and other filesystems to cache data locally. | ||
9 | Different sorts of caches can be plugged in, depending on the | ||
10 | resources available. | ||
11 | |||
12 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
13 | |||
14 | config FSCACHE_STATS | ||
15 | bool "Gather statistical information on local caching" | ||
16 | depends on FSCACHE && PROC_FS | ||
17 | help | ||
18 | This option causes statistical information to be gathered on local | ||
19 | caching and exported through file: | ||
20 | |||
21 | /proc/fs/fscache/stats | ||
22 | |||
23 | The gathering of statistics adds a certain amount of overhead to | ||
24 | execution as there are a quite a few stats gathered, and on a | ||
25 | multi-CPU system these may be on cachelines that keep bouncing | ||
26 | between CPUs. On the other hand, the stats are very useful for | ||
27 | debugging purposes. Saying 'Y' here is recommended. | ||
28 | |||
29 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
30 | |||
31 | config FSCACHE_HISTOGRAM | ||
32 | bool "Gather latency information on local caching" | ||
33 | depends on FSCACHE && PROC_FS | ||
34 | help | ||
35 | This option causes latency information to be gathered on local | ||
36 | caching and exported through file: | ||
37 | |||
38 | /proc/fs/fscache/histogram | ||
39 | |||
40 | The generation of this histogram adds a certain amount of overhead to | ||
41 | execution as there are a number of points at which data is gathered, | ||
42 | and on a multi-CPU system these may be on cachelines that keep | ||
43 | bouncing between CPUs. On the other hand, the histogram may be | ||
44 | useful for debugging purposes. Saying 'N' here is recommended. | ||
45 | |||
46 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
47 | |||
48 | config FSCACHE_DEBUG | ||
49 | bool "Debug FS-Cache" | ||
50 | depends on FSCACHE | ||
51 | help | ||
52 | This permits debugging to be dynamically enabled in the local caching | ||
53 | management module. If this is set, the debugging output may be | ||
enabled by setting bits in /sys/module/fscache/parameters/debug.
55 | |||
56 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile new file mode 100644 index 000000000000..91571b95aacc --- /dev/null +++ b/fs/fscache/Makefile | |||
@@ -0,0 +1,19 @@ | |||
1 | # | ||
2 | # Makefile for general filesystem caching code | ||
3 | # | ||
4 | |||
5 | fscache-y := \ | ||
6 | cache.o \ | ||
7 | cookie.o \ | ||
8 | fsdef.o \ | ||
9 | main.o \ | ||
10 | netfs.o \ | ||
11 | object.o \ | ||
12 | operation.o \ | ||
13 | page.o | ||
14 | |||
15 | fscache-$(CONFIG_PROC_FS) += proc.o | ||
16 | fscache-$(CONFIG_FSCACHE_STATS) += stats.o | ||
17 | fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o | ||
18 | |||
19 | obj-$(CONFIG_FSCACHE) := fscache.o | ||
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c new file mode 100644 index 000000000000..e21985bbb1fb --- /dev/null +++ b/fs/fscache/cache.c | |||
@@ -0,0 +1,415 @@ | |||
1 | /* FS-Cache cache handling | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | LIST_HEAD(fscache_cache_list); | ||
18 | DECLARE_RWSEM(fscache_addremove_sem); | ||
19 | DECLARE_WAIT_QUEUE_HEAD(fscache_cache_cleared_wq); | ||
20 | EXPORT_SYMBOL(fscache_cache_cleared_wq); | ||
21 | |||
22 | static LIST_HEAD(fscache_cache_tag_list); | ||
23 | |||
/*
 * Look up a cache tag by name, creating it if it does not yet exist.
 * Returns the tag with its usage count incremented, or ERR_PTR(-ENOMEM)
 * if a new tag could not be allocated.
 */
struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name)
{
	struct fscache_cache_tag *tag, *xtag;

	/* firstly check for the existence of the tag under read lock */
	down_read(&fscache_addremove_sem);

	list_for_each_entry(tag, &fscache_cache_tag_list, link) {
		if (strcmp(tag->name, name) == 0) {
			atomic_inc(&tag->usage);
			up_read(&fscache_addremove_sem);
			return tag;
		}
	}

	up_read(&fscache_addremove_sem);

	/* the tag does not exist - create a candidate */
	xtag = kzalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL);
	if (!xtag)
		/* return a dummy tag if out of memory */
		return ERR_PTR(-ENOMEM);

	atomic_set(&xtag->usage, 1);
	strcpy(xtag->name, name);

	/* write lock, search again and add if still not present: another
	 * thread may have inserted the same name while the semaphore was
	 * dropped for the allocation above */
	down_write(&fscache_addremove_sem);

	list_for_each_entry(tag, &fscache_cache_tag_list, link) {
		if (strcmp(tag->name, name) == 0) {
			atomic_inc(&tag->usage);
			up_write(&fscache_addremove_sem);
			/* lost the race: discard our candidate */
			kfree(xtag);
			return tag;
		}
	}

	list_add_tail(&xtag->link, &fscache_cache_tag_list);
	up_write(&fscache_addremove_sem);
	return xtag;
}
69 | |||
/*
 * Release a reference to a cache tag, unlinking and freeing the tag
 * when the last reference goes away.  The ERR_PTR(-ENOMEM) dummy
 * returned by __fscache_lookup_cache_tag() is silently ignored.
 */
void __fscache_release_cache_tag(struct fscache_cache_tag *tag)
{
	if (tag != ERR_PTR(-ENOMEM)) {
		down_write(&fscache_addremove_sem);

		if (atomic_dec_and_test(&tag->usage))
			list_del_init(&tag->link);
		else
			/* still in use: NULL the pointer so the kfree()
			 * below becomes a no-op */
			tag = NULL;

		up_write(&fscache_addremove_sem);

		kfree(tag);
	}
}
88 | |||
89 | /* | ||
90 | * select a cache in which to store an object | ||
91 | * - the cache addremove semaphore must be at least read-locked by the caller | ||
92 | * - the object will never be an index | ||
93 | */ | ||
94 | struct fscache_cache *fscache_select_cache_for_object( | ||
95 | struct fscache_cookie *cookie) | ||
96 | { | ||
97 | struct fscache_cache_tag *tag; | ||
98 | struct fscache_object *object; | ||
99 | struct fscache_cache *cache; | ||
100 | |||
101 | _enter(""); | ||
102 | |||
103 | if (list_empty(&fscache_cache_list)) { | ||
104 | _leave(" = NULL [no cache]"); | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | /* we check the parent to determine the cache to use */ | ||
109 | spin_lock(&cookie->lock); | ||
110 | |||
111 | /* the first in the parent's backing list should be the preferred | ||
112 | * cache */ | ||
113 | if (!hlist_empty(&cookie->backing_objects)) { | ||
114 | object = hlist_entry(cookie->backing_objects.first, | ||
115 | struct fscache_object, cookie_link); | ||
116 | |||
117 | cache = object->cache; | ||
118 | if (object->state >= FSCACHE_OBJECT_DYING || | ||
119 | test_bit(FSCACHE_IOERROR, &cache->flags)) | ||
120 | cache = NULL; | ||
121 | |||
122 | spin_unlock(&cookie->lock); | ||
123 | _leave(" = %p [parent]", cache); | ||
124 | return cache; | ||
125 | } | ||
126 | |||
127 | /* the parent is unbacked */ | ||
128 | if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { | ||
129 | /* cookie not an index and is unbacked */ | ||
130 | spin_unlock(&cookie->lock); | ||
131 | _leave(" = NULL [cookie ub,ni]"); | ||
132 | return NULL; | ||
133 | } | ||
134 | |||
135 | spin_unlock(&cookie->lock); | ||
136 | |||
137 | if (!cookie->def->select_cache) | ||
138 | goto no_preference; | ||
139 | |||
140 | /* ask the netfs for its preference */ | ||
141 | tag = cookie->def->select_cache(cookie->parent->netfs_data, | ||
142 | cookie->netfs_data); | ||
143 | if (!tag) | ||
144 | goto no_preference; | ||
145 | |||
146 | if (tag == ERR_PTR(-ENOMEM)) { | ||
147 | _leave(" = NULL [nomem tag]"); | ||
148 | return NULL; | ||
149 | } | ||
150 | |||
151 | if (!tag->cache) { | ||
152 | _leave(" = NULL [unbacked tag]"); | ||
153 | return NULL; | ||
154 | } | ||
155 | |||
156 | if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) | ||
157 | return NULL; | ||
158 | |||
159 | _leave(" = %p [specific]", tag->cache); | ||
160 | return tag->cache; | ||
161 | |||
162 | no_preference: | ||
163 | /* netfs has no preference - just select first cache */ | ||
164 | cache = list_entry(fscache_cache_list.next, | ||
165 | struct fscache_cache, link); | ||
166 | _leave(" = %p [first]", cache); | ||
167 | return cache; | ||
168 | } | ||
169 | |||
170 | /** | ||
171 | * fscache_init_cache - Initialise a cache record | ||
172 | * @cache: The cache record to be initialised | ||
173 | * @ops: The cache operations to be installed in that record | ||
174 | * @idfmt: Format string to define identifier | ||
175 | * @...: sprintf-style arguments | ||
176 | * | ||
177 | * Initialise a record of a cache and fill in the name. | ||
178 | * | ||
179 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
180 | * description. | ||
181 | */ | ||
182 | void fscache_init_cache(struct fscache_cache *cache, | ||
183 | const struct fscache_cache_ops *ops, | ||
184 | const char *idfmt, | ||
185 | ...) | ||
186 | { | ||
187 | va_list va; | ||
188 | |||
189 | memset(cache, 0, sizeof(*cache)); | ||
190 | |||
191 | cache->ops = ops; | ||
192 | |||
193 | va_start(va, idfmt); | ||
194 | vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va); | ||
195 | va_end(va); | ||
196 | |||
197 | INIT_WORK(&cache->op_gc, fscache_operation_gc); | ||
198 | INIT_LIST_HEAD(&cache->link); | ||
199 | INIT_LIST_HEAD(&cache->object_list); | ||
200 | INIT_LIST_HEAD(&cache->op_gc_list); | ||
201 | spin_lock_init(&cache->object_list_lock); | ||
202 | spin_lock_init(&cache->op_gc_list_lock); | ||
203 | } | ||
204 | EXPORT_SYMBOL(fscache_init_cache); | ||
205 | |||
206 | /** | ||
207 | * fscache_add_cache - Declare a cache as being open for business | ||
208 | * @cache: The record describing the cache | ||
209 | * @ifsdef: The record of the cache object describing the top-level index | ||
210 | * @tagname: The tag describing this cache | ||
211 | * | ||
212 | * Add a cache to the system, making it available for netfs's to use. | ||
213 | * | ||
214 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
215 | * description. | ||
216 | */ | ||
217 | int fscache_add_cache(struct fscache_cache *cache, | ||
218 | struct fscache_object *ifsdef, | ||
219 | const char *tagname) | ||
220 | { | ||
221 | struct fscache_cache_tag *tag; | ||
222 | |||
223 | BUG_ON(!cache->ops); | ||
224 | BUG_ON(!ifsdef); | ||
225 | |||
226 | cache->flags = 0; | ||
227 | ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
228 | ifsdef->state = FSCACHE_OBJECT_ACTIVE; | ||
229 | |||
230 | if (!tagname) | ||
231 | tagname = cache->identifier; | ||
232 | |||
233 | BUG_ON(!tagname[0]); | ||
234 | |||
235 | _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname); | ||
236 | |||
237 | /* we use the cache tag to uniquely identify caches */ | ||
238 | tag = __fscache_lookup_cache_tag(tagname); | ||
239 | if (IS_ERR(tag)) | ||
240 | goto nomem; | ||
241 | |||
242 | if (test_and_set_bit(FSCACHE_TAG_RESERVED, &tag->flags)) | ||
243 | goto tag_in_use; | ||
244 | |||
245 | cache->kobj = kobject_create_and_add(tagname, fscache_root); | ||
246 | if (!cache->kobj) | ||
247 | goto error; | ||
248 | |||
249 | ifsdef->cookie = &fscache_fsdef_index; | ||
250 | ifsdef->cache = cache; | ||
251 | cache->fsdef = ifsdef; | ||
252 | |||
253 | down_write(&fscache_addremove_sem); | ||
254 | |||
255 | tag->cache = cache; | ||
256 | cache->tag = tag; | ||
257 | |||
258 | /* add the cache to the list */ | ||
259 | list_add(&cache->link, &fscache_cache_list); | ||
260 | |||
261 | /* add the cache's netfs definition index object to the cache's | ||
262 | * list */ | ||
263 | spin_lock(&cache->object_list_lock); | ||
264 | list_add_tail(&ifsdef->cache_link, &cache->object_list); | ||
265 | spin_unlock(&cache->object_list_lock); | ||
266 | |||
267 | /* add the cache's netfs definition index object to the top level index | ||
268 | * cookie as a known backing object */ | ||
269 | spin_lock(&fscache_fsdef_index.lock); | ||
270 | |||
271 | hlist_add_head(&ifsdef->cookie_link, | ||
272 | &fscache_fsdef_index.backing_objects); | ||
273 | |||
274 | atomic_inc(&fscache_fsdef_index.usage); | ||
275 | |||
276 | /* done */ | ||
277 | spin_unlock(&fscache_fsdef_index.lock); | ||
278 | up_write(&fscache_addremove_sem); | ||
279 | |||
280 | printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n", | ||
281 | cache->tag->name, cache->ops->name); | ||
282 | kobject_uevent(cache->kobj, KOBJ_ADD); | ||
283 | |||
284 | _leave(" = 0 [%s]", cache->identifier); | ||
285 | return 0; | ||
286 | |||
287 | tag_in_use: | ||
288 | printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname); | ||
289 | __fscache_release_cache_tag(tag); | ||
290 | _leave(" = -EXIST"); | ||
291 | return -EEXIST; | ||
292 | |||
293 | error: | ||
294 | __fscache_release_cache_tag(tag); | ||
295 | _leave(" = -EINVAL"); | ||
296 | return -EINVAL; | ||
297 | |||
298 | nomem: | ||
299 | _leave(" = -ENOMEM"); | ||
300 | return -ENOMEM; | ||
301 | } | ||
302 | EXPORT_SYMBOL(fscache_add_cache); | ||
303 | |||
/**
 * fscache_io_error - Note a cache I/O error
 * @cache: The record describing the cache
 *
 * Note that an I/O error occurred in a cache and that it should no longer be
 * used for anything.  This also reports the error into the kernel log.
 *
 * See Documentation/filesystems/caching/backend-api.txt for a complete
 * description.
 */
void fscache_io_error(struct fscache_cache *cache)
{
	/* FSCACHE_IOERROR is checked by fscache_select_cache_for_object(),
	 * which refuses errored caches for new objects */
	set_bit(FSCACHE_IOERROR, &cache->flags);

	printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n",
	       cache->ops->name);
}
EXPORT_SYMBOL(fscache_io_error);
322 | |||
/*
 * request withdrawal of all the objects in a cache
 * - all the objects being withdrawn are moved onto the supplied list
 */
static void fscache_withdraw_all_objects(struct fscache_cache *cache,
					 struct list_head *dying_objects)
{
	struct fscache_object *object;

	spin_lock(&cache->object_list_lock);

	while (!list_empty(&cache->object_list)) {
		object = list_entry(cache->object_list.next,
				    struct fscache_object, cache_link);
		list_move_tail(&object->cache_link, dying_objects);

		_debug("withdraw %p", object->cookie);

		/* NOTE(review): the object lock is taken before the list
		 * lock is dropped - presumably to keep the object stable
		 * while the withdrawal event is raised; confirm against
		 * the object state machine */
		spin_lock(&object->lock);
		spin_unlock(&cache->object_list_lock);
		fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW);
		spin_unlock(&object->lock);

		/* the list may be long; give the scheduler a chance */
		cond_resched();
		spin_lock(&cache->object_list_lock);
	}

	spin_unlock(&cache->object_list_lock);
}
352 | |||
/**
 * fscache_withdraw_cache - Withdraw a cache from the active service
 * @cache: The record describing the cache
 *
 * Withdraw a cache from service, unbinding all its cache objects from the
 * netfs cookies they're currently representing.
 *
 * See Documentation/filesystems/caching/backend-api.txt for a complete
 * description.
 */
void fscache_withdraw_cache(struct fscache_cache *cache)
{
	LIST_HEAD(dying_objects);

	_enter("");

	printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n",
	       cache->tag->name);

	/* make the cache unavailable for cookie acquisition; withdrawing
	 * the same cache twice is a bug */
	if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags))
		BUG();

	/* unlink from the cache list and unbind the tag */
	down_write(&fscache_addremove_sem);
	list_del_init(&cache->link);
	cache->tag->cache = NULL;
	up_write(&fscache_addremove_sem);

	/* make sure all pages pinned by operations on behalf of the netfs are
	 * written to disk */
	cache->ops->sync_cache(cache);

	/* dissociate all the netfs pages backed by this cache from the block
	 * mappings in the cache */
	cache->ops->dissociate_pages(cache);

	/* we now have to destroy all the active objects pertaining to this
	 * cache - which we do by passing them off to thread pool to be
	 * disposed of */
	_debug("destroy");

	fscache_withdraw_all_objects(cache, &dying_objects);

	/* wait for all extant objects to finish their outstanding operations
	 * and go away */
	_debug("wait for finish");
	wait_event(fscache_cache_cleared_wq,
		   atomic_read(&cache->object_count) == 0);
	_debug("wait for clearance");
	wait_event(fscache_cache_cleared_wq,
		   list_empty(&cache->object_list));
	_debug("cleared");
	/* everything moved to dying_objects must have been taken off it
	 * again by the time the waits above complete */
	ASSERT(list_empty(&dying_objects));

	kobject_put(cache->kobj);

	/* release the tag reservation taken in fscache_add_cache() */
	clear_bit(FSCACHE_TAG_RESERVED, &cache->tag->flags);
	fscache_release_cache_tag(cache->tag);
	cache->tag = NULL;

	_leave("");
}
EXPORT_SYMBOL(fscache_withdraw_cache);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c new file mode 100644 index 000000000000..72fd18f6c71f --- /dev/null +++ b/fs/fscache/cookie.c | |||
@@ -0,0 +1,500 @@ | |||
1 | /* netfs cookie management | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/filesystems/caching/netfs-api.txt for more information on | ||
12 | * the netfs API. | ||
13 | */ | ||
14 | |||
15 | #define FSCACHE_DEBUG_LEVEL COOKIE | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include "internal.h" | ||
19 | |||
20 | struct kmem_cache *fscache_cookie_jar; | ||
21 | |||
22 | static atomic_t fscache_object_debug_id = ATOMIC_INIT(0); | ||
23 | |||
24 | static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie); | ||
25 | static int fscache_alloc_object(struct fscache_cache *cache, | ||
26 | struct fscache_cookie *cookie); | ||
27 | static int fscache_attach_object(struct fscache_cookie *cookie, | ||
28 | struct fscache_object *object); | ||
29 | |||
/*
 * initialise a cookie jar slab element prior to any use
 */
void fscache_cookie_init_once(void *_cookie)
{
	struct fscache_cookie *cookie = _cookie;

	/* zero the whole cookie, then set up the embedded lock and the
	 * (empty) list of backing objects */
	memset(cookie, 0, sizeof(*cookie));
	spin_lock_init(&cookie->lock);
	INIT_HLIST_HEAD(&cookie->backing_objects);
}
41 | |||
/*
 * request a cookie to represent an object (index, datafile, xattr, etc)
 * - parent specifies the parent object
 * - the top level index cookie for each netfs is stored in the fscache_netfs
 *   struct upon registration
 * - def points to the definition
 * - the netfs_data will be passed to the functions pointed to in *def
 * - all attached caches will be searched to see if they contain this object
 * - index objects aren't stored on disk until there's a dependent file that
 *   needs storing
 * - other objects are stored in a selected cache immediately, and all the
 *   indices forming the path to it are instantiated if necessary
 * - we never let on to the netfs about errors
 * - we may set a negative cookie pointer, but that's okay
 *
 * Returns the new cookie, or NULL on failure (no parent, OOM, or no
 * cache buffers available for a non-index object).
 */
struct fscache_cookie *__fscache_acquire_cookie(
	struct fscache_cookie *parent,
	const struct fscache_cookie_def *def,
	void *netfs_data)
{
	struct fscache_cookie *cookie;

	BUG_ON(!def);

	_enter("{%s},{%s},%p",
	       parent ? (char *) parent->def->name : "<no-parent>",
	       def->name, netfs_data);

	fscache_stat(&fscache_n_acquires);

	/* if there's no parent cookie, then we don't create one here either */
	if (!parent) {
		fscache_stat(&fscache_n_acquires_null);
		_leave(" [no parent]");
		return NULL;
	}

	/* validate the definition */
	BUG_ON(!def->get_key);
	BUG_ON(!def->name[0]);

	/* an index may only hang off another index */
	BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX &&
	       parent->def->type != FSCACHE_COOKIE_TYPE_INDEX);

	/* allocate and initialise a cookie */
	cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
	if (!cookie) {
		fscache_stat(&fscache_n_acquires_oom);
		_leave(" [ENOMEM]");
		return NULL;
	}

	atomic_set(&cookie->usage, 1);
	atomic_set(&cookie->n_children, 0);

	/* pin the parent and count the new child against it */
	atomic_inc(&parent->usage);
	atomic_inc(&parent->n_children);

	cookie->def = def;
	cookie->parent = parent;
	cookie->netfs_data = netfs_data;
	cookie->flags = 0;

	INIT_RADIX_TREE(&cookie->stores, GFP_NOFS);

	switch (cookie->def->type) {
	case FSCACHE_COOKIE_TYPE_INDEX:
		fscache_stat(&fscache_n_cookie_index);
		break;
	case FSCACHE_COOKIE_TYPE_DATAFILE:
		fscache_stat(&fscache_n_cookie_data);
		break;
	default:
		fscache_stat(&fscache_n_cookie_special);
		break;
	}

	/* if the object is an index then we need do nothing more here - we
	 * create indices on disk when we need them as an index may exist in
	 * multiple caches */
	if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
		if (fscache_acquire_non_index_cookie(cookie) < 0) {
			/* unwind the child count and drop the cookie;
			 * __fscache_cookie_put also releases the parent
			 * usage reference taken above */
			atomic_dec(&parent->n_children);
			__fscache_cookie_put(cookie);
			fscache_stat(&fscache_n_acquires_nobufs);
			_leave(" = NULL");
			return NULL;
		}
	}

	fscache_stat(&fscache_n_acquires_ok);
	_leave(" = %p", cookie);
	return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
137 | |||
/*
 * acquire a non-index cookie
 * - this must make sure the index chain is instantiated and instantiate the
 *   object representation too
 *
 * Returns 0 on success (including the benign "no caches" case),
 * -ENOMEDIUM if no cache could be selected, -ENOBUFS if no backing
 * object could be attached, or a negative error from object allocation.
 */
static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
{
	struct fscache_object *object;
	struct fscache_cache *cache;
	uint64_t i_size;
	int ret;

	_enter("");

	cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE;

	/* now we need to see whether the backing objects for this cookie yet
	 * exist, if not there'll be nothing to search */
	down_read(&fscache_addremove_sem);

	if (list_empty(&fscache_cache_list)) {
		up_read(&fscache_addremove_sem);
		_leave(" = 0 [no caches]");
		return 0;
	}

	/* select a cache in which to store the object */
	cache = fscache_select_cache_for_object(cookie->parent);
	if (!cache) {
		up_read(&fscache_addremove_sem);
		fscache_stat(&fscache_n_acquires_no_cache);
		_leave(" = -ENOMEDIUM [no cache]");
		return -ENOMEDIUM;
	}

	_debug("cache %s", cache->tag->name);

	/* replace UNAVAILABLE with the in-progress state flags */
	cookie->flags =
		(1 << FSCACHE_COOKIE_LOOKING_UP) |
		(1 << FSCACHE_COOKIE_CREATING) |
		(1 << FSCACHE_COOKIE_NO_DATA_YET);

	/* ask the cache to allocate objects for this cookie and its parent
	 * chain */
	ret = fscache_alloc_object(cache, cookie);
	if (ret < 0) {
		up_read(&fscache_addremove_sem);
		_leave(" = %d", ret);
		return ret;
	}

	/* pass on how big the object we're caching is supposed to be */
	cookie->def->get_attr(cookie->netfs_data, &i_size);

	spin_lock(&cookie->lock);
	if (hlist_empty(&cookie->backing_objects)) {
		spin_unlock(&cookie->lock);
		goto unavailable;
	}

	object = hlist_entry(cookie->backing_objects.first,
			     struct fscache_object, cookie_link);

	fscache_set_store_limit(object, i_size);

	/* initiate the process of looking up all the objects in the chain
	 * (done by fscache_initialise_object()) */
	fscache_enqueue_object(object);

	spin_unlock(&cookie->lock);

	/* we may be required to wait for lookup to complete at this point */
	if (!fscache_defer_lookup) {
		_debug("non-deferred lookup %p", &cookie->flags);
		wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
			    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
		_debug("complete");
		if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
			goto unavailable;
	}

	up_read(&fscache_addremove_sem);
	_leave(" = 0 [deferred]");
	return 0;

unavailable:
	up_read(&fscache_addremove_sem);
	_leave(" = -ENOBUFS");
	return -ENOBUFS;
}
228 | |||
229 | /* | ||
230 | * recursively allocate cache object records for a cookie/cache combination | ||
231 | * - caller must be holding the addremove sem | ||
232 | */ | ||
233 | static int fscache_alloc_object(struct fscache_cache *cache, | ||
234 | struct fscache_cookie *cookie) | ||
235 | { | ||
236 | struct fscache_object *object; | ||
237 | struct hlist_node *_n; | ||
238 | int ret; | ||
239 | |||
240 | _enter("%p,%p{%s}", cache, cookie, cookie->def->name); | ||
241 | |||
242 | spin_lock(&cookie->lock); | ||
243 | hlist_for_each_entry(object, _n, &cookie->backing_objects, | ||
244 | cookie_link) { | ||
245 | if (object->cache == cache) | ||
246 | goto object_already_extant; | ||
247 | } | ||
248 | spin_unlock(&cookie->lock); | ||
249 | |||
250 | /* ask the cache to allocate an object (we may end up with duplicate | ||
251 | * objects at this stage, but we sort that out later) */ | ||
252 | object = cache->ops->alloc_object(cache, cookie); | ||
253 | if (IS_ERR(object)) { | ||
254 | fscache_stat(&fscache_n_object_no_alloc); | ||
255 | ret = PTR_ERR(object); | ||
256 | goto error; | ||
257 | } | ||
258 | |||
259 | fscache_stat(&fscache_n_object_alloc); | ||
260 | |||
261 | object->debug_id = atomic_inc_return(&fscache_object_debug_id); | ||
262 | |||
263 | _debug("ALLOC OBJ%x: %s {%lx}", | ||
264 | object->debug_id, cookie->def->name, object->events); | ||
265 | |||
266 | ret = fscache_alloc_object(cache, cookie->parent); | ||
267 | if (ret < 0) | ||
268 | goto error_put; | ||
269 | |||
270 | /* only attach if we managed to allocate all we needed, otherwise | ||
271 | * discard the object we just allocated and instead use the one | ||
272 | * attached to the cookie */ | ||
273 | if (fscache_attach_object(cookie, object) < 0) | ||
274 | cache->ops->put_object(object); | ||
275 | |||
276 | _leave(" = 0"); | ||
277 | return 0; | ||
278 | |||
279 | object_already_extant: | ||
280 | ret = -ENOBUFS; | ||
281 | if (object->state >= FSCACHE_OBJECT_DYING) { | ||
282 | spin_unlock(&cookie->lock); | ||
283 | goto error; | ||
284 | } | ||
285 | spin_unlock(&cookie->lock); | ||
286 | _leave(" = 0 [found]"); | ||
287 | return 0; | ||
288 | |||
289 | error_put: | ||
290 | cache->ops->put_object(object); | ||
291 | error: | ||
292 | _leave(" = %d", ret); | ||
293 | return ret; | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * attach a cache object to a cookie | ||
298 | */ | ||
299 | static int fscache_attach_object(struct fscache_cookie *cookie, | ||
300 | struct fscache_object *object) | ||
301 | { | ||
302 | struct fscache_object *p; | ||
303 | struct fscache_cache *cache = object->cache; | ||
304 | struct hlist_node *_n; | ||
305 | int ret; | ||
306 | |||
307 | _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); | ||
308 | |||
309 | spin_lock(&cookie->lock); | ||
310 | |||
311 | /* there may be multiple initial creations of this object, but we only | ||
312 | * want one */ | ||
313 | ret = -EEXIST; | ||
314 | hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { | ||
315 | if (p->cache == object->cache) { | ||
316 | if (p->state >= FSCACHE_OBJECT_DYING) | ||
317 | ret = -ENOBUFS; | ||
318 | goto cant_attach_object; | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* pin the parent object */ | ||
323 | spin_lock_nested(&cookie->parent->lock, 1); | ||
324 | hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, | ||
325 | cookie_link) { | ||
326 | if (p->cache == object->cache) { | ||
327 | if (p->state >= FSCACHE_OBJECT_DYING) { | ||
328 | ret = -ENOBUFS; | ||
329 | spin_unlock(&cookie->parent->lock); | ||
330 | goto cant_attach_object; | ||
331 | } | ||
332 | object->parent = p; | ||
333 | spin_lock(&p->lock); | ||
334 | p->n_children++; | ||
335 | spin_unlock(&p->lock); | ||
336 | break; | ||
337 | } | ||
338 | } | ||
339 | spin_unlock(&cookie->parent->lock); | ||
340 | |||
341 | /* attach to the cache's object list */ | ||
342 | if (list_empty(&object->cache_link)) { | ||
343 | spin_lock(&cache->object_list_lock); | ||
344 | list_add(&object->cache_link, &cache->object_list); | ||
345 | spin_unlock(&cache->object_list_lock); | ||
346 | } | ||
347 | |||
348 | /* attach to the cookie */ | ||
349 | object->cookie = cookie; | ||
350 | atomic_inc(&cookie->usage); | ||
351 | hlist_add_head(&object->cookie_link, &cookie->backing_objects); | ||
352 | ret = 0; | ||
353 | |||
354 | cant_attach_object: | ||
355 | spin_unlock(&cookie->lock); | ||
356 | _leave(" = %d", ret); | ||
357 | return ret; | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * update the index entries backing a cookie | ||
362 | */ | ||
363 | void __fscache_update_cookie(struct fscache_cookie *cookie) | ||
364 | { | ||
365 | struct fscache_object *object; | ||
366 | struct hlist_node *_p; | ||
367 | |||
368 | fscache_stat(&fscache_n_updates); | ||
369 | |||
370 | if (!cookie) { | ||
371 | fscache_stat(&fscache_n_updates_null); | ||
372 | _leave(" [no cookie]"); | ||
373 | return; | ||
374 | } | ||
375 | |||
376 | _enter("{%s}", cookie->def->name); | ||
377 | |||
378 | BUG_ON(!cookie->def->get_aux); | ||
379 | |||
380 | spin_lock(&cookie->lock); | ||
381 | |||
382 | /* update the index entry on disk in each cache backing this cookie */ | ||
383 | hlist_for_each_entry(object, _p, | ||
384 | &cookie->backing_objects, cookie_link) { | ||
385 | fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); | ||
386 | } | ||
387 | |||
388 | spin_unlock(&cookie->lock); | ||
389 | _leave(""); | ||
390 | } | ||
391 | EXPORT_SYMBOL(__fscache_update_cookie); | ||
392 | |||
393 | /* | ||
394 | * release a cookie back to the cache | ||
395 | * - the object will be marked as recyclable on disk if retire is true | ||
396 | * - all dependents of this cookie must have already been unregistered | ||
397 | * (indices/files/pages) | ||
398 | */ | ||
399 | void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | ||
400 | { | ||
401 | struct fscache_cache *cache; | ||
402 | struct fscache_object *object; | ||
403 | unsigned long event; | ||
404 | |||
405 | fscache_stat(&fscache_n_relinquishes); | ||
406 | |||
407 | if (!cookie) { | ||
408 | fscache_stat(&fscache_n_relinquishes_null); | ||
409 | _leave(" [no cookie]"); | ||
410 | return; | ||
411 | } | ||
412 | |||
413 | _enter("%p{%s,%p},%d", | ||
414 | cookie, cookie->def->name, cookie->netfs_data, retire); | ||
415 | |||
416 | if (atomic_read(&cookie->n_children) != 0) { | ||
417 | printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", | ||
418 | cookie->def->name); | ||
419 | BUG(); | ||
420 | } | ||
421 | |||
422 | /* wait for the cookie to finish being instantiated (or to fail) */ | ||
423 | if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) { | ||
424 | fscache_stat(&fscache_n_relinquishes_waitcrt); | ||
425 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING, | ||
426 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
427 | } | ||
428 | |||
429 | event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; | ||
430 | |||
431 | /* detach pointers back to the netfs */ | ||
432 | spin_lock(&cookie->lock); | ||
433 | |||
434 | cookie->netfs_data = NULL; | ||
435 | cookie->def = NULL; | ||
436 | |||
437 | /* break links with all the active objects */ | ||
438 | while (!hlist_empty(&cookie->backing_objects)) { | ||
439 | object = hlist_entry(cookie->backing_objects.first, | ||
440 | struct fscache_object, | ||
441 | cookie_link); | ||
442 | |||
443 | _debug("RELEASE OBJ%x", object->debug_id); | ||
444 | |||
445 | /* detach each cache object from the object cookie */ | ||
446 | spin_lock(&object->lock); | ||
447 | hlist_del_init(&object->cookie_link); | ||
448 | |||
449 | cache = object->cache; | ||
450 | object->cookie = NULL; | ||
451 | fscache_raise_event(object, event); | ||
452 | spin_unlock(&object->lock); | ||
453 | |||
454 | if (atomic_dec_and_test(&cookie->usage)) | ||
455 | /* the cookie refcount shouldn't be reduced to 0 yet */ | ||
456 | BUG(); | ||
457 | } | ||
458 | |||
459 | spin_unlock(&cookie->lock); | ||
460 | |||
461 | if (cookie->parent) { | ||
462 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); | ||
463 | ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0); | ||
464 | atomic_dec(&cookie->parent->n_children); | ||
465 | } | ||
466 | |||
467 | /* finally dispose of the cookie */ | ||
468 | ASSERTCMP(atomic_read(&cookie->usage), >, 0); | ||
469 | fscache_cookie_put(cookie); | ||
470 | |||
471 | _leave(""); | ||
472 | } | ||
473 | EXPORT_SYMBOL(__fscache_relinquish_cookie); | ||
474 | |||
475 | /* | ||
476 | * destroy a cookie | ||
477 | */ | ||
478 | void __fscache_cookie_put(struct fscache_cookie *cookie) | ||
479 | { | ||
480 | struct fscache_cookie *parent; | ||
481 | |||
482 | _enter("%p", cookie); | ||
483 | |||
484 | for (;;) { | ||
485 | _debug("FREE COOKIE %p", cookie); | ||
486 | parent = cookie->parent; | ||
487 | BUG_ON(!hlist_empty(&cookie->backing_objects)); | ||
488 | kmem_cache_free(fscache_cookie_jar, cookie); | ||
489 | |||
490 | if (!parent) | ||
491 | break; | ||
492 | |||
493 | cookie = parent; | ||
494 | BUG_ON(atomic_read(&cookie->usage) <= 0); | ||
495 | if (!atomic_dec_and_test(&cookie->usage)) | ||
496 | break; | ||
497 | } | ||
498 | |||
499 | _leave(""); | ||
500 | } | ||
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c new file mode 100644 index 000000000000..f5b4baee7352 --- /dev/null +++ b/fs/fscache/fsdef.c | |||
@@ -0,0 +1,144 @@ | |||
1 | /* Filesystem index definition | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
13 | #include <linux/module.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, | ||
17 | void *buffer, uint16_t bufmax); | ||
18 | |||
19 | static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, | ||
20 | void *buffer, uint16_t bufmax); | ||
21 | |||
22 | static | ||
23 | enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, | ||
24 | const void *data, | ||
25 | uint16_t datalen); | ||
26 | |||
27 | /* | ||
28 | * The root index is owned by FS-Cache itself. | ||
29 | * | ||
30 | * When a netfs requests caching facilities, FS-Cache will, if one doesn't | ||
31 | * already exist, create an entry in the root index with the key being the name | ||
32 | * of the netfs ("AFS" for example), and the auxiliary data holding the index | ||
33 | * structure version supplied by the netfs: | ||
34 | * | ||
35 | * FSDEF | ||
36 | * | | ||
37 | * +-----------+ | ||
38 | * | | | ||
39 | * NFS AFS | ||
40 | * [v=1] [v=1] | ||
41 | * | ||
42 | * If an entry with the appropriate name does already exist, the version is | ||
43 | * compared. If the version is different, the entire subtree from that entry | ||
44 | * will be discarded and a new entry created. | ||
45 | * | ||
46 | * The new entry will be an index, and a cookie referring to it will be passed | ||
47 | * to the netfs. This is then the root handle by which the netfs accesses the | ||
48 | * cache. It can create whatever objects it likes in that index, including | ||
49 | * further indices. | ||
50 | */ | ||
51 | static struct fscache_cookie_def fscache_fsdef_index_def = { | ||
52 | .name = ".FS-Cache", | ||
53 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
54 | }; | ||
55 | |||
56 | struct fscache_cookie fscache_fsdef_index = { | ||
57 | .usage = ATOMIC_INIT(1), | ||
58 | .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), | ||
59 | .backing_objects = HLIST_HEAD_INIT, | ||
60 | .def = &fscache_fsdef_index_def, | ||
61 | }; | ||
62 | EXPORT_SYMBOL(fscache_fsdef_index); | ||
63 | |||
64 | /* | ||
65 | * Definition of an entry in the root index. Each entry is an index, keyed to | ||
66 | * a specific netfs and only applicable to a particular version of the index | ||
67 | * structure used by that netfs. | ||
68 | */ | ||
69 | struct fscache_cookie_def fscache_fsdef_netfs_def = { | ||
70 | .name = "FSDEF.netfs", | ||
71 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
72 | .get_key = fscache_fsdef_netfs_get_key, | ||
73 | .get_aux = fscache_fsdef_netfs_get_aux, | ||
74 | .check_aux = fscache_fsdef_netfs_check_aux, | ||
75 | }; | ||
76 | |||
77 | /* | ||
78 | * get the key data for an FSDEF index record - this is the name of the netfs | ||
79 | * for which this entry is created | ||
80 | */ | ||
81 | static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, | ||
82 | void *buffer, uint16_t bufmax) | ||
83 | { | ||
84 | const struct fscache_netfs *netfs = cookie_netfs_data; | ||
85 | unsigned klen; | ||
86 | |||
87 | _enter("{%s.%u},", netfs->name, netfs->version); | ||
88 | |||
89 | klen = strlen(netfs->name); | ||
90 | if (klen > bufmax) | ||
91 | return 0; | ||
92 | |||
93 | memcpy(buffer, netfs->name, klen); | ||
94 | return klen; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * get the auxiliary data for an FSDEF index record - this is the index | ||
99 | * structure version number of the netfs for which this version is created | ||
100 | */ | ||
101 | static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, | ||
102 | void *buffer, uint16_t bufmax) | ||
103 | { | ||
104 | const struct fscache_netfs *netfs = cookie_netfs_data; | ||
105 | unsigned dlen; | ||
106 | |||
107 | _enter("{%s.%u},", netfs->name, netfs->version); | ||
108 | |||
109 | dlen = sizeof(uint32_t); | ||
110 | if (dlen > bufmax) | ||
111 | return 0; | ||
112 | |||
113 | memcpy(buffer, &netfs->version, dlen); | ||
114 | return dlen; | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * check that the index structure version number stored in the auxiliary data | ||
119 | * matches the one the netfs gave us | ||
120 | */ | ||
121 | static enum fscache_checkaux fscache_fsdef_netfs_check_aux( | ||
122 | void *cookie_netfs_data, | ||
123 | const void *data, | ||
124 | uint16_t datalen) | ||
125 | { | ||
126 | struct fscache_netfs *netfs = cookie_netfs_data; | ||
127 | uint32_t version; | ||
128 | |||
129 | _enter("{%s},,%hu", netfs->name, datalen); | ||
130 | |||
131 | if (datalen != sizeof(version)) { | ||
132 | _leave(" = OBSOLETE [dl=%d v=%zu]", datalen, sizeof(version)); | ||
133 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
134 | } | ||
135 | |||
136 | memcpy(&version, data, sizeof(version)); | ||
137 | if (version != netfs->version) { | ||
138 | _leave(" = OBSOLETE [ver=%x net=%x]", version, netfs->version); | ||
139 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
140 | } | ||
141 | |||
142 | _leave(" = OKAY"); | ||
143 | return FSCACHE_CHECKAUX_OKAY; | ||
144 | } | ||
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c new file mode 100644 index 000000000000..bad496748a59 --- /dev/null +++ b/fs/fscache/histogram.c | |||
@@ -0,0 +1,109 @@ | |||
1 | /* FS-Cache latency histogram | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL THREAD | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | atomic_t fscache_obj_instantiate_histogram[HZ]; | ||
19 | atomic_t fscache_objs_histogram[HZ]; | ||
20 | atomic_t fscache_ops_histogram[HZ]; | ||
21 | atomic_t fscache_retrieval_delay_histogram[HZ]; | ||
22 | atomic_t fscache_retrieval_histogram[HZ]; | ||
23 | |||
24 | /* | ||
25 | * display the time-taken histogram | ||
26 | */ | ||
27 | static int fscache_histogram_show(struct seq_file *m, void *v) | ||
28 | { | ||
29 | unsigned long index; | ||
30 | unsigned n[5], t; | ||
31 | |||
32 | switch ((unsigned long) v) { | ||
33 | case 1: | ||
34 | seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS " | ||
35 | " RETRV DLY RETRIEVLS\n"); | ||
36 | return 0; | ||
37 | case 2: | ||
38 | seq_puts(m, "===== ===== ========= ========= =========" | ||
39 | " ========= =========\n"); | ||
40 | return 0; | ||
41 | default: | ||
42 | index = (unsigned long) v - 3; | ||
43 | n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]); | ||
44 | n[1] = atomic_read(&fscache_ops_histogram[index]); | ||
45 | n[2] = atomic_read(&fscache_objs_histogram[index]); | ||
46 | n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]); | ||
47 | n[4] = atomic_read(&fscache_retrieval_histogram[index]); | ||
48 | if (!(n[0] | n[1] | n[2] | n[3] | n[4])) | ||
49 | return 0; | ||
50 | |||
51 | t = (index * 1000) / HZ; | ||
52 | |||
53 | seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n", | ||
54 | index, t, n[0], n[1], n[2], n[3], n[4]); | ||
55 | return 0; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * set up the iterator to start reading from the first line | ||
61 | */ | ||
62 | static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos) | ||
63 | { | ||
64 | if ((unsigned long long)*_pos >= HZ + 2) | ||
65 | return NULL; | ||
66 | if (*_pos == 0) | ||
67 | *_pos = 1; | ||
68 | return (void *)(unsigned long) *_pos; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * move to the next line | ||
73 | */ | ||
74 | static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos) | ||
75 | { | ||
76 | (*pos)++; | ||
77 | return (unsigned long long)*pos > HZ + 2 ? | ||
78 | NULL : (void *)(unsigned long) *pos; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * clean up after reading | ||
83 | */ | ||
84 | static void fscache_histogram_stop(struct seq_file *m, void *v) | ||
85 | { | ||
86 | } | ||
87 | |||
88 | static const struct seq_operations fscache_histogram_ops = { | ||
89 | .start = fscache_histogram_start, | ||
90 | .stop = fscache_histogram_stop, | ||
91 | .next = fscache_histogram_next, | ||
92 | .show = fscache_histogram_show, | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * open "/proc/fs/fscache/histogram" to provide latency data | ||
97 | */ | ||
98 | static int fscache_histogram_open(struct inode *inode, struct file *file) | ||
99 | { | ||
100 | return seq_open(file, &fscache_histogram_ops); | ||
101 | } | ||
102 | |||
103 | const struct file_operations fscache_histogram_fops = { | ||
104 | .owner = THIS_MODULE, | ||
105 | .open = fscache_histogram_open, | ||
106 | .read = seq_read, | ||
107 | .llseek = seq_lseek, | ||
108 | .release = seq_release, | ||
109 | }; | ||
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h new file mode 100644 index 000000000000..e0cbd16f6dc9 --- /dev/null +++ b/fs/fscache/internal.h | |||
@@ -0,0 +1,380 @@ | |||
1 | /* Internal definitions for FS-Cache | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Lock order, in the order in which multiple locks should be obtained: | ||
14 | * - fscache_addremove_sem | ||
15 | * - cookie->lock | ||
16 | * - cookie->parent->lock | ||
17 | * - cache->object_list_lock | ||
18 | * - object->lock | ||
19 | * - object->parent->lock | ||
20 | * - fscache_thread_lock | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/fscache-cache.h> | ||
25 | #include <linux/sched.h> | ||
26 | |||
27 | #define FSCACHE_MIN_THREADS 4 | ||
28 | #define FSCACHE_MAX_THREADS 32 | ||
29 | |||
30 | /* | ||
31 | * fsc-cache.c | ||
32 | */ | ||
33 | extern struct list_head fscache_cache_list; | ||
34 | extern struct rw_semaphore fscache_addremove_sem; | ||
35 | |||
36 | extern struct fscache_cache *fscache_select_cache_for_object( | ||
37 | struct fscache_cookie *); | ||
38 | |||
39 | /* | ||
40 | * fsc-cookie.c | ||
41 | */ | ||
42 | extern struct kmem_cache *fscache_cookie_jar; | ||
43 | |||
44 | extern void fscache_cookie_init_once(void *); | ||
45 | extern void __fscache_cookie_put(struct fscache_cookie *); | ||
46 | |||
47 | /* | ||
48 | * fsc-fsdef.c | ||
49 | */ | ||
50 | extern struct fscache_cookie fscache_fsdef_index; | ||
51 | extern struct fscache_cookie_def fscache_fsdef_netfs_def; | ||
52 | |||
53 | /* | ||
54 | * fsc-histogram.c | ||
55 | */ | ||
56 | #ifdef CONFIG_FSCACHE_HISTOGRAM | ||
57 | extern atomic_t fscache_obj_instantiate_histogram[HZ]; | ||
58 | extern atomic_t fscache_objs_histogram[HZ]; | ||
59 | extern atomic_t fscache_ops_histogram[HZ]; | ||
60 | extern atomic_t fscache_retrieval_delay_histogram[HZ]; | ||
61 | extern atomic_t fscache_retrieval_histogram[HZ]; | ||
62 | |||
63 | static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) | ||
64 | { | ||
65 | unsigned long jif = jiffies - start_jif; | ||
66 | if (jif >= HZ) | ||
67 | jif = HZ - 1; | ||
68 | atomic_inc(&histogram[jif]); | ||
69 | } | ||
70 | |||
71 | extern const struct file_operations fscache_histogram_fops; | ||
72 | |||
73 | #else | ||
74 | #define fscache_hist(hist, start_jif) do {} while (0) | ||
75 | #endif | ||
76 | |||
77 | /* | ||
78 | * fsc-main.c | ||
79 | */ | ||
80 | extern unsigned fscache_defer_lookup; | ||
81 | extern unsigned fscache_defer_create; | ||
82 | extern unsigned fscache_debug; | ||
83 | extern struct kobject *fscache_root; | ||
84 | |||
85 | extern int fscache_wait_bit(void *); | ||
86 | extern int fscache_wait_bit_interruptible(void *); | ||
87 | |||
88 | /* | ||
89 | * fsc-object.c | ||
90 | */ | ||
91 | extern void fscache_withdrawing_object(struct fscache_cache *, | ||
92 | struct fscache_object *); | ||
93 | extern void fscache_enqueue_object(struct fscache_object *); | ||
94 | |||
95 | /* | ||
96 | * fsc-operation.c | ||
97 | */ | ||
98 | extern int fscache_submit_exclusive_op(struct fscache_object *, | ||
99 | struct fscache_operation *); | ||
100 | extern int fscache_submit_op(struct fscache_object *, | ||
101 | struct fscache_operation *); | ||
102 | extern void fscache_abort_object(struct fscache_object *); | ||
103 | extern void fscache_start_operations(struct fscache_object *); | ||
104 | extern void fscache_operation_gc(struct work_struct *); | ||
105 | |||
106 | /* | ||
107 | * fsc-proc.c | ||
108 | */ | ||
109 | #ifdef CONFIG_PROC_FS | ||
110 | extern int __init fscache_proc_init(void); | ||
111 | extern void fscache_proc_cleanup(void); | ||
112 | #else | ||
113 | #define fscache_proc_init() (0) | ||
114 | #define fscache_proc_cleanup() do {} while (0) | ||
115 | #endif | ||
116 | |||
117 | /* | ||
118 | * fsc-stats.c | ||
119 | */ | ||
120 | #ifdef CONFIG_FSCACHE_STATS | ||
121 | extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; | ||
122 | extern atomic_t fscache_n_objs_processed[FSCACHE_MAX_THREADS]; | ||
123 | |||
124 | extern atomic_t fscache_n_op_pend; | ||
125 | extern atomic_t fscache_n_op_run; | ||
126 | extern atomic_t fscache_n_op_enqueue; | ||
127 | extern atomic_t fscache_n_op_deferred_release; | ||
128 | extern atomic_t fscache_n_op_release; | ||
129 | extern atomic_t fscache_n_op_gc; | ||
130 | |||
131 | extern atomic_t fscache_n_attr_changed; | ||
132 | extern atomic_t fscache_n_attr_changed_ok; | ||
133 | extern atomic_t fscache_n_attr_changed_nobufs; | ||
134 | extern atomic_t fscache_n_attr_changed_nomem; | ||
135 | extern atomic_t fscache_n_attr_changed_calls; | ||
136 | |||
137 | extern atomic_t fscache_n_allocs; | ||
138 | extern atomic_t fscache_n_allocs_ok; | ||
139 | extern atomic_t fscache_n_allocs_wait; | ||
140 | extern atomic_t fscache_n_allocs_nobufs; | ||
141 | extern atomic_t fscache_n_alloc_ops; | ||
142 | extern atomic_t fscache_n_alloc_op_waits; | ||
143 | |||
144 | extern atomic_t fscache_n_retrievals; | ||
145 | extern atomic_t fscache_n_retrievals_ok; | ||
146 | extern atomic_t fscache_n_retrievals_wait; | ||
147 | extern atomic_t fscache_n_retrievals_nodata; | ||
148 | extern atomic_t fscache_n_retrievals_nobufs; | ||
149 | extern atomic_t fscache_n_retrievals_intr; | ||
150 | extern atomic_t fscache_n_retrievals_nomem; | ||
151 | extern atomic_t fscache_n_retrieval_ops; | ||
152 | extern atomic_t fscache_n_retrieval_op_waits; | ||
153 | |||
154 | extern atomic_t fscache_n_stores; | ||
155 | extern atomic_t fscache_n_stores_ok; | ||
156 | extern atomic_t fscache_n_stores_again; | ||
157 | extern atomic_t fscache_n_stores_nobufs; | ||
158 | extern atomic_t fscache_n_stores_oom; | ||
159 | extern atomic_t fscache_n_store_ops; | ||
160 | extern atomic_t fscache_n_store_calls; | ||
161 | |||
162 | extern atomic_t fscache_n_marks; | ||
163 | extern atomic_t fscache_n_uncaches; | ||
164 | |||
165 | extern atomic_t fscache_n_acquires; | ||
166 | extern atomic_t fscache_n_acquires_null; | ||
167 | extern atomic_t fscache_n_acquires_no_cache; | ||
168 | extern atomic_t fscache_n_acquires_ok; | ||
169 | extern atomic_t fscache_n_acquires_nobufs; | ||
170 | extern atomic_t fscache_n_acquires_oom; | ||
171 | |||
172 | extern atomic_t fscache_n_updates; | ||
173 | extern atomic_t fscache_n_updates_null; | ||
174 | extern atomic_t fscache_n_updates_run; | ||
175 | |||
176 | extern atomic_t fscache_n_relinquishes; | ||
177 | extern atomic_t fscache_n_relinquishes_null; | ||
178 | extern atomic_t fscache_n_relinquishes_waitcrt; | ||
179 | |||
180 | extern atomic_t fscache_n_cookie_index; | ||
181 | extern atomic_t fscache_n_cookie_data; | ||
182 | extern atomic_t fscache_n_cookie_special; | ||
183 | |||
184 | extern atomic_t fscache_n_object_alloc; | ||
185 | extern atomic_t fscache_n_object_no_alloc; | ||
186 | extern atomic_t fscache_n_object_lookups; | ||
187 | extern atomic_t fscache_n_object_lookups_negative; | ||
188 | extern atomic_t fscache_n_object_lookups_positive; | ||
189 | extern atomic_t fscache_n_object_created; | ||
190 | extern atomic_t fscache_n_object_avail; | ||
191 | extern atomic_t fscache_n_object_dead; | ||
192 | |||
193 | extern atomic_t fscache_n_checkaux_none; | ||
194 | extern atomic_t fscache_n_checkaux_okay; | ||
195 | extern atomic_t fscache_n_checkaux_update; | ||
196 | extern atomic_t fscache_n_checkaux_obsolete; | ||
197 | |||
198 | static inline void fscache_stat(atomic_t *stat) | ||
199 | { | ||
200 | atomic_inc(stat); | ||
201 | } | ||
202 | |||
203 | extern const struct file_operations fscache_stats_fops; | ||
204 | #else | ||
205 | |||
206 | #define fscache_stat(stat) do {} while (0) | ||
207 | #endif | ||
208 | |||
209 | /* | ||
210 | * raise an event on an object | ||
211 | * - if the event is not masked for that object, then the object is | ||
212 | * queued for attention by the thread pool. | ||
213 | */ | ||
214 | static inline void fscache_raise_event(struct fscache_object *object, | ||
215 | unsigned event) | ||
216 | { | ||
217 | if (!test_and_set_bit(event, &object->events) && | ||
218 | test_bit(event, &object->event_mask)) | ||
219 | fscache_enqueue_object(object); | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * drop a reference to a cookie | ||
224 | */ | ||
225 | static inline void fscache_cookie_put(struct fscache_cookie *cookie) | ||
226 | { | ||
227 | BUG_ON(atomic_read(&cookie->usage) <= 0); | ||
228 | if (atomic_dec_and_test(&cookie->usage)) | ||
229 | __fscache_cookie_put(cookie); | ||
230 | } | ||
231 | |||
232 | /* | ||
233 | * get an extra reference to a netfs retrieval context | ||
234 | */ | ||
235 | static inline | ||
236 | void *fscache_get_context(struct fscache_cookie *cookie, void *context) | ||
237 | { | ||
238 | if (cookie->def->get_context) | ||
239 | cookie->def->get_context(cookie->netfs_data, context); | ||
240 | return context; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * release a reference to a netfs retrieval context | ||
245 | */ | ||
246 | static inline | ||
247 | void fscache_put_context(struct fscache_cookie *cookie, void *context) | ||
248 | { | ||
249 | if (cookie->def->put_context) | ||
250 | cookie->def->put_context(cookie->netfs_data, context); | ||
251 | } | ||
252 | |||
253 | /*****************************************************************************/ | ||
254 | /* | ||
255 | * debug tracing | ||
256 | */ | ||
257 | #define dbgprintk(FMT, ...) \ | ||
258 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | ||
259 | |||
260 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
261 | static inline __attribute__((format(printf, 1, 2))) | ||
262 | void _dbprintk(const char *fmt, ...) | ||
263 | { | ||
264 | } | ||
265 | |||
266 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | ||
267 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | ||
268 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | ||
269 | |||
270 | #define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | ||
271 | |||
272 | #ifdef __KDEBUG | ||
273 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) | ||
274 | #define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) | ||
275 | #define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) | ||
276 | |||
277 | #elif defined(CONFIG_FSCACHE_DEBUG) | ||
278 | #define _enter(FMT, ...) \ | ||
279 | do { \ | ||
280 | if (__do_kdebug(ENTER)) \ | ||
281 | kenter(FMT, ##__VA_ARGS__); \ | ||
282 | } while (0) | ||
283 | |||
284 | #define _leave(FMT, ...) \ | ||
285 | do { \ | ||
286 | if (__do_kdebug(LEAVE)) \ | ||
287 | kleave(FMT, ##__VA_ARGS__); \ | ||
288 | } while (0) | ||
289 | |||
290 | #define _debug(FMT, ...) \ | ||
291 | do { \ | ||
292 | if (__do_kdebug(DEBUG)) \ | ||
293 | kdebug(FMT, ##__VA_ARGS__); \ | ||
294 | } while (0) | ||
295 | |||
296 | #else | ||
297 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | ||
298 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | ||
299 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | ||
300 | #endif | ||
301 | |||
302 | /* | ||
303 | * determine whether a particular optional debugging point should be logged | ||
304 | * - we need to go through three steps to persuade cpp to correctly join the | ||
305 | * shorthand in FSCACHE_DEBUG_LEVEL with its prefix | ||
306 | */ | ||
307 | #define ____do_kdebug(LEVEL, POINT) \ | ||
308 | unlikely((fscache_debug & \ | ||
309 | (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3)))) | ||
310 | #define ___do_kdebug(LEVEL, POINT) \ | ||
311 | ____do_kdebug(LEVEL, POINT) | ||
312 | #define __do_kdebug(POINT) \ | ||
313 | ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT) | ||
314 | |||
315 | #define FSCACHE_DEBUG_CACHE 0 | ||
316 | #define FSCACHE_DEBUG_COOKIE 1 | ||
317 | #define FSCACHE_DEBUG_PAGE 2 | ||
318 | #define FSCACHE_DEBUG_OPERATION 3 | ||
319 | |||
320 | #define FSCACHE_POINT_ENTER 1 | ||
321 | #define FSCACHE_POINT_LEAVE 2 | ||
322 | #define FSCACHE_POINT_DEBUG 4 | ||
323 | |||
324 | #ifndef FSCACHE_DEBUG_LEVEL | ||
325 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
326 | #endif | ||
327 | |||
328 | /* | ||
329 | * assertions | ||
330 | */ | ||
331 | #if 1 /* defined(__KDEBUGALL) */ | ||
332 | |||
333 | #define ASSERT(X) \ | ||
334 | do { \ | ||
335 | if (unlikely(!(X))) { \ | ||
336 | printk(KERN_ERR "\n"); \ | ||
337 | printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ | ||
338 | BUG(); \ | ||
339 | } \ | ||
340 | } while (0) | ||
341 | |||
342 | #define ASSERTCMP(X, OP, Y) \ | ||
343 | do { \ | ||
344 | if (unlikely(!((X) OP (Y)))) { \ | ||
345 | printk(KERN_ERR "\n"); \ | ||
346 | printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ | ||
347 | printk(KERN_ERR "%lx " #OP " %lx is false\n", \ | ||
348 | (unsigned long)(X), (unsigned long)(Y)); \ | ||
349 | BUG(); \ | ||
350 | } \ | ||
351 | } while (0) | ||
352 | |||
353 | #define ASSERTIF(C, X) \ | ||
354 | do { \ | ||
355 | if (unlikely((C) && !(X))) { \ | ||
356 | printk(KERN_ERR "\n"); \ | ||
357 | printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ | ||
358 | BUG(); \ | ||
359 | } \ | ||
360 | } while (0) | ||
361 | |||
362 | #define ASSERTIFCMP(C, X, OP, Y) \ | ||
363 | do { \ | ||
364 | if (unlikely((C) && !((X) OP (Y)))) { \ | ||
365 | printk(KERN_ERR "\n"); \ | ||
366 | printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ | ||
367 | printk(KERN_ERR "%lx " #OP " %lx is false\n", \ | ||
368 | (unsigned long)(X), (unsigned long)(Y)); \ | ||
369 | BUG(); \ | ||
370 | } \ | ||
371 | } while (0) | ||
372 | |||
373 | #else | ||
374 | |||
375 | #define ASSERT(X) do {} while (0) | ||
376 | #define ASSERTCMP(X, OP, Y) do {} while (0) | ||
377 | #define ASSERTIF(C, X) do {} while (0) | ||
378 | #define ASSERTIFCMP(C, X, OP, Y) do {} while (0) | ||
379 | |||
380 | #endif /* assert or not */ | ||
diff --git a/fs/fscache/main.c b/fs/fscache/main.c new file mode 100644 index 000000000000..4de41b597499 --- /dev/null +++ b/fs/fscache/main.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* General filesystem local caching manager | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/completion.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include "internal.h" | ||
19 | |||
20 | MODULE_DESCRIPTION("FS Cache Manager"); | ||
21 | MODULE_AUTHOR("Red Hat, Inc."); | ||
22 | MODULE_LICENSE("GPL"); | ||
23 | |||
24 | unsigned fscache_defer_lookup = 1; | ||
25 | module_param_named(defer_lookup, fscache_defer_lookup, uint, | ||
26 | S_IWUSR | S_IRUGO); | ||
27 | MODULE_PARM_DESC(fscache_defer_lookup, | ||
28 | "Defer cookie lookup to background thread"); | ||
29 | |||
30 | unsigned fscache_defer_create = 1; | ||
31 | module_param_named(defer_create, fscache_defer_create, uint, | ||
32 | S_IWUSR | S_IRUGO); | ||
33 | MODULE_PARM_DESC(fscache_defer_create, | ||
34 | "Defer cookie creation to background thread"); | ||
35 | |||
36 | unsigned fscache_debug; | ||
37 | module_param_named(debug, fscache_debug, uint, | ||
38 | S_IWUSR | S_IRUGO); | ||
39 | MODULE_PARM_DESC(fscache_debug, | ||
40 | "FS-Cache debugging mask"); | ||
41 | |||
42 | struct kobject *fscache_root; | ||
43 | |||
44 | /* | ||
45 | * initialise the fs caching module | ||
46 | */ | ||
47 | static int __init fscache_init(void) | ||
48 | { | ||
49 | int ret; | ||
50 | |||
51 | ret = slow_work_register_user(); | ||
52 | if (ret < 0) | ||
53 | goto error_slow_work; | ||
54 | |||
55 | ret = fscache_proc_init(); | ||
56 | if (ret < 0) | ||
57 | goto error_proc; | ||
58 | |||
59 | fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", | ||
60 | sizeof(struct fscache_cookie), | ||
61 | 0, | ||
62 | 0, | ||
63 | fscache_cookie_init_once); | ||
64 | if (!fscache_cookie_jar) { | ||
65 | printk(KERN_NOTICE | ||
66 | "FS-Cache: Failed to allocate a cookie jar\n"); | ||
67 | ret = -ENOMEM; | ||
68 | goto error_cookie_jar; | ||
69 | } | ||
70 | |||
71 | fscache_root = kobject_create_and_add("fscache", kernel_kobj); | ||
72 | if (!fscache_root) | ||
73 | goto error_kobj; | ||
74 | |||
75 | printk(KERN_NOTICE "FS-Cache: Loaded\n"); | ||
76 | return 0; | ||
77 | |||
78 | error_kobj: | ||
79 | kmem_cache_destroy(fscache_cookie_jar); | ||
80 | error_cookie_jar: | ||
81 | fscache_proc_cleanup(); | ||
82 | error_proc: | ||
83 | slow_work_unregister_user(); | ||
84 | error_slow_work: | ||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | fs_initcall(fscache_init); | ||
89 | |||
/*
 * clean up on module removal
 * - tears down in the reverse order of fscache_init(): sysfs kobject,
 *   cookie slab, /proc entries, then the slow-work facility
 */
static void __exit fscache_exit(void)
{
	_enter("");

	kobject_put(fscache_root);
	kmem_cache_destroy(fscache_cookie_jar);
	fscache_proc_cleanup();
	slow_work_unregister_user();
	printk(KERN_NOTICE "FS-Cache: Unloaded\n");
}
103 | |||
104 | module_exit(fscache_exit); | ||
105 | |||
/*
 * wait_on_bit() sleep function for uninterruptible waiting
 * - just yields the CPU; returning 0 tells wait_on_bit() to keep waiting
 * - @flags is required by the wait_on_bit() action-function signature and
 *   is unused here
 */
int fscache_wait_bit(void *flags)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(fscache_wait_bit);
115 | |||
/*
 * wait_on_bit() sleep function for interruptible waiting
 * - yields the CPU; a non-zero return (signal pending) makes wait_on_bit()
 *   abort the wait
 */
int fscache_wait_bit_interruptible(void *flags)
{
	schedule();
	return signal_pending(current);
}
EXPORT_SYMBOL(fscache_wait_bit_interruptible);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c new file mode 100644 index 000000000000..e028b8eb1c40 --- /dev/null +++ b/fs/fscache/netfs.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* FS-Cache netfs (client) registration | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL COOKIE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | static LIST_HEAD(fscache_netfs_list); | ||
18 | |||
/*
 * register a network filesystem for caching
 * - allocates and initialises the netfs's primary index cookie (a child of
 *   the FSDEF index) before taking fscache_addremove_sem
 * - returns 0 on success, -ENOMEM on allocation failure, or -EEXIST if a
 *   netfs of the same name is already registered
 */
int __fscache_register_netfs(struct fscache_netfs *netfs)
{
	struct fscache_netfs *ptr;
	int ret;

	_enter("{%s}", netfs->name);

	INIT_LIST_HEAD(&netfs->link);

	/* allocate a cookie for the primary index */
	netfs->primary_index =
		kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);

	if (!netfs->primary_index) {
		_leave(" = -ENOMEM");
		return -ENOMEM;
	}

	/* initialise the primary index cookie */
	atomic_set(&netfs->primary_index->usage, 1);
	atomic_set(&netfs->primary_index->n_children, 0);

	netfs->primary_index->def = &fscache_fsdef_netfs_def;
	netfs->primary_index->parent = &fscache_fsdef_index;
	netfs->primary_index->netfs_data = netfs;

	/* the new cookie pins its parent, the FSDEF index */
	atomic_inc(&netfs->primary_index->parent->usage);
	atomic_inc(&netfs->primary_index->parent->n_children);

	spin_lock_init(&netfs->primary_index->lock);
	INIT_HLIST_HEAD(&netfs->primary_index->backing_objects);

	/* check the netfs type is not already present */
	down_write(&fscache_addremove_sem);

	ret = -EEXIST;
	list_for_each_entry(ptr, &fscache_netfs_list, link) {
		if (strcmp(ptr->name, netfs->name) == 0)
			goto already_registered;
	}

	list_add(&netfs->link, &fscache_netfs_list);
	ret = 0;

	printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n",
	       netfs->name);

already_registered:
	up_write(&fscache_addremove_sem);

	/* on failure, undo the cookie; clearing ->parent first presumably
	 * stops __fscache_cookie_put() touching the FSDEF index — but
	 * NOTE(review): the usage/n_children increments taken on the parent
	 * above then appear never to be dropped on this path; verify against
	 * __fscache_cookie_put() */
	if (ret < 0) {
		netfs->primary_index->parent = NULL;
		__fscache_cookie_put(netfs->primary_index);
		netfs->primary_index = NULL;
	}

	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(__fscache_register_netfs);
82 | |||
/*
 * unregister a network filesystem from the cache
 * - all cookies must have been released first
 * - unlinks the netfs and relinquishes its primary index cookie under
 *   fscache_addremove_sem to exclude concurrent (un)registration
 */
void __fscache_unregister_netfs(struct fscache_netfs *netfs)
{
	_enter("{%s.%u}", netfs->name, netfs->version);

	down_write(&fscache_addremove_sem);

	list_del(&netfs->link);
	fscache_relinquish_cookie(netfs->primary_index, 0);

	up_write(&fscache_addremove_sem);

	printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n",
	       netfs->name);

	_leave("");
}
EXPORT_SYMBOL(__fscache_unregister_netfs);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c new file mode 100644 index 000000000000..392a41b1b79d --- /dev/null +++ b/fs/fscache/object.c | |||
@@ -0,0 +1,810 @@ | |||
1 | /* FS-Cache object state machine handler | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/filesystems/caching/object.txt for a description of the | ||
12 | * object state machine and the in-kernel representations. | ||
13 | */ | ||
14 | |||
15 | #define FSCACHE_DEBUG_LEVEL COOKIE | ||
16 | #include <linux/module.h> | ||
17 | #include "internal.h" | ||
18 | |||
/*
 * Human-readable names for the object states, indexed by
 * enum fscache_object_state; used by the debug traces in this file.
 */
const char *fscache_object_states[] = {
	[FSCACHE_OBJECT_INIT]		= "OBJECT_INIT",
	[FSCACHE_OBJECT_LOOKING_UP]	= "OBJECT_LOOKING_UP",
	[FSCACHE_OBJECT_CREATING]	= "OBJECT_CREATING",
	[FSCACHE_OBJECT_AVAILABLE]	= "OBJECT_AVAILABLE",
	[FSCACHE_OBJECT_ACTIVE]		= "OBJECT_ACTIVE",
	[FSCACHE_OBJECT_UPDATING]	= "OBJECT_UPDATING",
	[FSCACHE_OBJECT_DYING]		= "OBJECT_DYING",
	[FSCACHE_OBJECT_LC_DYING]	= "OBJECT_LC_DYING",
	[FSCACHE_OBJECT_ABORT_INIT]	= "OBJECT_ABORT_INIT",
	[FSCACHE_OBJECT_RELEASING]	= "OBJECT_RELEASING",
	[FSCACHE_OBJECT_RECYCLING]	= "OBJECT_RECYCLING",
	[FSCACHE_OBJECT_WITHDRAWING]	= "OBJECT_WITHDRAWING",
	[FSCACHE_OBJECT_DEAD]		= "OBJECT_DEAD",
};
EXPORT_SYMBOL(fscache_object_states);
35 | |||
36 | static void fscache_object_slow_work_put_ref(struct slow_work *); | ||
37 | static int fscache_object_slow_work_get_ref(struct slow_work *); | ||
38 | static void fscache_object_slow_work_execute(struct slow_work *); | ||
39 | static void fscache_initialise_object(struct fscache_object *); | ||
40 | static void fscache_lookup_object(struct fscache_object *); | ||
41 | static void fscache_object_available(struct fscache_object *); | ||
42 | static void fscache_release_object(struct fscache_object *); | ||
43 | static void fscache_withdraw_object(struct fscache_object *); | ||
44 | static void fscache_enqueue_dependents(struct fscache_object *); | ||
45 | static void fscache_dequeue_object(struct fscache_object *); | ||
46 | |||
/*
 * Slow-work callbacks through which the object state machine is run on the
 * slow-work thread pool (see fscache_object_slow_work_execute() below).
 */
const struct slow_work_ops fscache_object_slow_work_ops = {
	.get_ref	= fscache_object_slow_work_get_ref,
	.put_ref	= fscache_object_slow_work_put_ref,
	.execute	= fscache_object_slow_work_execute,
};
EXPORT_SYMBOL(fscache_object_slow_work_ops);
53 | |||
/*
 * we need to notify the parent when an op completes that we had outstanding
 * upon it
 * - drops one n_ops and one n_obj_ops count on the parent and raises
 *   FSCACHE_OBJECT_EV_CLEARED on it when the last op completes
 * - spin_lock_nested(..., 1) because the caller may already hold this
 *   object's own lock at nesting level 0
 */
static inline void fscache_done_parent_op(struct fscache_object *object)
{
	struct fscache_object *parent = object->parent;

	_enter("OBJ%x {OBJ%x,%x}",
	       object->debug_id, parent->debug_id, parent->n_ops);

	spin_lock_nested(&parent->lock, 1);
	parent->n_ops--;
	parent->n_obj_ops--;
	if (parent->n_ops == 0)
		fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED);
	spin_unlock(&parent->lock);
}
72 | |||
/*
 * process events that have been sent to an object's state machine
 * - initiates parent lookup
 * - does object lookup
 * - does object creation
 * - does object recycling and retirement
 * - does object withdrawal
 *
 * The first switch performs the work for the current state; the *_transit
 * labels then pick the next state from the pending events.  Transitions use
 * fls() on (events & event_mask), so the highest-numbered pending event wins;
 * -1 means no event is pending and the object sleeps until one arrives.
 */
static void fscache_object_state_machine(struct fscache_object *object)
{
	enum fscache_object_state new_state;

	ASSERT(object != NULL);

	_enter("{OBJ%x,%s,%lx}",
	       object->debug_id, fscache_object_states[object->state],
	       object->events);

	switch (object->state) {
		/* wait for the parent object to become ready */
	case FSCACHE_OBJECT_INIT:
		object->event_mask =
			ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED);
		fscache_initialise_object(object);
		goto done;

		/* look up the object metadata on disk */
	case FSCACHE_OBJECT_LOOKING_UP:
		fscache_lookup_object(object);
		goto lookup_transit;

		/* create the object metadata on disk */
	case FSCACHE_OBJECT_CREATING:
		fscache_lookup_object(object);
		goto lookup_transit;

		/* handle an object becoming available; start pending
		 * operations and queue dependent operations for processing */
	case FSCACHE_OBJECT_AVAILABLE:
		fscache_object_available(object);
		goto active_transit;

		/* normal running state */
	case FSCACHE_OBJECT_ACTIVE:
		goto active_transit;

		/* update the object metadata on disk */
	case FSCACHE_OBJECT_UPDATING:
		clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
		fscache_stat(&fscache_n_updates_run);
		object->cache->ops->update_object(object);
		goto active_transit;

		/* handle an object dying during lookup or creation */
	case FSCACHE_OBJECT_LC_DYING:
		object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);
		object->cache->ops->lookup_complete(object);

		spin_lock(&object->lock);
		object->state = FSCACHE_OBJECT_DYING;
		if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
				       &object->cookie->flags))
			wake_up_bit(&object->cookie->flags,
				    FSCACHE_COOKIE_CREATING);
		spin_unlock(&object->lock);

		fscache_done_parent_op(object);

		/* deliberate fall through to DYING: wait for completion of
		 * all active operations on this object and the death of all
		 * child objects of this object */
	case FSCACHE_OBJECT_DYING:
	dying:
		clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events);
		spin_lock(&object->lock);
		_debug("dying OBJ%x {%d,%d}",
		       object->debug_id, object->n_ops, object->n_children);
		/* once idle, listen only for death events; until then listen
		 * only for EV_CLEARED (raised when ops/children drain) */
		if (object->n_ops == 0 && object->n_children == 0) {
			object->event_mask &=
				~(1 << FSCACHE_OBJECT_EV_CLEARED);
			object->event_mask |=
				(1 << FSCACHE_OBJECT_EV_WITHDRAW) |
				(1 << FSCACHE_OBJECT_EV_RETIRE) |
				(1 << FSCACHE_OBJECT_EV_RELEASE) |
				(1 << FSCACHE_OBJECT_EV_ERROR);
		} else {
			object->event_mask &=
				~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
				  (1 << FSCACHE_OBJECT_EV_RETIRE) |
				  (1 << FSCACHE_OBJECT_EV_RELEASE) |
				  (1 << FSCACHE_OBJECT_EV_ERROR));
			object->event_mask |=
				1 << FSCACHE_OBJECT_EV_CLEARED;
		}
		spin_unlock(&object->lock);
		fscache_enqueue_dependents(object);
		goto terminal_transit;

		/* handle an abort during initialisation */
	case FSCACHE_OBJECT_ABORT_INIT:
		_debug("handle abort init %lx", object->events);
		object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);

		spin_lock(&object->lock);
		fscache_dequeue_object(object);

		object->state = FSCACHE_OBJECT_DYING;
		if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
				       &object->cookie->flags))
			wake_up_bit(&object->cookie->flags,
				    FSCACHE_COOKIE_CREATING);
		spin_unlock(&object->lock);
		goto dying;

		/* handle the netfs releasing an object and possibly marking it
		 * obsolete too */
	case FSCACHE_OBJECT_RELEASING:
	case FSCACHE_OBJECT_RECYCLING:
		object->event_mask &=
			~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
			  (1 << FSCACHE_OBJECT_EV_RETIRE) |
			  (1 << FSCACHE_OBJECT_EV_RELEASE) |
			  (1 << FSCACHE_OBJECT_EV_ERROR));
		fscache_release_object(object);
		spin_lock(&object->lock);
		object->state = FSCACHE_OBJECT_DEAD;
		spin_unlock(&object->lock);
		fscache_stat(&fscache_n_object_dead);
		goto terminal_transit;

		/* handle the parent cache of this object being withdrawn from
		 * active service */
	case FSCACHE_OBJECT_WITHDRAWING:
		object->event_mask &=
			~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
			  (1 << FSCACHE_OBJECT_EV_RETIRE) |
			  (1 << FSCACHE_OBJECT_EV_RELEASE) |
			  (1 << FSCACHE_OBJECT_EV_ERROR));
		fscache_withdraw_object(object);
		spin_lock(&object->lock);
		object->state = FSCACHE_OBJECT_DEAD;
		spin_unlock(&object->lock);
		fscache_stat(&fscache_n_object_dead);
		goto terminal_transit;

		/* complain about the object being woken up once it is
		 * deceased */
	case FSCACHE_OBJECT_DEAD:
		printk(KERN_ERR "FS-Cache:"
		       " Unexpected event in dead state %lx\n",
		       object->events & object->event_mask);
		BUG();

	default:
		printk(KERN_ERR "FS-Cache: Unknown object state %u\n",
		       object->state);
		BUG();
	}

	/* determine the transition from a lookup state */
lookup_transit:
	switch (fls(object->events & object->event_mask) - 1) {
	case FSCACHE_OBJECT_EV_WITHDRAW:
	case FSCACHE_OBJECT_EV_RETIRE:
	case FSCACHE_OBJECT_EV_RELEASE:
	case FSCACHE_OBJECT_EV_ERROR:
		new_state = FSCACHE_OBJECT_LC_DYING;
		goto change_state;
	case FSCACHE_OBJECT_EV_REQUEUE:
		goto done;
	case -1:
		goto done; /* sleep until event */
	default:
		goto unsupported_event;
	}

	/* determine the transition from an active state */
active_transit:
	switch (fls(object->events & object->event_mask) - 1) {
	case FSCACHE_OBJECT_EV_WITHDRAW:
	case FSCACHE_OBJECT_EV_RETIRE:
	case FSCACHE_OBJECT_EV_RELEASE:
	case FSCACHE_OBJECT_EV_ERROR:
		new_state = FSCACHE_OBJECT_DYING;
		goto change_state;
	case FSCACHE_OBJECT_EV_UPDATE:
		new_state = FSCACHE_OBJECT_UPDATING;
		goto change_state;
	case -1:
		new_state = FSCACHE_OBJECT_ACTIVE;
		goto change_state; /* sleep until event */
	default:
		goto unsupported_event;
	}

	/* determine the transition from a terminal state */
terminal_transit:
	switch (fls(object->events & object->event_mask) - 1) {
	case FSCACHE_OBJECT_EV_WITHDRAW:
		new_state = FSCACHE_OBJECT_WITHDRAWING;
		goto change_state;
	case FSCACHE_OBJECT_EV_RETIRE:
		new_state = FSCACHE_OBJECT_RECYCLING;
		goto change_state;
	case FSCACHE_OBJECT_EV_RELEASE:
		new_state = FSCACHE_OBJECT_RELEASING;
		goto change_state;
	case FSCACHE_OBJECT_EV_ERROR:
		new_state = FSCACHE_OBJECT_WITHDRAWING;
		goto change_state;
	case FSCACHE_OBJECT_EV_CLEARED:
		new_state = FSCACHE_OBJECT_DYING;
		goto change_state;
	case -1:
		goto done; /* sleep until event */
	default:
		goto unsupported_event;
	}

change_state:
	spin_lock(&object->lock);
	object->state = new_state;
	spin_unlock(&object->lock);

done:
	_leave(" [->%s]", fscache_object_states[object->state]);
	return;

unsupported_event:
	printk(KERN_ERR "FS-Cache:"
	       " Unsupported event %lx [mask %lx] in state %s\n",
	       object->events, object->event_mask,
	       fscache_object_states[object->state]);
	BUG();
}
307 | |||
/*
 * execute an object
 * - slow-work entry point: runs one pass of the state machine, timing it
 *   into fscache_objs_histogram
 * - the REQUEUE event is cleared up front so that a requeue raised during
 *   this pass is seen by the re-enqueue check below
 */
static void fscache_object_slow_work_execute(struct slow_work *work)
{
	struct fscache_object *object =
		container_of(work, struct fscache_object, work);
	unsigned long start;

	_enter("{OBJ%x}", object->debug_id);

	clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);

	start = jiffies;
	fscache_object_state_machine(object);
	fscache_hist(fscache_objs_histogram, start);
	/* re-enqueue if unmasked events remain pending */
	if (object->events & object->event_mask)
		fscache_enqueue_object(object);
}
327 | |||
/*
 * initialise an object
 * - check the specified object's parent to see if we can make use of it
 *   immediately to do a creation
 * - we may need to start the process of creating a parent and we need to wait
 *   for the parent's lookup and creation to complete if it's not there yet
 * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the
 *   leaf-most cookies of the object and all its children
 */
static void fscache_initialise_object(struct fscache_object *object)
{
	struct fscache_object *parent;

	_enter("");
	ASSERT(object->cookie != NULL);
	ASSERT(object->cookie->parent != NULL);
	ASSERT(list_empty(&object->work.link));

	/* a pending death event pre-empts initialisation: transit to
	 * ABORT_INIT and let the state machine unwind */
	if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) |
			      (1 << FSCACHE_OBJECT_EV_RELEASE) |
			      (1 << FSCACHE_OBJECT_EV_RETIRE) |
			      (1 << FSCACHE_OBJECT_EV_WITHDRAW))) {
		_debug("abort init %lx", object->events);
		spin_lock(&object->lock);
		object->state = FSCACHE_OBJECT_ABORT_INIT;
		spin_unlock(&object->lock);
		return;
	}

	/* lock order: cookie, parent cookie (nested), object, parent object
	 * (nested) */
	spin_lock(&object->cookie->lock);
	spin_lock_nested(&object->cookie->parent->lock, 1);

	parent = object->parent;
	if (!parent) {
		_debug("no parent");
		set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
	} else {
		spin_lock(&object->lock);
		spin_lock_nested(&parent->lock, 1);
		_debug("parent %s", fscache_object_states[parent->state]);

		if (parent->state >= FSCACHE_OBJECT_DYING) {
			_debug("bad parent");
			set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
		} else if (parent->state < FSCACHE_OBJECT_AVAILABLE) {
			_debug("wait");

			/* we may get woken up in this state by child objects
			 * binding on to us, so we need to make sure we don't
			 * add ourself to the list multiple times */
			if (list_empty(&object->dep_link)) {
				object->cache->ops->grab_object(object);
				list_add(&object->dep_link,
					 &parent->dependents);

				/* fscache_acquire_non_index_cookie() uses this
				 * to wake the chain up */
				if (parent->state == FSCACHE_OBJECT_INIT)
					fscache_enqueue_object(parent);
			}
		} else {
			/* parent is available: take an op count on it and
			 * requeue ourself to start the lookup */
			_debug("go");
			parent->n_ops++;
			parent->n_obj_ops++;
			object->lookup_jif = jiffies;
			object->state = FSCACHE_OBJECT_LOOKING_UP;
			set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
		}

		spin_unlock(&parent->lock);
		spin_unlock(&object->lock);
	}

	spin_unlock(&object->cookie->parent->lock);
	spin_unlock(&object->cookie->lock);
	_leave("");
}
405 | |||
/*
 * look an object up in the cache from which it was allocated
 * - we hold an "access lock" on the parent object, so the parent object cannot
 *   be withdrawn by either party till we've finished (the "lock" being the
 *   n_ops/n_obj_ops counts taken in fscache_initialise_object())
 * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the
 *   leaf-most cookies of the object and all its children
 */
static void fscache_lookup_object(struct fscache_object *object)
{
	struct fscache_cookie *cookie = object->cookie;
	struct fscache_object *parent;

	_enter("");

	parent = object->parent;
	ASSERT(parent != NULL);
	ASSERTCMP(parent->n_ops, >, 0);
	ASSERTCMP(parent->n_obj_ops, >, 0);

	/* make sure the parent is still available */
	ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE);

	/* a dying parent or a cache I/O error withdraws the object instead */
	if (parent->state >= FSCACHE_OBJECT_DYING ||
	    test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
		_debug("unavailable");
		set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
		_leave("");
		return;
	}

	_debug("LOOKUP \"%s/%s\" in \"%s\"",
	       parent->cookie->def->name, cookie->def->name,
	       object->cache->tag->name);

	fscache_stat(&fscache_n_object_lookups);
	/* the cache backend performs the actual on-disk lookup */
	object->cache->ops->lookup_object(object);

	if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events))
		set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);

	_leave("");
}
448 | |||
/**
 * fscache_object_lookup_negative - Note negative cookie lookup
 * @object: Object pointing to cookie to mark
 *
 * Note negative lookup, permitting those waiting to read data from an already
 * existing backing object to continue as there's no data for them to read.
 *
 * Called by the cache backend.  If the object is still LOOKING_UP it transits
 * to CREATING; the explicit memory barriers around clearing
 * FSCACHE_COOKIE_LOOKING_UP order the flag updates against the wake-up of
 * waiters on that bit.
 */
void fscache_object_lookup_negative(struct fscache_object *object)
{
	struct fscache_cookie *cookie = object->cookie;

	_enter("{OBJ%x,%s}",
	       object->debug_id, fscache_object_states[object->state]);

	spin_lock(&object->lock);
	if (object->state == FSCACHE_OBJECT_LOOKING_UP) {
		fscache_stat(&fscache_n_object_lookups_negative);

		/* transit here to allow write requests to begin stacking up
		 * and read requests to begin returning ENODATA */
		object->state = FSCACHE_OBJECT_CREATING;
		spin_unlock(&object->lock);

		set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags);
		set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);

		_debug("wake up lookup %p", &cookie->flags);
		smp_mb__before_clear_bit();
		clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
		smp_mb__after_clear_bit();
		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
		set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
	} else {
		/* a second call is only valid once we are already CREATING */
		ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING);
		spin_unlock(&object->lock);
	}

	_leave("");
}
EXPORT_SYMBOL(fscache_object_lookup_negative);
489 | |||
/**
 * fscache_obtained_object - Note successful object lookup or creation
 * @object: Object pointing to cookie to mark
 *
 * Note successful lookup and/or creation, permitting those waiting to write
 * data to a backing object to continue.
 *
 * Note that after calling this, an object's cookie may be relinquished by the
 * netfs, and so must be accessed with object lock held.
 */
void fscache_obtained_object(struct fscache_object *object)
{
	struct fscache_cookie *cookie = object->cookie;

	_enter("{OBJ%x,%s}",
	       object->debug_id, fscache_object_states[object->state]);

	/* if we were still looking up, then we must have a positive lookup
	 * result, in which case there may be data available */
	spin_lock(&object->lock);
	if (object->state == FSCACHE_OBJECT_LOOKING_UP) {
		fscache_stat(&fscache_n_object_lookups_positive);

		clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);

		object->state = FSCACHE_OBJECT_AVAILABLE;
		spin_unlock(&object->lock);

		/* barriers order the state/flag updates against waking the
		 * waiters on the LOOKING_UP bit */
		smp_mb__before_clear_bit();
		clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
		smp_mb__after_clear_bit();
		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
		set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
	} else {
		/* otherwise we must have just completed a creation */
		ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING);
		fscache_stat(&fscache_n_object_created);

		object->state = FSCACHE_OBJECT_AVAILABLE;
		spin_unlock(&object->lock);
		set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
		smp_wmb();
	}

	if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags))
		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING);

	_leave("");
}
EXPORT_SYMBOL(fscache_obtained_object);
539 | |||
/*
 * handle an object that has just become available
 * - wakes anyone waiting on FSCACHE_COOKIE_CREATING, releases the op count
 *   held on the parent, and starts any operations that queued up while the
 *   object was being looked up/created
 */
static void fscache_object_available(struct fscache_object *object)
{
	_enter("{OBJ%x}", object->debug_id);

	spin_lock(&object->lock);

	if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags))
		wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING);

	fscache_done_parent_op(object);
	if (object->n_in_progress == 0) {
		if (object->n_ops > 0) {
			ASSERTCMP(object->n_ops, >=, object->n_obj_ops);
			ASSERTIF(object->n_ops > object->n_obj_ops,
				 !list_empty(&object->pending_ops));
			fscache_start_operations(object);
		} else {
			ASSERT(list_empty(&object->pending_ops));
		}
	}
	spin_unlock(&object->lock);

	object->cache->ops->lookup_complete(object);
	fscache_enqueue_dependents(object);

	fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif);
	fscache_stat(&fscache_n_object_avail);

	_leave("");
}
573 | |||
/*
 * drop an object's attachments
 * - unlinks the object from the cache's object list, tells the backend to
 *   drop it, detaches it from its parent (raising EV_CLEARED on the parent
 *   when its last child goes) and finally drops the backend's object ref
 */
static void fscache_drop_object(struct fscache_object *object)
{
	struct fscache_object *parent = object->parent;
	struct fscache_cache *cache = object->cache;

	_enter("{OBJ%x,%d}", object->debug_id, object->n_children);

	spin_lock(&cache->object_list_lock);
	list_del_init(&object->cache_link);
	spin_unlock(&cache->object_list_lock);

	cache->ops->drop_object(object);

	if (parent) {
		_debug("release parent OBJ%x {%d}",
		       parent->debug_id, parent->n_children);

		spin_lock(&parent->lock);
		parent->n_children--;
		if (parent->n_children == 0)
			fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED);
		spin_unlock(&parent->lock);
		object->parent = NULL;
	}

	/* this just shifts the object release to the slow work processor */
	object->cache->ops->put_object(object);

	_leave("");
}
607 | |||
/*
 * release or recycle an object that the netfs has discarded
 * - currently a straight delegation to fscache_drop_object()
 */
static void fscache_release_object(struct fscache_object *object)
{
	_enter("");

	fscache_drop_object(object);
}
617 | |||
/*
 * withdraw an object from active service
 * - detaches the object from its cookie, if still attached, taking care over
 *   lock ordering: the cookie lock must be taken before the object lock, but
 *   we start out holding only the object lock
 * - then drops the object's attachments
 */
static void fscache_withdraw_object(struct fscache_object *object)
{
        struct fscache_cookie *cookie;
        bool detached;

        _enter("");

        spin_lock(&object->lock);
        cookie = object->cookie;
        if (cookie) {
                /* need to get the cookie lock before the object lock, starting
                 * from the object pointer */
                atomic_inc(&cookie->usage);
                spin_unlock(&object->lock);

                detached = false;
                spin_lock(&cookie->lock);
                spin_lock(&object->lock);

                /* someone else (e.g. the netfs relinquishing the cookie) may
                 * have detached us while the object lock was dropped; only
                 * unlink if the cookie is still ours */
                if (object->cookie == cookie) {
                        hlist_del_init(&object->cookie_link);
                        object->cookie = NULL;
                        detached = true;
                }
                spin_unlock(&cookie->lock);
                /* drop the ref taken above; if we did the detachment, also
                 * drop the ref that the cookie link itself represented */
                fscache_cookie_put(cookie);
                if (detached)
                        fscache_cookie_put(cookie);
        }

        spin_unlock(&object->lock);

        fscache_drop_object(object);
}
655 | |||
656 | /* | ||
657 | * withdraw an object from active service at the behest of the cache | ||
658 | * - need break the links to a cached object cookie | ||
659 | * - called under two situations: | ||
660 | * (1) recycler decides to reclaim an in-use object | ||
661 | * (2) a cache is unmounted | ||
662 | * - have to take care as the cookie can be being relinquished by the netfs | ||
663 | * simultaneously | ||
664 | * - the object is pinned by the caller holding a refcount on it | ||
665 | */ | ||
666 | void fscache_withdrawing_object(struct fscache_cache *cache, | ||
667 | struct fscache_object *object) | ||
668 | { | ||
669 | bool enqueue = false; | ||
670 | |||
671 | _enter(",OBJ%x", object->debug_id); | ||
672 | |||
673 | spin_lock(&object->lock); | ||
674 | if (object->state < FSCACHE_OBJECT_WITHDRAWING) { | ||
675 | object->state = FSCACHE_OBJECT_WITHDRAWING; | ||
676 | enqueue = true; | ||
677 | } | ||
678 | spin_unlock(&object->lock); | ||
679 | |||
680 | if (enqueue) | ||
681 | fscache_enqueue_object(object); | ||
682 | |||
683 | _leave(""); | ||
684 | } | ||
685 | |||
686 | /* | ||
687 | * allow the slow work item processor to get a ref on an object | ||
688 | */ | ||
689 | static int fscache_object_slow_work_get_ref(struct slow_work *work) | ||
690 | { | ||
691 | struct fscache_object *object = | ||
692 | container_of(work, struct fscache_object, work); | ||
693 | |||
694 | return object->cache->ops->grab_object(object) ? 0 : -EAGAIN; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * allow the slow work item processor to discard a ref on a work item | ||
699 | */ | ||
700 | static void fscache_object_slow_work_put_ref(struct slow_work *work) | ||
701 | { | ||
702 | struct fscache_object *object = | ||
703 | container_of(work, struct fscache_object, work); | ||
704 | |||
705 | return object->cache->ops->put_object(object); | ||
706 | } | ||
707 | |||
/*
 * enqueue an object for metadata-type processing
 * - hands the object's embedded slow_work item to the slow-work pool, which
 *   calls back through fscache_object_slow_work_ops
 */
void fscache_enqueue_object(struct fscache_object *object)
{
        _enter("{OBJ%x}", object->debug_id);

        slow_work_enqueue(&object->work);
}
717 | |||
/*
 * enqueue the dependents of an object for metadata-type processing
 * - the object's lock is taken here to guard the dependents list, so the
 *   caller must NOT already hold it
 * - this may cause an already locked object to wind up being processed again
 */
static void fscache_enqueue_dependents(struct fscache_object *object)
{
        struct fscache_object *dep;

        _enter("{OBJ%x}", object->debug_id);

        /* unlocked peek: cheap early-out when there's nothing queued */
        if (list_empty(&object->dependents))
                return;

        spin_lock(&object->lock);

        while (!list_empty(&object->dependents)) {
                dep = list_entry(object->dependents.next,
                                 struct fscache_object, dep_link);
                list_del_init(&dep->dep_link);


                /* sort onto appropriate lists */
                fscache_enqueue_object(dep);
                /* the dependents list held its own ref on each dependent;
                 * drop it now the dependent is queued in its own right */
                dep->cache->ops->put_object(dep);

                /* don't monopolise the lock/CPU when draining a long list */
                if (!list_empty(&object->dependents))
                        cond_resched_lock(&object->lock);
        }

        spin_unlock(&object->lock);
}
750 | |||
751 | /* | ||
752 | * remove an object from whatever queue it's waiting on | ||
753 | * - the caller must hold object->lock | ||
754 | */ | ||
755 | void fscache_dequeue_object(struct fscache_object *object) | ||
756 | { | ||
757 | _enter("{OBJ%x}", object->debug_id); | ||
758 | |||
759 | if (!list_empty(&object->dep_link)) { | ||
760 | spin_lock(&object->parent->lock); | ||
761 | list_del_init(&object->dep_link); | ||
762 | spin_unlock(&object->parent->lock); | ||
763 | } | ||
764 | |||
765 | _leave(""); | ||
766 | } | ||
767 | |||
768 | /** | ||
769 | * fscache_check_aux - Ask the netfs whether an object on disk is still valid | ||
770 | * @object: The object to ask about | ||
771 | * @data: The auxiliary data for the object | ||
772 | * @datalen: The size of the auxiliary data | ||
773 | * | ||
774 | * This function consults the netfs about the coherency state of an object | ||
775 | */ | ||
776 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | ||
777 | const void *data, uint16_t datalen) | ||
778 | { | ||
779 | enum fscache_checkaux result; | ||
780 | |||
781 | if (!object->cookie->def->check_aux) { | ||
782 | fscache_stat(&fscache_n_checkaux_none); | ||
783 | return FSCACHE_CHECKAUX_OKAY; | ||
784 | } | ||
785 | |||
786 | result = object->cookie->def->check_aux(object->cookie->netfs_data, | ||
787 | data, datalen); | ||
788 | switch (result) { | ||
789 | /* entry okay as is */ | ||
790 | case FSCACHE_CHECKAUX_OKAY: | ||
791 | fscache_stat(&fscache_n_checkaux_okay); | ||
792 | break; | ||
793 | |||
794 | /* entry requires update */ | ||
795 | case FSCACHE_CHECKAUX_NEEDS_UPDATE: | ||
796 | fscache_stat(&fscache_n_checkaux_update); | ||
797 | break; | ||
798 | |||
799 | /* entry requires deletion */ | ||
800 | case FSCACHE_CHECKAUX_OBSOLETE: | ||
801 | fscache_stat(&fscache_n_checkaux_obsolete); | ||
802 | break; | ||
803 | |||
804 | default: | ||
805 | BUG(); | ||
806 | } | ||
807 | |||
808 | return result; | ||
809 | } | ||
810 | EXPORT_SYMBOL(fscache_check_aux); | ||
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c new file mode 100644 index 000000000000..e7f8d53b8b6b --- /dev/null +++ b/fs/fscache/operation.c | |||
@@ -0,0 +1,459 @@ | |||
1 | /* FS-Cache worker operation management routines | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/filesystems/caching/operations.txt | ||
12 | */ | ||
13 | |||
14 | #define FSCACHE_DEBUG_LEVEL OPERATION | ||
15 | #include <linux/module.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | atomic_t fscache_op_debug_id; | ||
19 | EXPORT_SYMBOL(fscache_op_debug_id); | ||
20 | |||
21 | /** | ||
22 | * fscache_enqueue_operation - Enqueue an operation for processing | ||
23 | * @op: The operation to enqueue | ||
24 | * | ||
25 | * Enqueue an operation for processing by the FS-Cache thread pool. | ||
26 | * | ||
27 | * This will get its own ref on the object. | ||
28 | */ | ||
29 | void fscache_enqueue_operation(struct fscache_operation *op) | ||
30 | { | ||
31 | _enter("{OBJ%x OP%x,%u}", | ||
32 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); | ||
33 | |||
34 | ASSERT(op->processor != NULL); | ||
35 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); | ||
36 | ASSERTCMP(atomic_read(&op->usage), >, 0); | ||
37 | |||
38 | if (list_empty(&op->pend_link)) { | ||
39 | switch (op->flags & FSCACHE_OP_TYPE) { | ||
40 | case FSCACHE_OP_FAST: | ||
41 | _debug("queue fast"); | ||
42 | atomic_inc(&op->usage); | ||
43 | if (!schedule_work(&op->fast_work)) | ||
44 | fscache_put_operation(op); | ||
45 | break; | ||
46 | case FSCACHE_OP_SLOW: | ||
47 | _debug("queue slow"); | ||
48 | slow_work_enqueue(&op->slow_work); | ||
49 | break; | ||
50 | case FSCACHE_OP_MYTHREAD: | ||
51 | _debug("queue for caller's attention"); | ||
52 | break; | ||
53 | default: | ||
54 | printk(KERN_ERR "FS-Cache: Unexpected op type %lx", | ||
55 | op->flags); | ||
56 | BUG(); | ||
57 | break; | ||
58 | } | ||
59 | fscache_stat(&fscache_n_op_enqueue); | ||
60 | } | ||
61 | } | ||
62 | EXPORT_SYMBOL(fscache_enqueue_operation); | ||
63 | |||
/*
 * start an op running
 * - caller must hold object->lock (all callers in this file do)
 * - wakes anyone sleeping on FSCACHE_OP_WAITING and queues ops that have a
 *   processor to the appropriate thread pool
 */
static void fscache_run_op(struct fscache_object *object,
                           struct fscache_operation *op)
{
        object->n_in_progress++;
        if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
                wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
        if (op->processor)
                fscache_enqueue_operation(op);
        fscache_stat(&fscache_n_op_run);
}
77 | |||
78 | /* | ||
79 | * submit an exclusive operation for an object | ||
80 | * - other ops are excluded from running simultaneously with this one | ||
81 | * - this gets any extra refs it needs on an op | ||
82 | */ | ||
83 | int fscache_submit_exclusive_op(struct fscache_object *object, | ||
84 | struct fscache_operation *op) | ||
85 | { | ||
86 | int ret; | ||
87 | |||
88 | _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); | ||
89 | |||
90 | spin_lock(&object->lock); | ||
91 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | ||
92 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | ||
93 | |||
94 | ret = -ENOBUFS; | ||
95 | if (fscache_object_is_active(object)) { | ||
96 | op->object = object; | ||
97 | object->n_ops++; | ||
98 | object->n_exclusive++; /* reads and writes must wait */ | ||
99 | |||
100 | if (object->n_ops > 0) { | ||
101 | atomic_inc(&op->usage); | ||
102 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
103 | fscache_stat(&fscache_n_op_pend); | ||
104 | } else if (!list_empty(&object->pending_ops)) { | ||
105 | atomic_inc(&op->usage); | ||
106 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
107 | fscache_stat(&fscache_n_op_pend); | ||
108 | fscache_start_operations(object); | ||
109 | } else { | ||
110 | ASSERTCMP(object->n_in_progress, ==, 0); | ||
111 | fscache_run_op(object, op); | ||
112 | } | ||
113 | |||
114 | /* need to issue a new write op after this */ | ||
115 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); | ||
116 | ret = 0; | ||
117 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | ||
118 | op->object = object; | ||
119 | object->n_ops++; | ||
120 | object->n_exclusive++; /* reads and writes must wait */ | ||
121 | atomic_inc(&op->usage); | ||
122 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
123 | fscache_stat(&fscache_n_op_pend); | ||
124 | ret = 0; | ||
125 | } else { | ||
126 | /* not allowed to submit ops in any other state */ | ||
127 | BUG(); | ||
128 | } | ||
129 | |||
130 | spin_unlock(&object->lock); | ||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * report an unexpected submission | ||
136 | */ | ||
137 | static void fscache_report_unexpected_submission(struct fscache_object *object, | ||
138 | struct fscache_operation *op, | ||
139 | unsigned long ostate) | ||
140 | { | ||
141 | static bool once_only; | ||
142 | struct fscache_operation *p; | ||
143 | unsigned n; | ||
144 | |||
145 | if (once_only) | ||
146 | return; | ||
147 | once_only = true; | ||
148 | |||
149 | kdebug("unexpected submission OP%x [OBJ%x %s]", | ||
150 | op->debug_id, object->debug_id, | ||
151 | fscache_object_states[object->state]); | ||
152 | kdebug("objstate=%s [%s]", | ||
153 | fscache_object_states[object->state], | ||
154 | fscache_object_states[ostate]); | ||
155 | kdebug("objflags=%lx", object->flags); | ||
156 | kdebug("objevent=%lx [%lx]", object->events, object->event_mask); | ||
157 | kdebug("ops=%u inp=%u exc=%u", | ||
158 | object->n_ops, object->n_in_progress, object->n_exclusive); | ||
159 | |||
160 | if (!list_empty(&object->pending_ops)) { | ||
161 | n = 0; | ||
162 | list_for_each_entry(p, &object->pending_ops, pend_link) { | ||
163 | ASSERTCMP(p->object, ==, object); | ||
164 | kdebug("%p %p", op->processor, op->release); | ||
165 | n++; | ||
166 | } | ||
167 | |||
168 | kdebug("n=%u", n); | ||
169 | } | ||
170 | |||
171 | dump_stack(); | ||
172 | } | ||
173 | |||
/*
 * submit an operation for an object
 * - objects may be submitted only in the following states:
 *   - during object creation (write ops may be submitted)
 *   - whilst the object is active
 *   - after an I/O error incurred in one of the two above states (op rejected)
 * - this gets any extra refs it needs on an op
 */
int fscache_submit_op(struct fscache_object *object,
                      struct fscache_operation *op)
{
        unsigned long ostate;
        int ret;

        _enter("{OBJ%x OP%x},{%u}",
               object->debug_id, op->debug_id, atomic_read(&op->usage));

        ASSERTCMP(atomic_read(&op->usage), >, 0);

        spin_lock(&object->lock);
        ASSERTCMP(object->n_ops, >=, object->n_in_progress);
        ASSERTCMP(object->n_ops, >=, object->n_exclusive);

        /* snapshot the state for the unexpected-submission report; the
         * barrier presumably orders this read against state published by the
         * object state machine -- confirm against the state-change path */
        ostate = object->state;
        smp_rmb();

        if (fscache_object_is_active(object)) {
                op->object = object;
                object->n_ops++;

                if (object->n_exclusive > 0) {
                        /* an exclusive op is queued or running: wait */
                        atomic_inc(&op->usage);
                        list_add_tail(&op->pend_link, &object->pending_ops);
                        fscache_stat(&fscache_n_op_pend);
                } else if (!list_empty(&object->pending_ops)) {
                        /* keep FIFO order behind already-pending ops */
                        atomic_inc(&op->usage);
                        list_add_tail(&op->pend_link, &object->pending_ops);
                        fscache_stat(&fscache_n_op_pend);
                        fscache_start_operations(object);
                } else {
                        ASSERTCMP(object->n_exclusive, ==, 0);
                        fscache_run_op(object, op);
                }
                ret = 0;
        } else if (object->state == FSCACHE_OBJECT_CREATING) {
                /* object not ready yet: park the op on the pending list */
                op->object = object;
                object->n_ops++;
                atomic_inc(&op->usage);
                list_add_tail(&op->pend_link, &object->pending_ops);
                fscache_stat(&fscache_n_op_pend);
                ret = 0;
        } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
                /* rejection other than after an I/O error is unexpected:
                 * complain once with the full state dump */
                fscache_report_unexpected_submission(object, op, ostate);
                ASSERT(!fscache_object_is_active(object));
                ret = -ENOBUFS;
        } else {
                /* cache hit an I/O error: quietly reject new ops */
                ret = -ENOBUFS;
        }

        spin_unlock(&object->lock);
        return ret;
}
236 | |||
/*
 * queue an object for withdrawal on error, aborting all following asynchronous
 * operations
 * - just raises FSCACHE_OBJECT_EV_ERROR; the object state machine does the
 *   actual withdrawal work
 */
void fscache_abort_object(struct fscache_object *object)
{
        _enter("{OBJ%x}", object->debug_id);

        fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
}
247 | |||
/*
 * jump start the operation processing on an object
 * - caller must hold object->lock
 * - releases pending ops in FIFO order; an exclusive op is only started when
 *   nothing else is in progress, and nothing is started after it
 */
void fscache_start_operations(struct fscache_object *object)
{
        struct fscache_operation *op;
        bool stop = false;

        while (!list_empty(&object->pending_ops) && !stop) {
                op = list_entry(object->pending_ops.next,
                                struct fscache_operation, pend_link);

                if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
                        /* an exclusive op may only run alone; if ops are
                         * already running, leave it queued for later */
                        if (object->n_in_progress > 0)
                                break;
                        stop = true;    /* start it, but nothing after it */
                }
                list_del_init(&op->pend_link);
                object->n_in_progress++;

                if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
                        wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
                if (op->processor)
                        fscache_enqueue_operation(op);

                /* the pending queue was holding its own usage ref on the op */
                fscache_put_operation(op);
        }

        ASSERTCMP(object->n_in_progress, <=, object->n_ops);

        _debug("woke %d ops on OBJ%x",
               object->n_in_progress, object->debug_id);
}
283 | |||
/*
 * release an operation
 * - queues pending ops if this is the last in-progress op
 * - may be called with object->lock held, in which case the object-side
 *   accounting is deferred to fscache_operation_gc()
 */
void fscache_put_operation(struct fscache_operation *op)
{
        struct fscache_object *object;
        struct fscache_cache *cache;

        _enter("{OBJ%x OP%x,%d}",
               op->object->debug_id, op->debug_id, atomic_read(&op->usage));

        ASSERTCMP(atomic_read(&op->usage), >, 0);

        if (!atomic_dec_and_test(&op->usage))
                return;

        _debug("PUT OP");
        /* trap double-destruction of the same op */
        if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
                BUG();

        fscache_stat(&fscache_n_op_release);

        if (op->release) {
                op->release(op);
                op->release = NULL;
        }

        object = op->object;

        /* now... we may get called with the object spinlock held, so we
         * complete the cleanup here only if we can immediately acquire the
         * lock, and defer it otherwise */
        if (!spin_trylock(&object->lock)) {
                _debug("defer put");
                fscache_stat(&fscache_n_op_deferred_release);

                /* pend_link is free for reuse now the op is off any pending
                 * list, so use it to park the op on the GC list */
                cache = object->cache;
                spin_lock(&cache->op_gc_list_lock);
                list_add_tail(&op->pend_link, &cache->op_gc_list);
                spin_unlock(&cache->op_gc_list_lock);
                schedule_work(&cache->op_gc);
                _leave(" [defer]");
                return;
        }

        if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
                ASSERTCMP(object->n_exclusive, >, 0);
                object->n_exclusive--;
        }

        /* the last in-progress op going away lets the pending ones run */
        ASSERTCMP(object->n_in_progress, >, 0);
        object->n_in_progress--;
        if (object->n_in_progress == 0)
                fscache_start_operations(object);

        ASSERTCMP(object->n_ops, >, 0);
        object->n_ops--;
        if (object->n_ops == 0)
                fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);

        spin_unlock(&object->lock);

        kfree(op);
        _leave(" [done]");
}
EXPORT_SYMBOL(fscache_put_operation);
351 | |||
352 | /* | ||
353 | * garbage collect operations that have had their release deferred | ||
354 | */ | ||
355 | void fscache_operation_gc(struct work_struct *work) | ||
356 | { | ||
357 | struct fscache_operation *op; | ||
358 | struct fscache_object *object; | ||
359 | struct fscache_cache *cache = | ||
360 | container_of(work, struct fscache_cache, op_gc); | ||
361 | int count = 0; | ||
362 | |||
363 | _enter(""); | ||
364 | |||
365 | do { | ||
366 | spin_lock(&cache->op_gc_list_lock); | ||
367 | if (list_empty(&cache->op_gc_list)) { | ||
368 | spin_unlock(&cache->op_gc_list_lock); | ||
369 | break; | ||
370 | } | ||
371 | |||
372 | op = list_entry(cache->op_gc_list.next, | ||
373 | struct fscache_operation, pend_link); | ||
374 | list_del(&op->pend_link); | ||
375 | spin_unlock(&cache->op_gc_list_lock); | ||
376 | |||
377 | object = op->object; | ||
378 | |||
379 | _debug("GC DEFERRED REL OBJ%x OP%x", | ||
380 | object->debug_id, op->debug_id); | ||
381 | fscache_stat(&fscache_n_op_gc); | ||
382 | |||
383 | ASSERTCMP(atomic_read(&op->usage), ==, 0); | ||
384 | |||
385 | spin_lock(&object->lock); | ||
386 | if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { | ||
387 | ASSERTCMP(object->n_exclusive, >, 0); | ||
388 | object->n_exclusive--; | ||
389 | } | ||
390 | |||
391 | ASSERTCMP(object->n_in_progress, >, 0); | ||
392 | object->n_in_progress--; | ||
393 | if (object->n_in_progress == 0) | ||
394 | fscache_start_operations(object); | ||
395 | |||
396 | ASSERTCMP(object->n_ops, >, 0); | ||
397 | object->n_ops--; | ||
398 | if (object->n_ops == 0) | ||
399 | fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); | ||
400 | |||
401 | spin_unlock(&object->lock); | ||
402 | |||
403 | } while (count++ < 20); | ||
404 | |||
405 | if (!list_empty(&cache->op_gc_list)) | ||
406 | schedule_work(&cache->op_gc); | ||
407 | |||
408 | _leave(""); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * allow the slow work item processor to get a ref on an operation | ||
413 | */ | ||
414 | static int fscache_op_get_ref(struct slow_work *work) | ||
415 | { | ||
416 | struct fscache_operation *op = | ||
417 | container_of(work, struct fscache_operation, slow_work); | ||
418 | |||
419 | atomic_inc(&op->usage); | ||
420 | return 0; | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * allow the slow work item processor to discard a ref on an operation | ||
425 | */ | ||
426 | static void fscache_op_put_ref(struct slow_work *work) | ||
427 | { | ||
428 | struct fscache_operation *op = | ||
429 | container_of(work, struct fscache_operation, slow_work); | ||
430 | |||
431 | fscache_put_operation(op); | ||
432 | } | ||
433 | |||
/*
 * execute an operation using the slow thread pool to provide processing context
 * - the caller holds a ref to this object, so we don't need to hold one
 */
static void fscache_op_execute(struct slow_work *work)
{
        struct fscache_operation *op =
                container_of(work, struct fscache_operation, slow_work);
        unsigned long start;

        _enter("{OBJ%x OP%x,%d}",
               op->object->debug_id, op->debug_id, atomic_read(&op->usage));

        ASSERT(op->processor != NULL);
        /* time the processor run for the ops histogram */
        start = jiffies;
        op->processor(op);
        fscache_hist(fscache_ops_histogram, start);

        _leave("");
}
454 | |||
/*
 * callbacks by which the slow-work facility manages the lifetime of and
 * executes FSCACHE_OP_SLOW operations (see fscache_enqueue_operation())
 */
const struct slow_work_ops fscache_op_slow_work_ops = {
        .get_ref = fscache_op_get_ref,
        .put_ref = fscache_op_put_ref,
        .execute = fscache_op_execute,
};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c new file mode 100644 index 000000000000..2568e0eb644f --- /dev/null +++ b/fs/fscache/page.c | |||
@@ -0,0 +1,816 @@ | |||
1 | /* Cache page management and data I/O routines | ||
2 | * | ||
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL PAGE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/fscache-cache.h> | ||
15 | #include <linux/buffer_head.h> | ||
16 | #include <linux/pagevec.h> | ||
17 | #include "internal.h" | ||
18 | |||
19 | /* | ||
20 | * check to see if a page is being written to the cache | ||
21 | */ | ||
22 | bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page) | ||
23 | { | ||
24 | void *val; | ||
25 | |||
26 | rcu_read_lock(); | ||
27 | val = radix_tree_lookup(&cookie->stores, page->index); | ||
28 | rcu_read_unlock(); | ||
29 | |||
30 | return val != NULL; | ||
31 | } | ||
32 | EXPORT_SYMBOL(__fscache_check_page_write); | ||
33 | |||
/*
 * wait for a page to finish being written to the cache
 * - sleeps on the waitqueue for bit 0 of cookie->flags, which
 *   fscache_end_page_write() kicks each time a page store completes
 */
void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page)
{
        wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0);

        wait_event(*wq, !__fscache_check_page_write(cookie, page));
}
EXPORT_SYMBOL(__fscache_wait_on_page_write);
44 | |||
/*
 * note that a page has finished being written to the cache
 * - removes the page from the cookie's store tracking tree and wakes anyone
 *   sleeping in __fscache_wait_on_page_write()
 */
static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page)
{
        struct page *xpage;

        spin_lock(&cookie->lock);
        xpage = radix_tree_delete(&cookie->stores, page->index);
        spin_unlock(&cookie->lock);
        /* the page must have been recorded as storing before we're called */
        ASSERT(xpage != NULL);

        /* bit 0 of cookie->flags is the channel the waiters sleep on */
        wake_up_bit(&cookie->flags, 0);
}
59 | |||
60 | /* | ||
61 | * actually apply the changed attributes to a cache object | ||
62 | */ | ||
63 | static void fscache_attr_changed_op(struct fscache_operation *op) | ||
64 | { | ||
65 | struct fscache_object *object = op->object; | ||
66 | |||
67 | _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); | ||
68 | |||
69 | fscache_stat(&fscache_n_attr_changed_calls); | ||
70 | |||
71 | if (fscache_object_is_active(object) && | ||
72 | object->cache->ops->attr_changed(object) < 0) | ||
73 | fscache_abort_object(object); | ||
74 | |||
75 | _leave(""); | ||
76 | } | ||
77 | |||
/*
 * notification that the attributes on an object have changed
 * - allocates an exclusive FSCACHE_OP_SLOW operation and submits it to the
 *   cookie's first backing object
 * - returns 0 on success, -ENOMEM if the op can't be allocated and -ENOBUFS
 *   if there's no backing object to send it to
 */
int __fscache_attr_changed(struct fscache_cookie *cookie)
{
        struct fscache_operation *op;
        struct fscache_object *object;

        _enter("%p", cookie);

        /* attribute changes only make sense on data-storage cookies */
        ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);

        fscache_stat(&fscache_n_attr_changed);

        op = kzalloc(sizeof(*op), GFP_KERNEL);
        if (!op) {
                fscache_stat(&fscache_n_attr_changed_nomem);
                _leave(" = -ENOMEM");
                return -ENOMEM;
        }

        fscache_operation_init(op, NULL);
        fscache_operation_init_slow(op, fscache_attr_changed_op);
        /* exclusive: must not run concurrently with reads or writes */
        op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);

        spin_lock(&cookie->lock);

        if (hlist_empty(&cookie->backing_objects))
                goto nobufs;
        object = hlist_entry(cookie->backing_objects.first,
                             struct fscache_object, cookie_link);

        if (fscache_submit_exclusive_op(object, op) < 0)
                goto nobufs;
        spin_unlock(&cookie->lock);
        fscache_stat(&fscache_n_attr_changed_ok);
        /* submission took the refs it needs; drop our allocation ref */
        fscache_put_operation(op);
        _leave(" = 0");
        return 0;

nobufs:
        spin_unlock(&cookie->lock);
        kfree(op);
        fscache_stat(&fscache_n_attr_changed_nobufs);
        _leave(" = %d", -ENOBUFS);
        return -ENOBUFS;
}
EXPORT_SYMBOL(__fscache_attr_changed);
126 | |||
/*
 * handle secondary execution given to a retrieval op on behalf of the
 * cache
 * - fast-work handler: runs the op's processor, timing it for the ops
 *   histogram, then drops the ref that was taken when the work was queued
 */
static void fscache_retrieval_work(struct work_struct *work)
{
        struct fscache_retrieval *op =
                container_of(work, struct fscache_retrieval, op.fast_work);
        unsigned long start;

        _enter("{OP%x}", op->op.debug_id);

        start = jiffies;
        op->op.processor(&op->op);
        fscache_hist(fscache_ops_histogram, start);
        fscache_put_operation(&op->op);
}
144 | |||
/*
 * release a retrieval op reference
 * - op->release hook: records the op's lifetime in the retrieval histogram
 *   and lets go of the pinned netfs read context, if any
 */
static void fscache_release_retrieval_op(struct fscache_operation *_op)
{
        struct fscache_retrieval *op =
                container_of(_op, struct fscache_retrieval, op);

        _enter("{OP%x}", op->op.debug_id);

        fscache_hist(fscache_retrieval_histogram, op->start_time);
        if (op->context)
                fscache_put_context(op->op.object->cookie, op->context);

        _leave("");
}
161 | |||
162 | /* | ||
163 | * allocate a retrieval op | ||
164 | */ | ||
165 | static struct fscache_retrieval *fscache_alloc_retrieval( | ||
166 | struct address_space *mapping, | ||
167 | fscache_rw_complete_t end_io_func, | ||
168 | void *context) | ||
169 | { | ||
170 | struct fscache_retrieval *op; | ||
171 | |||
172 | /* allocate a retrieval operation and attempt to submit it */ | ||
173 | op = kzalloc(sizeof(*op), GFP_NOIO); | ||
174 | if (!op) { | ||
175 | fscache_stat(&fscache_n_retrievals_nomem); | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | fscache_operation_init(&op->op, fscache_release_retrieval_op); | ||
180 | op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); | ||
181 | op->mapping = mapping; | ||
182 | op->end_io_func = end_io_func; | ||
183 | op->context = context; | ||
184 | op->start_time = jiffies; | ||
185 | INIT_WORK(&op->op.fast_work, fscache_retrieval_work); | ||
186 | INIT_LIST_HEAD(&op->to_do); | ||
187 | return op; | ||
188 | } | ||
189 | |||
/*
 * wait for a deferred lookup to complete
 * - returns 0 once the cookie's lookup has finished (or wasn't in progress)
 *   and -ERESTARTSYS if the wait was interrupted by a signal
 */
static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
{
        unsigned long jif;

        _enter("");

        if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) {
                _leave(" = 0 [imm]");
                return 0;
        }

        fscache_stat(&fscache_n_retrievals_wait);

        jif = jiffies;
        /* interruptible so a signal can abort the retrieval */
        if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
                        fscache_wait_bit_interruptible,
                        TASK_INTERRUPTIBLE) != 0) {
                fscache_stat(&fscache_n_retrievals_intr);
                _leave(" = -ERESTARTSYS");
                return -ERESTARTSYS;
        }

        ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags));

        /* presumably pairs with a barrier on the lookup-completion side
         * before reading state the lookup published -- TODO confirm against
         * the object state machine */
        smp_rmb();
        fscache_hist(fscache_retrieval_delay_histogram, jif);
        _leave(" = 0 [dly]");
        return 0;
}
222 | |||
223 | /* | ||
224 | * read a page from the cache or allocate a block in which to store it | ||
225 | * - we return: | ||
226 | * -ENOMEM - out of memory, nothing done | ||
227 | * -ERESTARTSYS - interrupted | ||
228 | * -ENOBUFS - no backing object available in which to cache the block | ||
229 | * -ENODATA - no data available in the backing object for this block | ||
230 | * 0 - dispatched a read - it'll call end_io_func() when finished | ||
231 | */ | ||
232 | int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | ||
233 | struct page *page, | ||
234 | fscache_rw_complete_t end_io_func, | ||
235 | void *context, | ||
236 | gfp_t gfp) | ||
237 | { | ||
238 | struct fscache_retrieval *op; | ||
239 | struct fscache_object *object; | ||
240 | int ret; | ||
241 | |||
242 | _enter("%p,%p,,,", cookie, page); | ||
243 | |||
244 | fscache_stat(&fscache_n_retrievals); | ||
245 | |||
246 | if (hlist_empty(&cookie->backing_objects)) | ||
247 | goto nobufs; | ||
248 | |||
249 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
250 | ASSERTCMP(page, !=, NULL); | ||
251 | |||
252 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | ||
253 | return -ERESTARTSYS; | ||
254 | |||
255 | op = fscache_alloc_retrieval(page->mapping, end_io_func, context); | ||
256 | if (!op) { | ||
257 | _leave(" = -ENOMEM"); | ||
258 | return -ENOMEM; | ||
259 | } | ||
260 | |||
261 | spin_lock(&cookie->lock); | ||
262 | |||
263 | if (hlist_empty(&cookie->backing_objects)) | ||
264 | goto nobufs_unlock; | ||
265 | object = hlist_entry(cookie->backing_objects.first, | ||
266 | struct fscache_object, cookie_link); | ||
267 | |||
268 | ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); | ||
269 | |||
270 | if (fscache_submit_op(object, &op->op) < 0) | ||
271 | goto nobufs_unlock; | ||
272 | spin_unlock(&cookie->lock); | ||
273 | |||
274 | fscache_stat(&fscache_n_retrieval_ops); | ||
275 | |||
276 | /* pin the netfs read context in case we need to do the actual netfs | ||
277 | * read because we've encountered a cache read failure */ | ||
278 | fscache_get_context(object->cookie, op->context); | ||
279 | |||
280 | /* we wait for the operation to become active, and then process it | ||
281 | * *here*, in this thread, and not in the thread pool */ | ||
282 | if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { | ||
283 | _debug(">>> WT"); | ||
284 | fscache_stat(&fscache_n_retrieval_op_waits); | ||
285 | wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, | ||
286 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
287 | _debug("<<< GO"); | ||
288 | } | ||
289 | |||
290 | /* ask the cache to honour the operation */ | ||
291 | if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { | ||
292 | ret = object->cache->ops->allocate_page(op, page, gfp); | ||
293 | if (ret == 0) | ||
294 | ret = -ENODATA; | ||
295 | } else { | ||
296 | ret = object->cache->ops->read_or_alloc_page(op, page, gfp); | ||
297 | } | ||
298 | |||
299 | if (ret == -ENOMEM) | ||
300 | fscache_stat(&fscache_n_retrievals_nomem); | ||
301 | else if (ret == -ERESTARTSYS) | ||
302 | fscache_stat(&fscache_n_retrievals_intr); | ||
303 | else if (ret == -ENODATA) | ||
304 | fscache_stat(&fscache_n_retrievals_nodata); | ||
305 | else if (ret < 0) | ||
306 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
307 | else | ||
308 | fscache_stat(&fscache_n_retrievals_ok); | ||
309 | |||
310 | fscache_put_retrieval(op); | ||
311 | _leave(" = %d", ret); | ||
312 | return ret; | ||
313 | |||
314 | nobufs_unlock: | ||
315 | spin_unlock(&cookie->lock); | ||
316 | kfree(op); | ||
317 | nobufs: | ||
318 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
319 | _leave(" = -ENOBUFS"); | ||
320 | return -ENOBUFS; | ||
321 | } | ||
322 | EXPORT_SYMBOL(__fscache_read_or_alloc_page); | ||
323 | |||
324 | /* | ||
325 | * read a list of page from the cache or allocate a block in which to store | ||
326 | * them | ||
327 | * - we return: | ||
328 | * -ENOMEM - out of memory, some pages may be being read | ||
329 | * -ERESTARTSYS - interrupted, some pages may be being read | ||
330 | * -ENOBUFS - no backing object or space available in which to cache any | ||
331 | * pages not being read | ||
332 | * -ENODATA - no data available in the backing object for some or all of | ||
333 | * the pages | ||
334 | * 0 - dispatched a read on all pages | ||
335 | * | ||
336 | * end_io_func() will be called for each page read from the cache as it is | ||
337 | * finishes being read | ||
338 | * | ||
339 | * any pages for which a read is dispatched will be removed from pages and | ||
340 | * nr_pages | ||
341 | */ | ||
342 | int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | ||
343 | struct address_space *mapping, | ||
344 | struct list_head *pages, | ||
345 | unsigned *nr_pages, | ||
346 | fscache_rw_complete_t end_io_func, | ||
347 | void *context, | ||
348 | gfp_t gfp) | ||
349 | { | ||
350 | fscache_pages_retrieval_func_t func; | ||
351 | struct fscache_retrieval *op; | ||
352 | struct fscache_object *object; | ||
353 | int ret; | ||
354 | |||
355 | _enter("%p,,%d,,,", cookie, *nr_pages); | ||
356 | |||
357 | fscache_stat(&fscache_n_retrievals); | ||
358 | |||
359 | if (hlist_empty(&cookie->backing_objects)) | ||
360 | goto nobufs; | ||
361 | |||
362 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
363 | ASSERTCMP(*nr_pages, >, 0); | ||
364 | ASSERT(!list_empty(pages)); | ||
365 | |||
366 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | ||
367 | return -ERESTARTSYS; | ||
368 | |||
369 | op = fscache_alloc_retrieval(mapping, end_io_func, context); | ||
370 | if (!op) | ||
371 | return -ENOMEM; | ||
372 | |||
373 | spin_lock(&cookie->lock); | ||
374 | |||
375 | if (hlist_empty(&cookie->backing_objects)) | ||
376 | goto nobufs_unlock; | ||
377 | object = hlist_entry(cookie->backing_objects.first, | ||
378 | struct fscache_object, cookie_link); | ||
379 | |||
380 | if (fscache_submit_op(object, &op->op) < 0) | ||
381 | goto nobufs_unlock; | ||
382 | spin_unlock(&cookie->lock); | ||
383 | |||
384 | fscache_stat(&fscache_n_retrieval_ops); | ||
385 | |||
386 | /* pin the netfs read context in case we need to do the actual netfs | ||
387 | * read because we've encountered a cache read failure */ | ||
388 | fscache_get_context(object->cookie, op->context); | ||
389 | |||
390 | /* we wait for the operation to become active, and then process it | ||
391 | * *here*, in this thread, and not in the thread pool */ | ||
392 | if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { | ||
393 | _debug(">>> WT"); | ||
394 | fscache_stat(&fscache_n_retrieval_op_waits); | ||
395 | wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, | ||
396 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
397 | _debug("<<< GO"); | ||
398 | } | ||
399 | |||
400 | /* ask the cache to honour the operation */ | ||
401 | if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) | ||
402 | func = object->cache->ops->allocate_pages; | ||
403 | else | ||
404 | func = object->cache->ops->read_or_alloc_pages; | ||
405 | ret = func(op, pages, nr_pages, gfp); | ||
406 | |||
407 | if (ret == -ENOMEM) | ||
408 | fscache_stat(&fscache_n_retrievals_nomem); | ||
409 | else if (ret == -ERESTARTSYS) | ||
410 | fscache_stat(&fscache_n_retrievals_intr); | ||
411 | else if (ret == -ENODATA) | ||
412 | fscache_stat(&fscache_n_retrievals_nodata); | ||
413 | else if (ret < 0) | ||
414 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
415 | else | ||
416 | fscache_stat(&fscache_n_retrievals_ok); | ||
417 | |||
418 | fscache_put_retrieval(op); | ||
419 | _leave(" = %d", ret); | ||
420 | return ret; | ||
421 | |||
422 | nobufs_unlock: | ||
423 | spin_unlock(&cookie->lock); | ||
424 | kfree(op); | ||
425 | nobufs: | ||
426 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
427 | _leave(" = -ENOBUFS"); | ||
428 | return -ENOBUFS; | ||
429 | } | ||
430 | EXPORT_SYMBOL(__fscache_read_or_alloc_pages); | ||
431 | |||
432 | /* | ||
433 | * allocate a block in the cache on which to store a page | ||
434 | * - we return: | ||
435 | * -ENOMEM - out of memory, nothing done | ||
436 | * -ERESTARTSYS - interrupted | ||
437 | * -ENOBUFS - no backing object available in which to cache the block | ||
438 | * 0 - block allocated | ||
439 | */ | ||
440 | int __fscache_alloc_page(struct fscache_cookie *cookie, | ||
441 | struct page *page, | ||
442 | gfp_t gfp) | ||
443 | { | ||
444 | struct fscache_retrieval *op; | ||
445 | struct fscache_object *object; | ||
446 | int ret; | ||
447 | |||
448 | _enter("%p,%p,,,", cookie, page); | ||
449 | |||
450 | fscache_stat(&fscache_n_allocs); | ||
451 | |||
452 | if (hlist_empty(&cookie->backing_objects)) | ||
453 | goto nobufs; | ||
454 | |||
455 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
456 | ASSERTCMP(page, !=, NULL); | ||
457 | |||
458 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | ||
459 | return -ERESTARTSYS; | ||
460 | |||
461 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); | ||
462 | if (!op) | ||
463 | return -ENOMEM; | ||
464 | |||
465 | spin_lock(&cookie->lock); | ||
466 | |||
467 | if (hlist_empty(&cookie->backing_objects)) | ||
468 | goto nobufs_unlock; | ||
469 | object = hlist_entry(cookie->backing_objects.first, | ||
470 | struct fscache_object, cookie_link); | ||
471 | |||
472 | if (fscache_submit_op(object, &op->op) < 0) | ||
473 | goto nobufs_unlock; | ||
474 | spin_unlock(&cookie->lock); | ||
475 | |||
476 | fscache_stat(&fscache_n_alloc_ops); | ||
477 | |||
478 | if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { | ||
479 | _debug(">>> WT"); | ||
480 | fscache_stat(&fscache_n_alloc_op_waits); | ||
481 | wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, | ||
482 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
483 | _debug("<<< GO"); | ||
484 | } | ||
485 | |||
486 | /* ask the cache to honour the operation */ | ||
487 | ret = object->cache->ops->allocate_page(op, page, gfp); | ||
488 | |||
489 | if (ret < 0) | ||
490 | fscache_stat(&fscache_n_allocs_nobufs); | ||
491 | else | ||
492 | fscache_stat(&fscache_n_allocs_ok); | ||
493 | |||
494 | fscache_put_retrieval(op); | ||
495 | _leave(" = %d", ret); | ||
496 | return ret; | ||
497 | |||
498 | nobufs_unlock: | ||
499 | spin_unlock(&cookie->lock); | ||
500 | kfree(op); | ||
501 | nobufs: | ||
502 | fscache_stat(&fscache_n_allocs_nobufs); | ||
503 | _leave(" = -ENOBUFS"); | ||
504 | return -ENOBUFS; | ||
505 | } | ||
506 | EXPORT_SYMBOL(__fscache_alloc_page); | ||
507 | |||
508 | /* | ||
509 | * release a write op reference | ||
510 | */ | ||
511 | static void fscache_release_write_op(struct fscache_operation *_op) | ||
512 | { | ||
513 | _enter("{OP%x}", _op->debug_id); | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * perform the background storage of a page into the cache | ||
518 | */ | ||
519 | static void fscache_write_op(struct fscache_operation *_op) | ||
520 | { | ||
521 | struct fscache_storage *op = | ||
522 | container_of(_op, struct fscache_storage, op); | ||
523 | struct fscache_object *object = op->op.object; | ||
524 | struct fscache_cookie *cookie = object->cookie; | ||
525 | struct page *page; | ||
526 | unsigned n; | ||
527 | void *results[1]; | ||
528 | int ret; | ||
529 | |||
530 | _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); | ||
531 | |||
532 | spin_lock(&cookie->lock); | ||
533 | spin_lock(&object->lock); | ||
534 | |||
535 | if (!fscache_object_is_active(object)) { | ||
536 | spin_unlock(&object->lock); | ||
537 | spin_unlock(&cookie->lock); | ||
538 | _leave(""); | ||
539 | return; | ||
540 | } | ||
541 | |||
542 | fscache_stat(&fscache_n_store_calls); | ||
543 | |||
544 | /* find a page to store */ | ||
545 | page = NULL; | ||
546 | n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1, | ||
547 | FSCACHE_COOKIE_PENDING_TAG); | ||
548 | if (n != 1) | ||
549 | goto superseded; | ||
550 | page = results[0]; | ||
551 | _debug("gang %d [%lx]", n, page->index); | ||
552 | if (page->index > op->store_limit) | ||
553 | goto superseded; | ||
554 | |||
555 | radix_tree_tag_clear(&cookie->stores, page->index, | ||
556 | FSCACHE_COOKIE_PENDING_TAG); | ||
557 | |||
558 | spin_unlock(&object->lock); | ||
559 | spin_unlock(&cookie->lock); | ||
560 | |||
561 | if (page) { | ||
562 | ret = object->cache->ops->write_page(op, page); | ||
563 | fscache_end_page_write(cookie, page); | ||
564 | page_cache_release(page); | ||
565 | if (ret < 0) | ||
566 | fscache_abort_object(object); | ||
567 | else | ||
568 | fscache_enqueue_operation(&op->op); | ||
569 | } | ||
570 | |||
571 | _leave(""); | ||
572 | return; | ||
573 | |||
574 | superseded: | ||
575 | /* this writer is going away and there aren't any more things to | ||
576 | * write */ | ||
577 | _debug("cease"); | ||
578 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); | ||
579 | spin_unlock(&object->lock); | ||
580 | spin_unlock(&cookie->lock); | ||
581 | _leave(""); | ||
582 | } | ||
583 | |||
584 | /* | ||
585 | * request a page be stored in the cache | ||
586 | * - returns: | ||
587 | * -ENOMEM - out of memory, nothing done | ||
588 | * -ENOBUFS - no backing object available in which to cache the page | ||
589 | * 0 - dispatched a write - it'll call end_io_func() when finished | ||
590 | * | ||
591 | * if the cookie still has a backing object at this point, that object can be | ||
592 | * in one of a few states with respect to storage processing: | ||
593 | * | ||
594 | * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is | ||
595 | * set) | ||
596 | * | ||
597 | * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred | ||
598 | * fill op) | ||
599 | * | ||
600 | * (b) writes deferred till post-creation (mark page for writing and | ||
601 | * return immediately) | ||
602 | * | ||
603 | * (2) negative lookup, object created, initial fill being made from netfs | ||
604 | * (FSCACHE_COOKIE_INITIAL_FILL is set) | ||
605 | * | ||
606 | * (a) fill point not yet reached this page (mark page for writing and | ||
607 | * return) | ||
608 | * | ||
609 | * (b) fill point passed this page (queue op to store this page) | ||
610 | * | ||
611 | * (3) object extant (queue op to store this page) | ||
612 | * | ||
613 | * any other state is invalid | ||
614 | */ | ||
615 | int __fscache_write_page(struct fscache_cookie *cookie, | ||
616 | struct page *page, | ||
617 | gfp_t gfp) | ||
618 | { | ||
619 | struct fscache_storage *op; | ||
620 | struct fscache_object *object; | ||
621 | int ret; | ||
622 | |||
623 | _enter("%p,%x,", cookie, (u32) page->flags); | ||
624 | |||
625 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
626 | ASSERT(PageFsCache(page)); | ||
627 | |||
628 | fscache_stat(&fscache_n_stores); | ||
629 | |||
630 | op = kzalloc(sizeof(*op), GFP_NOIO); | ||
631 | if (!op) | ||
632 | goto nomem; | ||
633 | |||
634 | fscache_operation_init(&op->op, fscache_release_write_op); | ||
635 | fscache_operation_init_slow(&op->op, fscache_write_op); | ||
636 | op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); | ||
637 | |||
638 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | ||
639 | if (ret < 0) | ||
640 | goto nomem_free; | ||
641 | |||
642 | ret = -ENOBUFS; | ||
643 | spin_lock(&cookie->lock); | ||
644 | |||
645 | if (hlist_empty(&cookie->backing_objects)) | ||
646 | goto nobufs; | ||
647 | object = hlist_entry(cookie->backing_objects.first, | ||
648 | struct fscache_object, cookie_link); | ||
649 | if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) | ||
650 | goto nobufs; | ||
651 | |||
652 | /* add the page to the pending-storage radix tree on the backing | ||
653 | * object */ | ||
654 | spin_lock(&object->lock); | ||
655 | |||
656 | _debug("store limit %llx", (unsigned long long) object->store_limit); | ||
657 | |||
658 | ret = radix_tree_insert(&cookie->stores, page->index, page); | ||
659 | if (ret < 0) { | ||
660 | if (ret == -EEXIST) | ||
661 | goto already_queued; | ||
662 | _debug("insert failed %d", ret); | ||
663 | goto nobufs_unlock_obj; | ||
664 | } | ||
665 | |||
666 | radix_tree_tag_set(&cookie->stores, page->index, | ||
667 | FSCACHE_COOKIE_PENDING_TAG); | ||
668 | page_cache_get(page); | ||
669 | |||
670 | /* we only want one writer at a time, but we do need to queue new | ||
671 | * writers after exclusive ops */ | ||
672 | if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) | ||
673 | goto already_pending; | ||
674 | |||
675 | spin_unlock(&object->lock); | ||
676 | |||
677 | op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); | ||
678 | op->store_limit = object->store_limit; | ||
679 | |||
680 | if (fscache_submit_op(object, &op->op) < 0) | ||
681 | goto submit_failed; | ||
682 | |||
683 | spin_unlock(&cookie->lock); | ||
684 | radix_tree_preload_end(); | ||
685 | fscache_stat(&fscache_n_store_ops); | ||
686 | fscache_stat(&fscache_n_stores_ok); | ||
687 | |||
688 | /* the slow work queue now carries its own ref on the object */ | ||
689 | fscache_put_operation(&op->op); | ||
690 | _leave(" = 0"); | ||
691 | return 0; | ||
692 | |||
693 | already_queued: | ||
694 | fscache_stat(&fscache_n_stores_again); | ||
695 | already_pending: | ||
696 | spin_unlock(&object->lock); | ||
697 | spin_unlock(&cookie->lock); | ||
698 | radix_tree_preload_end(); | ||
699 | kfree(op); | ||
700 | fscache_stat(&fscache_n_stores_ok); | ||
701 | _leave(" = 0"); | ||
702 | return 0; | ||
703 | |||
704 | submit_failed: | ||
705 | radix_tree_delete(&cookie->stores, page->index); | ||
706 | page_cache_release(page); | ||
707 | ret = -ENOBUFS; | ||
708 | goto nobufs; | ||
709 | |||
710 | nobufs_unlock_obj: | ||
711 | spin_unlock(&object->lock); | ||
712 | nobufs: | ||
713 | spin_unlock(&cookie->lock); | ||
714 | radix_tree_preload_end(); | ||
715 | kfree(op); | ||
716 | fscache_stat(&fscache_n_stores_nobufs); | ||
717 | _leave(" = -ENOBUFS"); | ||
718 | return -ENOBUFS; | ||
719 | |||
720 | nomem_free: | ||
721 | kfree(op); | ||
722 | nomem: | ||
723 | fscache_stat(&fscache_n_stores_oom); | ||
724 | _leave(" = -ENOMEM"); | ||
725 | return -ENOMEM; | ||
726 | } | ||
727 | EXPORT_SYMBOL(__fscache_write_page); | ||
728 | |||
729 | /* | ||
730 | * remove a page from the cache | ||
731 | */ | ||
732 | void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) | ||
733 | { | ||
734 | struct fscache_object *object; | ||
735 | |||
736 | _enter(",%p", page); | ||
737 | |||
738 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
739 | ASSERTCMP(page, !=, NULL); | ||
740 | |||
741 | fscache_stat(&fscache_n_uncaches); | ||
742 | |||
743 | /* cache withdrawal may beat us to it */ | ||
744 | if (!PageFsCache(page)) | ||
745 | goto done; | ||
746 | |||
747 | /* get the object */ | ||
748 | spin_lock(&cookie->lock); | ||
749 | |||
750 | if (hlist_empty(&cookie->backing_objects)) { | ||
751 | ClearPageFsCache(page); | ||
752 | goto done_unlock; | ||
753 | } | ||
754 | |||
755 | object = hlist_entry(cookie->backing_objects.first, | ||
756 | struct fscache_object, cookie_link); | ||
757 | |||
758 | /* there might now be stuff on disk we could read */ | ||
759 | clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | ||
760 | |||
761 | /* only invoke the cache backend if we managed to mark the page | ||
762 | * uncached here; this deals with synchronisation vs withdrawal */ | ||
763 | if (TestClearPageFsCache(page) && | ||
764 | object->cache->ops->uncache_page) { | ||
765 | /* the cache backend releases the cookie lock */ | ||
766 | object->cache->ops->uncache_page(object, page); | ||
767 | goto done; | ||
768 | } | ||
769 | |||
770 | done_unlock: | ||
771 | spin_unlock(&cookie->lock); | ||
772 | done: | ||
773 | _leave(""); | ||
774 | } | ||
775 | EXPORT_SYMBOL(__fscache_uncache_page); | ||
776 | |||
777 | /** | ||
778 | * fscache_mark_pages_cached - Mark pages as being cached | ||
779 | * @op: The retrieval op pages are being marked for | ||
780 | * @pagevec: The pages to be marked | ||
781 | * | ||
782 | * Mark a bunch of netfs pages as being cached. After this is called, | ||
783 | * the netfs must call fscache_uncache_page() to remove the mark. | ||
784 | */ | ||
785 | void fscache_mark_pages_cached(struct fscache_retrieval *op, | ||
786 | struct pagevec *pagevec) | ||
787 | { | ||
788 | struct fscache_cookie *cookie = op->op.object->cookie; | ||
789 | unsigned long loop; | ||
790 | |||
791 | #ifdef CONFIG_FSCACHE_STATS | ||
792 | atomic_add(pagevec->nr, &fscache_n_marks); | ||
793 | #endif | ||
794 | |||
795 | for (loop = 0; loop < pagevec->nr; loop++) { | ||
796 | struct page *page = pagevec->pages[loop]; | ||
797 | |||
798 | _debug("- mark %p{%lx}", page, page->index); | ||
799 | if (TestSetPageFsCache(page)) { | ||
800 | static bool once_only; | ||
801 | if (!once_only) { | ||
802 | once_only = true; | ||
803 | printk(KERN_WARNING "FS-Cache:" | ||
804 | " Cookie type %s marked page %lx" | ||
805 | " multiple times\n", | ||
806 | cookie->def->name, page->index); | ||
807 | } | ||
808 | } | ||
809 | } | ||
810 | |||
811 | if (cookie->def->mark_pages_cached) | ||
812 | cookie->def->mark_pages_cached(cookie->netfs_data, | ||
813 | op->mapping, pagevec); | ||
814 | pagevec_reinit(pagevec); | ||
815 | } | ||
816 | EXPORT_SYMBOL(fscache_mark_pages_cached); | ||
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c new file mode 100644 index 000000000000..beeab44bc31a --- /dev/null +++ b/fs/fscache/proc.c | |||
@@ -0,0 +1,68 @@ | |||
1 | /* FS-Cache statistics viewing interface | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL OPERATION | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | /* | ||
19 | * initialise the /proc/fs/fscache/ directory | ||
20 | */ | ||
21 | int __init fscache_proc_init(void) | ||
22 | { | ||
23 | _enter(""); | ||
24 | |||
25 | if (!proc_mkdir("fs/fscache", NULL)) | ||
26 | goto error_dir; | ||
27 | |||
28 | #ifdef CONFIG_FSCACHE_STATS | ||
29 | if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL, | ||
30 | &fscache_stats_fops)) | ||
31 | goto error_stats; | ||
32 | #endif | ||
33 | |||
34 | #ifdef CONFIG_FSCACHE_HISTOGRAM | ||
35 | if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL, | ||
36 | &fscache_histogram_fops)) | ||
37 | goto error_histogram; | ||
38 | #endif | ||
39 | |||
40 | _leave(" = 0"); | ||
41 | return 0; | ||
42 | |||
43 | #ifdef CONFIG_FSCACHE_HISTOGRAM | ||
44 | error_histogram: | ||
45 | #endif | ||
46 | #ifdef CONFIG_FSCACHE_STATS | ||
47 | remove_proc_entry("fs/fscache/stats", NULL); | ||
48 | error_stats: | ||
49 | #endif | ||
50 | remove_proc_entry("fs/fscache", NULL); | ||
51 | error_dir: | ||
52 | _leave(" = -ENOMEM"); | ||
53 | return -ENOMEM; | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * clean up the /proc/fs/fscache/ directory | ||
58 | */ | ||
59 | void fscache_proc_cleanup(void) | ||
60 | { | ||
61 | #ifdef CONFIG_FSCACHE_HISTOGRAM | ||
62 | remove_proc_entry("fs/fscache/histogram", NULL); | ||
63 | #endif | ||
64 | #ifdef CONFIG_FSCACHE_STATS | ||
65 | remove_proc_entry("fs/fscache/stats", NULL); | ||
66 | #endif | ||
67 | remove_proc_entry("fs/fscache", NULL); | ||
68 | } | ||
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c new file mode 100644 index 000000000000..65deb99e756b --- /dev/null +++ b/fs/fscache/stats.c | |||
@@ -0,0 +1,212 @@ | |||
1 | /* FS-Cache statistics | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL THREAD | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | /* | ||
19 | * operation counters | ||
20 | */ | ||
21 | atomic_t fscache_n_op_pend; | ||
22 | atomic_t fscache_n_op_run; | ||
23 | atomic_t fscache_n_op_enqueue; | ||
24 | atomic_t fscache_n_op_requeue; | ||
25 | atomic_t fscache_n_op_deferred_release; | ||
26 | atomic_t fscache_n_op_release; | ||
27 | atomic_t fscache_n_op_gc; | ||
28 | |||
29 | atomic_t fscache_n_attr_changed; | ||
30 | atomic_t fscache_n_attr_changed_ok; | ||
31 | atomic_t fscache_n_attr_changed_nobufs; | ||
32 | atomic_t fscache_n_attr_changed_nomem; | ||
33 | atomic_t fscache_n_attr_changed_calls; | ||
34 | |||
35 | atomic_t fscache_n_allocs; | ||
36 | atomic_t fscache_n_allocs_ok; | ||
37 | atomic_t fscache_n_allocs_wait; | ||
38 | atomic_t fscache_n_allocs_nobufs; | ||
39 | atomic_t fscache_n_alloc_ops; | ||
40 | atomic_t fscache_n_alloc_op_waits; | ||
41 | |||
42 | atomic_t fscache_n_retrievals; | ||
43 | atomic_t fscache_n_retrievals_ok; | ||
44 | atomic_t fscache_n_retrievals_wait; | ||
45 | atomic_t fscache_n_retrievals_nodata; | ||
46 | atomic_t fscache_n_retrievals_nobufs; | ||
47 | atomic_t fscache_n_retrievals_intr; | ||
48 | atomic_t fscache_n_retrievals_nomem; | ||
49 | atomic_t fscache_n_retrieval_ops; | ||
50 | atomic_t fscache_n_retrieval_op_waits; | ||
51 | |||
52 | atomic_t fscache_n_stores; | ||
53 | atomic_t fscache_n_stores_ok; | ||
54 | atomic_t fscache_n_stores_again; | ||
55 | atomic_t fscache_n_stores_nobufs; | ||
56 | atomic_t fscache_n_stores_oom; | ||
57 | atomic_t fscache_n_store_ops; | ||
58 | atomic_t fscache_n_store_calls; | ||
59 | |||
60 | atomic_t fscache_n_marks; | ||
61 | atomic_t fscache_n_uncaches; | ||
62 | |||
63 | atomic_t fscache_n_acquires; | ||
64 | atomic_t fscache_n_acquires_null; | ||
65 | atomic_t fscache_n_acquires_no_cache; | ||
66 | atomic_t fscache_n_acquires_ok; | ||
67 | atomic_t fscache_n_acquires_nobufs; | ||
68 | atomic_t fscache_n_acquires_oom; | ||
69 | |||
70 | atomic_t fscache_n_updates; | ||
71 | atomic_t fscache_n_updates_null; | ||
72 | atomic_t fscache_n_updates_run; | ||
73 | |||
74 | atomic_t fscache_n_relinquishes; | ||
75 | atomic_t fscache_n_relinquishes_null; | ||
76 | atomic_t fscache_n_relinquishes_waitcrt; | ||
77 | |||
78 | atomic_t fscache_n_cookie_index; | ||
79 | atomic_t fscache_n_cookie_data; | ||
80 | atomic_t fscache_n_cookie_special; | ||
81 | |||
82 | atomic_t fscache_n_object_alloc; | ||
83 | atomic_t fscache_n_object_no_alloc; | ||
84 | atomic_t fscache_n_object_lookups; | ||
85 | atomic_t fscache_n_object_lookups_negative; | ||
86 | atomic_t fscache_n_object_lookups_positive; | ||
87 | atomic_t fscache_n_object_created; | ||
88 | atomic_t fscache_n_object_avail; | ||
89 | atomic_t fscache_n_object_dead; | ||
90 | |||
91 | atomic_t fscache_n_checkaux_none; | ||
92 | atomic_t fscache_n_checkaux_okay; | ||
93 | atomic_t fscache_n_checkaux_update; | ||
94 | atomic_t fscache_n_checkaux_obsolete; | ||
95 | |||
96 | /* | ||
97 | * display the general statistics | ||
98 | */ | ||
99 | static int fscache_stats_show(struct seq_file *m, void *v) | ||
100 | { | ||
101 | seq_puts(m, "FS-Cache statistics\n"); | ||
102 | |||
103 | seq_printf(m, "Cookies: idx=%u dat=%u spc=%u\n", | ||
104 | atomic_read(&fscache_n_cookie_index), | ||
105 | atomic_read(&fscache_n_cookie_data), | ||
106 | atomic_read(&fscache_n_cookie_special)); | ||
107 | |||
108 | seq_printf(m, "Objects: alc=%u nal=%u avl=%u ded=%u\n", | ||
109 | atomic_read(&fscache_n_object_alloc), | ||
110 | atomic_read(&fscache_n_object_no_alloc), | ||
111 | atomic_read(&fscache_n_object_avail), | ||
112 | atomic_read(&fscache_n_object_dead)); | ||
113 | seq_printf(m, "ChkAux : non=%u ok=%u upd=%u obs=%u\n", | ||
114 | atomic_read(&fscache_n_checkaux_none), | ||
115 | atomic_read(&fscache_n_checkaux_okay), | ||
116 | atomic_read(&fscache_n_checkaux_update), | ||
117 | atomic_read(&fscache_n_checkaux_obsolete)); | ||
118 | |||
119 | seq_printf(m, "Pages : mrk=%u unc=%u\n", | ||
120 | atomic_read(&fscache_n_marks), | ||
121 | atomic_read(&fscache_n_uncaches)); | ||
122 | |||
123 | seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u" | ||
124 | " oom=%u\n", | ||
125 | atomic_read(&fscache_n_acquires), | ||
126 | atomic_read(&fscache_n_acquires_null), | ||
127 | atomic_read(&fscache_n_acquires_no_cache), | ||
128 | atomic_read(&fscache_n_acquires_ok), | ||
129 | atomic_read(&fscache_n_acquires_nobufs), | ||
130 | atomic_read(&fscache_n_acquires_oom)); | ||
131 | |||
132 | seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", | ||
133 | atomic_read(&fscache_n_object_lookups), | ||
134 | atomic_read(&fscache_n_object_lookups_negative), | ||
135 | atomic_read(&fscache_n_object_lookups_positive), | ||
136 | atomic_read(&fscache_n_object_created)); | ||
137 | |||
138 | seq_printf(m, "Updates: n=%u nul=%u run=%u\n", | ||
139 | atomic_read(&fscache_n_updates), | ||
140 | atomic_read(&fscache_n_updates_null), | ||
141 | atomic_read(&fscache_n_updates_run)); | ||
142 | |||
143 | seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n", | ||
144 | atomic_read(&fscache_n_relinquishes), | ||
145 | atomic_read(&fscache_n_relinquishes_null), | ||
146 | atomic_read(&fscache_n_relinquishes_waitcrt)); | ||
147 | |||
148 | seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n", | ||
149 | atomic_read(&fscache_n_attr_changed), | ||
150 | atomic_read(&fscache_n_attr_changed_ok), | ||
151 | atomic_read(&fscache_n_attr_changed_nobufs), | ||
152 | atomic_read(&fscache_n_attr_changed_nomem), | ||
153 | atomic_read(&fscache_n_attr_changed_calls)); | ||
154 | |||
155 | seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n", | ||
156 | atomic_read(&fscache_n_allocs), | ||
157 | atomic_read(&fscache_n_allocs_ok), | ||
158 | atomic_read(&fscache_n_allocs_wait), | ||
159 | atomic_read(&fscache_n_allocs_nobufs)); | ||
160 | seq_printf(m, "Allocs : ops=%u owt=%u\n", | ||
161 | atomic_read(&fscache_n_alloc_ops), | ||
162 | atomic_read(&fscache_n_alloc_op_waits)); | ||
163 | |||
164 | seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" | ||
165 | " int=%u oom=%u\n", | ||
166 | atomic_read(&fscache_n_retrievals), | ||
167 | atomic_read(&fscache_n_retrievals_ok), | ||
168 | atomic_read(&fscache_n_retrievals_wait), | ||
169 | atomic_read(&fscache_n_retrievals_nodata), | ||
170 | atomic_read(&fscache_n_retrievals_nobufs), | ||
171 | atomic_read(&fscache_n_retrievals_intr), | ||
172 | atomic_read(&fscache_n_retrievals_nomem)); | ||
173 | seq_printf(m, "Retrvls: ops=%u owt=%u\n", | ||
174 | atomic_read(&fscache_n_retrieval_ops), | ||
175 | atomic_read(&fscache_n_retrieval_op_waits)); | ||
176 | |||
177 | seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", | ||
178 | atomic_read(&fscache_n_stores), | ||
179 | atomic_read(&fscache_n_stores_ok), | ||
180 | atomic_read(&fscache_n_stores_again), | ||
181 | atomic_read(&fscache_n_stores_nobufs), | ||
182 | atomic_read(&fscache_n_stores_oom)); | ||
183 | seq_printf(m, "Stores : ops=%u run=%u\n", | ||
184 | atomic_read(&fscache_n_store_ops), | ||
185 | atomic_read(&fscache_n_store_calls)); | ||
186 | |||
187 | seq_printf(m, "Ops : pend=%u run=%u enq=%u\n", | ||
188 | atomic_read(&fscache_n_op_pend), | ||
189 | atomic_read(&fscache_n_op_run), | ||
190 | atomic_read(&fscache_n_op_enqueue)); | ||
191 | seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", | ||
192 | atomic_read(&fscache_n_op_deferred_release), | ||
193 | atomic_read(&fscache_n_op_release), | ||
194 | atomic_read(&fscache_n_op_gc)); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * open "/proc/fs/fscache/stats" allowing provision of a statistical summary | ||
200 | */ | ||
201 | static int fscache_stats_open(struct inode *inode, struct file *file) | ||
202 | { | ||
203 | return single_open(file, fscache_stats_show, NULL); | ||
204 | } | ||
205 | |||
206 | const struct file_operations fscache_stats_fops = { | ||
207 | .owner = THIS_MODULE, | ||
208 | .open = fscache_stats_open, | ||
209 | .read = seq_read, | ||
210 | .llseek = seq_lseek, | ||
211 | .release = seq_release, | ||
212 | }; | ||
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 06da05261e04..8b8eebc5614b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1032,6 +1032,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
1032 | fuse_put_request(fc, req); | 1032 | fuse_put_request(fc, req); |
1033 | return -ENOMEM; | 1033 | return -ENOMEM; |
1034 | } | 1034 | } |
1035 | req->out.argpages = 1; | ||
1035 | req->num_pages = 1; | 1036 | req->num_pages = 1; |
1036 | req->pages[0] = page; | 1037 | req->pages[0] = page; |
1037 | fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); | 1038 | fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4e340fedf768..2b25133524a3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -386,7 +386,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, | |||
386 | req->in.numargs = 1; | 386 | req->in.numargs = 1; |
387 | req->in.args[0].size = sizeof(struct fuse_read_in); | 387 | req->in.args[0].size = sizeof(struct fuse_read_in); |
388 | req->in.args[0].value = inarg; | 388 | req->in.args[0].value = inarg; |
389 | req->out.argpages = 1; | ||
390 | req->out.argvar = 1; | 389 | req->out.argvar = 1; |
391 | req->out.numargs = 1; | 390 | req->out.numargs = 1; |
392 | req->out.args[0].size = count; | 391 | req->out.args[0].size = count; |
@@ -453,6 +452,7 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
453 | attr_ver = fuse_get_attr_version(fc); | 452 | attr_ver = fuse_get_attr_version(fc); |
454 | 453 | ||
455 | req->out.page_zeroing = 1; | 454 | req->out.page_zeroing = 1; |
455 | req->out.argpages = 1; | ||
456 | req->num_pages = 1; | 456 | req->num_pages = 1; |
457 | req->pages[0] = page; | 457 | req->pages[0] = page; |
458 | num_read = fuse_send_read(req, file, inode, pos, count, NULL); | 458 | num_read = fuse_send_read(req, file, inode, pos, count, NULL); |
@@ -510,6 +510,8 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file, | |||
510 | struct fuse_conn *fc = get_fuse_conn(inode); | 510 | struct fuse_conn *fc = get_fuse_conn(inode); |
511 | loff_t pos = page_offset(req->pages[0]); | 511 | loff_t pos = page_offset(req->pages[0]); |
512 | size_t count = req->num_pages << PAGE_CACHE_SHIFT; | 512 | size_t count = req->num_pages << PAGE_CACHE_SHIFT; |
513 | |||
514 | req->out.argpages = 1; | ||
513 | req->out.page_zeroing = 1; | 515 | req->out.page_zeroing = 1; |
514 | fuse_read_fill(req, file, inode, pos, count, FUSE_READ); | 516 | fuse_read_fill(req, file, inode, pos, count, FUSE_READ); |
515 | req->misc.read.attr_ver = fuse_get_attr_version(fc); | 517 | req->misc.read.attr_ver = fuse_get_attr_version(fc); |
@@ -621,7 +623,6 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, | |||
621 | inarg->flags = file ? file->f_flags : 0; | 623 | inarg->flags = file ? file->f_flags : 0; |
622 | req->in.h.opcode = FUSE_WRITE; | 624 | req->in.h.opcode = FUSE_WRITE; |
623 | req->in.h.nodeid = get_node_id(inode); | 625 | req->in.h.nodeid = get_node_id(inode); |
624 | req->in.argpages = 1; | ||
625 | req->in.numargs = 2; | 626 | req->in.numargs = 2; |
626 | if (fc->minor < 9) | 627 | if (fc->minor < 9) |
627 | req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; | 628 | req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; |
@@ -695,6 +696,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, | |||
695 | if (IS_ERR(req)) | 696 | if (IS_ERR(req)) |
696 | return PTR_ERR(req); | 697 | return PTR_ERR(req); |
697 | 698 | ||
699 | req->in.argpages = 1; | ||
698 | req->num_pages = 1; | 700 | req->num_pages = 1; |
699 | req->pages[0] = page; | 701 | req->pages[0] = page; |
700 | req->page_offset = offset; | 702 | req->page_offset = offset; |
@@ -771,6 +773,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, | |||
771 | size_t count = 0; | 773 | size_t count = 0; |
772 | int err; | 774 | int err; |
773 | 775 | ||
776 | req->in.argpages = 1; | ||
774 | req->page_offset = offset; | 777 | req->page_offset = offset; |
775 | 778 | ||
776 | do { | 779 | do { |
@@ -935,21 +938,28 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) | |||
935 | } | 938 | } |
936 | 939 | ||
937 | static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | 940 | static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, |
938 | unsigned nbytes, int write) | 941 | unsigned *nbytesp, int write) |
939 | { | 942 | { |
943 | unsigned nbytes = *nbytesp; | ||
940 | unsigned long user_addr = (unsigned long) buf; | 944 | unsigned long user_addr = (unsigned long) buf; |
941 | unsigned offset = user_addr & ~PAGE_MASK; | 945 | unsigned offset = user_addr & ~PAGE_MASK; |
942 | int npages; | 946 | int npages; |
943 | 947 | ||
944 | /* This doesn't work with nfsd */ | 948 | /* Special case for kernel I/O: can copy directly into the buffer */ |
945 | if (!current->mm) | 949 | if (segment_eq(get_fs(), KERNEL_DS)) { |
946 | return -EPERM; | 950 | if (write) |
951 | req->in.args[1].value = (void *) user_addr; | ||
952 | else | ||
953 | req->out.args[0].value = (void *) user_addr; | ||
954 | |||
955 | return 0; | ||
956 | } | ||
947 | 957 | ||
948 | nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); | 958 | nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); |
949 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 959 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
950 | npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); | 960 | npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); |
951 | down_read(¤t->mm->mmap_sem); | 961 | down_read(¤t->mm->mmap_sem); |
952 | npages = get_user_pages(current, current->mm, user_addr, npages, write, | 962 | npages = get_user_pages(current, current->mm, user_addr, npages, !write, |
953 | 0, req->pages, NULL); | 963 | 0, req->pages, NULL); |
954 | up_read(¤t->mm->mmap_sem); | 964 | up_read(¤t->mm->mmap_sem); |
955 | if (npages < 0) | 965 | if (npages < 0) |
@@ -957,6 +967,15 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | |||
957 | 967 | ||
958 | req->num_pages = npages; | 968 | req->num_pages = npages; |
959 | req->page_offset = offset; | 969 | req->page_offset = offset; |
970 | |||
971 | if (write) | ||
972 | req->in.argpages = 1; | ||
973 | else | ||
974 | req->out.argpages = 1; | ||
975 | |||
976 | nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; | ||
977 | *nbytesp = min(*nbytesp, nbytes); | ||
978 | |||
960 | return 0; | 979 | return 0; |
961 | } | 980 | } |
962 | 981 | ||
@@ -979,15 +998,13 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, | |||
979 | 998 | ||
980 | while (count) { | 999 | while (count) { |
981 | size_t nres; | 1000 | size_t nres; |
982 | size_t nbytes_limit = min(count, nmax); | 1001 | size_t nbytes = min(count, nmax); |
983 | size_t nbytes; | 1002 | int err = fuse_get_user_pages(req, buf, &nbytes, write); |
984 | int err = fuse_get_user_pages(req, buf, nbytes_limit, !write); | ||
985 | if (err) { | 1003 | if (err) { |
986 | res = err; | 1004 | res = err; |
987 | break; | 1005 | break; |
988 | } | 1006 | } |
989 | nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; | 1007 | |
990 | nbytes = min(nbytes_limit, nbytes); | ||
991 | if (write) | 1008 | if (write) |
992 | nres = fuse_send_write(req, file, inode, pos, nbytes, | 1009 | nres = fuse_send_write(req, file, inode, pos, nbytes, |
993 | current->files); | 1010 | current->files); |
@@ -1163,6 +1180,7 @@ static int fuse_writepage_locked(struct page *page) | |||
1163 | fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); | 1180 | fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); |
1164 | 1181 | ||
1165 | copy_highpage(tmp_page, page); | 1182 | copy_highpage(tmp_page, page); |
1183 | req->in.argpages = 1; | ||
1166 | req->num_pages = 1; | 1184 | req->num_pages = 1; |
1167 | req->pages[0] = tmp_page; | 1185 | req->pages[0] = tmp_page; |
1168 | req->page_offset = 0; | 1186 | req->page_offset = 0; |
@@ -1274,6 +1292,15 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
1274 | return 0; | 1292 | return 0; |
1275 | } | 1293 | } |
1276 | 1294 | ||
1295 | static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) | ||
1296 | { | ||
1297 | /* Can't provide the coherency needed for MAP_SHARED */ | ||
1298 | if (vma->vm_flags & VM_MAYSHARE) | ||
1299 | return -ENODEV; | ||
1300 | |||
1301 | return generic_file_mmap(file, vma); | ||
1302 | } | ||
1303 | |||
1277 | static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, | 1304 | static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, |
1278 | struct file_lock *fl) | 1305 | struct file_lock *fl) |
1279 | { | 1306 | { |
@@ -1908,6 +1935,7 @@ static const struct file_operations fuse_direct_io_file_operations = { | |||
1908 | .llseek = fuse_file_llseek, | 1935 | .llseek = fuse_file_llseek, |
1909 | .read = fuse_direct_read, | 1936 | .read = fuse_direct_read, |
1910 | .write = fuse_direct_write, | 1937 | .write = fuse_direct_write, |
1938 | .mmap = fuse_direct_mmap, | ||
1911 | .open = fuse_open, | 1939 | .open = fuse_open, |
1912 | .flush = fuse_flush, | 1940 | .flush = fuse_flush, |
1913 | .release = fuse_release, | 1941 | .release = fuse_release, |
@@ -1917,7 +1945,7 @@ static const struct file_operations fuse_direct_io_file_operations = { | |||
1917 | .unlocked_ioctl = fuse_file_ioctl, | 1945 | .unlocked_ioctl = fuse_file_ioctl, |
1918 | .compat_ioctl = fuse_file_compat_ioctl, | 1946 | .compat_ioctl = fuse_file_compat_ioctl, |
1919 | .poll = fuse_file_poll, | 1947 | .poll = fuse_file_poll, |
1920 | /* no mmap and splice_read */ | 1948 | /* no splice_read */ |
1921 | }; | 1949 | }; |
1922 | 1950 | ||
1923 | static const struct address_space_operations fuse_file_aops = { | 1951 | static const struct address_space_operations fuse_file_aops = { |
diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 995d63b2e747..e0b53aa7bbec 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c | |||
@@ -134,7 +134,7 @@ generic_acl_init(struct inode *inode, struct inode *dir, | |||
134 | mode_t mode = inode->i_mode; | 134 | mode_t mode = inode->i_mode; |
135 | int error; | 135 | int error; |
136 | 136 | ||
137 | inode->i_mode = mode & ~current->fs->umask; | 137 | inode->i_mode = mode & ~current_umask(); |
138 | if (!S_ISLNK(inode->i_mode)) | 138 | if (!S_ISLNK(inode->i_mode)) |
139 | acl = ops->getacl(dir, ACL_TYPE_DEFAULT); | 139 | acl = ops->getacl(dir, ACL_TYPE_DEFAULT); |
140 | if (acl) { | 140 | if (acl) { |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 43764f4fa763..fa881bdc3d85 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -215,7 +215,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
215 | if (error) | 215 | if (error) |
216 | return error; | 216 | return error; |
217 | if (!acl) { | 217 | if (!acl) { |
218 | mode &= ~current->fs->umask; | 218 | mode &= ~current_umask(); |
219 | if (mode != ip->i_inode.i_mode) | 219 | if (mode != ip->i_inode.i_mode) |
220 | error = munge_mode(ip, mode); | 220 | error = munge_mode(ip, mode); |
221 | return error; | 221 | return error; |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index c8b5acf4b0b7..a36bb749926d 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -82,6 +82,7 @@ static void hfs_put_super(struct super_block *sb) | |||
82 | static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 82 | static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
83 | { | 83 | { |
84 | struct super_block *sb = dentry->d_sb; | 84 | struct super_block *sb = dentry->d_sb; |
85 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
85 | 86 | ||
86 | buf->f_type = HFS_SUPER_MAGIC; | 87 | buf->f_type = HFS_SUPER_MAGIC; |
87 | buf->f_bsize = sb->s_blocksize; | 88 | buf->f_bsize = sb->s_blocksize; |
@@ -90,6 +91,8 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
90 | buf->f_bavail = buf->f_bfree; | 91 | buf->f_bavail = buf->f_bfree; |
91 | buf->f_files = HFS_SB(sb)->fs_ablocks; | 92 | buf->f_files = HFS_SB(sb)->fs_ablocks; |
92 | buf->f_ffree = HFS_SB(sb)->free_ablocks; | 93 | buf->f_ffree = HFS_SB(sb)->free_ablocks; |
94 | buf->f_fsid.val[0] = (u32)id; | ||
95 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
93 | buf->f_namelen = HFS_NAMELEN; | 96 | buf->f_namelen = HFS_NAMELEN; |
94 | 97 | ||
95 | return 0; | 98 | return 0; |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index bab7f8d1bdfa..3fcbb0e1f6fc 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -48,7 +48,7 @@ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts) | |||
48 | 48 | ||
49 | opts->creator = HFSPLUS_DEF_CR_TYPE; | 49 | opts->creator = HFSPLUS_DEF_CR_TYPE; |
50 | opts->type = HFSPLUS_DEF_CR_TYPE; | 50 | opts->type = HFSPLUS_DEF_CR_TYPE; |
51 | opts->umask = current->fs->umask; | 51 | opts->umask = current_umask(); |
52 | opts->uid = current_uid(); | 52 | opts->uid = current_uid(); |
53 | opts->gid = current_gid(); | 53 | opts->gid = current_gid(); |
54 | opts->part = -1; | 54 | opts->part = -1; |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index eb74531a0a8e..f2a64020f42e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -223,6 +223,7 @@ static void hfsplus_put_super(struct super_block *sb) | |||
223 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | 223 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) |
224 | { | 224 | { |
225 | struct super_block *sb = dentry->d_sb; | 225 | struct super_block *sb = dentry->d_sb; |
226 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
226 | 227 | ||
227 | buf->f_type = HFSPLUS_SUPER_MAGIC; | 228 | buf->f_type = HFSPLUS_SUPER_MAGIC; |
228 | buf->f_bsize = sb->s_blocksize; | 229 | buf->f_bsize = sb->s_blocksize; |
@@ -231,6 +232,8 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
231 | buf->f_bavail = buf->f_bfree; | 232 | buf->f_bavail = buf->f_bfree; |
232 | buf->f_files = 0xFFFFFFFF; | 233 | buf->f_files = 0xFFFFFFFF; |
233 | buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; | 234 | buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; |
235 | buf->f_fsid.val[0] = (u32)id; | ||
236 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
234 | buf->f_namelen = HFSPLUS_MAX_STRLEN; | 237 | buf->f_namelen = HFSPLUS_MAX_STRLEN; |
235 | 238 | ||
236 | return 0; | 239 | return 0; |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 0d049b8919c4..fecf402d7b8a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -136,6 +136,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
136 | { | 136 | { |
137 | struct super_block *s = dentry->d_sb; | 137 | struct super_block *s = dentry->d_sb; |
138 | struct hpfs_sb_info *sbi = hpfs_sb(s); | 138 | struct hpfs_sb_info *sbi = hpfs_sb(s); |
139 | u64 id = huge_encode_dev(s->s_bdev->bd_dev); | ||
139 | lock_kernel(); | 140 | lock_kernel(); |
140 | 141 | ||
141 | /*if (sbi->sb_n_free == -1) {*/ | 142 | /*if (sbi->sb_n_free == -1) {*/ |
@@ -149,6 +150,8 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
149 | buf->f_bavail = sbi->sb_n_free; | 150 | buf->f_bavail = sbi->sb_n_free; |
150 | buf->f_files = sbi->sb_dirband_size / 4; | 151 | buf->f_files = sbi->sb_dirband_size / 4; |
151 | buf->f_ffree = sbi->sb_n_free_dnodes; | 152 | buf->f_ffree = sbi->sb_n_free_dnodes; |
153 | buf->f_fsid.val[0] = (u32)id; | ||
154 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
152 | buf->f_namelen = 254; | 155 | buf->f_namelen = 254; |
153 | 156 | ||
154 | unlock_kernel(); | 157 | unlock_kernel(); |
@@ -477,7 +480,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
477 | 480 | ||
478 | uid = current_uid(); | 481 | uid = current_uid(); |
479 | gid = current_gid(); | 482 | gid = current_gid(); |
480 | umask = current->fs->umask; | 483 | umask = current_umask(); |
481 | lowercase = 0; | 484 | lowercase = 0; |
482 | conv = CONV_BINARY; | 485 | conv = CONV_BINARY; |
483 | eas = 2; | 486 | eas = 2; |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index b278f7f52024..a5089a6dd67a 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -280,7 +280,12 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, | |||
280 | "errno = %d\n", err); | 280 | "errno = %d\n", err); |
281 | return err; | 281 | return err; |
282 | } | 282 | } |
283 | count = hppfs_read_file(hppfs->host_fd, buf, count); | 283 | err = hppfs_read_file(hppfs->host_fd, buf, count); |
284 | if (err < 0) { | ||
285 | printk(KERN_ERR "hppfs_read: read failed: %d\n", err); | ||
286 | return err; | ||
287 | } | ||
288 | count = err; | ||
284 | if (count > 0) | 289 | if (count > 0) |
285 | *ppos += count; | 290 | *ppos += count; |
286 | } | 291 | } |
diff --git a/fs/internal.h b/fs/internal.h index 53af885f1732..b4dac4fb6b61 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | struct super_block; | 12 | struct super_block; |
13 | struct linux_binprm; | 13 | struct linux_binprm; |
14 | struct path; | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * block_dev.c | 17 | * block_dev.c |
@@ -43,7 +44,7 @@ extern void __init chrdev_init(void); | |||
43 | /* | 44 | /* |
44 | * exec.c | 45 | * exec.c |
45 | */ | 46 | */ |
46 | extern void check_unsafe_exec(struct linux_binprm *); | 47 | extern int check_unsafe_exec(struct linux_binprm *); |
47 | 48 | ||
48 | /* | 49 | /* |
49 | * namespace.c | 50 | * namespace.c |
@@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount *, int, struct list_head *); | |||
60 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | 61 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); |
61 | 62 | ||
62 | extern void __init mnt_init(void); | 63 | extern void __init mnt_init(void); |
64 | |||
65 | /* | ||
66 | * fs_struct.c | ||
67 | */ | ||
68 | extern void chroot_fs_refs(struct path *, struct path *); | ||
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 13d2eddd0692..b4cbe9603c7d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -923,6 +923,7 @@ out_freesbi: | |||
923 | static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) | 923 | static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) |
924 | { | 924 | { |
925 | struct super_block *sb = dentry->d_sb; | 925 | struct super_block *sb = dentry->d_sb; |
926 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
926 | 927 | ||
927 | buf->f_type = ISOFS_SUPER_MAGIC; | 928 | buf->f_type = ISOFS_SUPER_MAGIC; |
928 | buf->f_bsize = sb->s_blocksize; | 929 | buf->f_bsize = sb->s_blocksize; |
@@ -932,6 +933,8 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) | |||
932 | buf->f_bavail = 0; | 933 | buf->f_bavail = 0; |
933 | buf->f_files = ISOFS_SB(sb)->s_ninodes; | 934 | buf->f_files = ISOFS_SB(sb)->s_ninodes; |
934 | buf->f_ffree = 0; | 935 | buf->f_ffree = 0; |
936 | buf->f_fsid.val[0] = (u32)id; | ||
937 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
935 | buf->f_namelen = NAME_MAX; | 938 | buf->f_namelen = NAME_MAX; |
936 | return 0; | 939 | return 0; |
937 | } | 940 | } |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 3fbffb1ea714..f8077b9c8981 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/bio.h> | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * Default IO end handler for temporary BJ_IO buffer_heads. | 26 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -171,14 +172,15 @@ static int journal_write_commit_record(journal_t *journal, | |||
171 | return (ret == -EIO); | 172 | return (ret == -EIO); |
172 | } | 173 | } |
173 | 174 | ||
174 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | 175 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs, |
176 | int write_op) | ||
175 | { | 177 | { |
176 | int i; | 178 | int i; |
177 | 179 | ||
178 | for (i = 0; i < bufs; i++) { | 180 | for (i = 0; i < bufs; i++) { |
179 | wbuf[i]->b_end_io = end_buffer_write_sync; | 181 | wbuf[i]->b_end_io = end_buffer_write_sync; |
180 | /* We use-up our safety reference in submit_bh() */ | 182 | /* We use-up our safety reference in submit_bh() */ |
181 | submit_bh(WRITE, wbuf[i]); | 183 | submit_bh(write_op, wbuf[i]); |
182 | } | 184 | } |
183 | } | 185 | } |
184 | 186 | ||
@@ -186,7 +188,8 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | |||
186 | * Submit all the data buffers to disk | 188 | * Submit all the data buffers to disk |
187 | */ | 189 | */ |
188 | static int journal_submit_data_buffers(journal_t *journal, | 190 | static int journal_submit_data_buffers(journal_t *journal, |
189 | transaction_t *commit_transaction) | 191 | transaction_t *commit_transaction, |
192 | int write_op) | ||
190 | { | 193 | { |
191 | struct journal_head *jh; | 194 | struct journal_head *jh; |
192 | struct buffer_head *bh; | 195 | struct buffer_head *bh; |
@@ -225,7 +228,7 @@ write_out_data: | |||
225 | BUFFER_TRACE(bh, "needs blocking lock"); | 228 | BUFFER_TRACE(bh, "needs blocking lock"); |
226 | spin_unlock(&journal->j_list_lock); | 229 | spin_unlock(&journal->j_list_lock); |
227 | /* Write out all data to prevent deadlocks */ | 230 | /* Write out all data to prevent deadlocks */ |
228 | journal_do_submit_data(wbuf, bufs); | 231 | journal_do_submit_data(wbuf, bufs, write_op); |
229 | bufs = 0; | 232 | bufs = 0; |
230 | lock_buffer(bh); | 233 | lock_buffer(bh); |
231 | spin_lock(&journal->j_list_lock); | 234 | spin_lock(&journal->j_list_lock); |
@@ -256,7 +259,7 @@ write_out_data: | |||
256 | jbd_unlock_bh_state(bh); | 259 | jbd_unlock_bh_state(bh); |
257 | if (bufs == journal->j_wbufsize) { | 260 | if (bufs == journal->j_wbufsize) { |
258 | spin_unlock(&journal->j_list_lock); | 261 | spin_unlock(&journal->j_list_lock); |
259 | journal_do_submit_data(wbuf, bufs); | 262 | journal_do_submit_data(wbuf, bufs, write_op); |
260 | bufs = 0; | 263 | bufs = 0; |
261 | goto write_out_data; | 264 | goto write_out_data; |
262 | } | 265 | } |
@@ -286,7 +289,7 @@ write_out_data: | |||
286 | } | 289 | } |
287 | } | 290 | } |
288 | spin_unlock(&journal->j_list_lock); | 291 | spin_unlock(&journal->j_list_lock); |
289 | journal_do_submit_data(wbuf, bufs); | 292 | journal_do_submit_data(wbuf, bufs, write_op); |
290 | 293 | ||
291 | return err; | 294 | return err; |
292 | } | 295 | } |
@@ -315,6 +318,7 @@ void journal_commit_transaction(journal_t *journal) | |||
315 | int first_tag = 0; | 318 | int first_tag = 0; |
316 | int tag_flag; | 319 | int tag_flag; |
317 | int i; | 320 | int i; |
321 | int write_op = WRITE; | ||
318 | 322 | ||
319 | /* | 323 | /* |
320 | * First job: lock down the current transaction and wait for | 324 | * First job: lock down the current transaction and wait for |
@@ -347,6 +351,8 @@ void journal_commit_transaction(journal_t *journal) | |||
347 | spin_lock(&journal->j_state_lock); | 351 | spin_lock(&journal->j_state_lock); |
348 | commit_transaction->t_state = T_LOCKED; | 352 | commit_transaction->t_state = T_LOCKED; |
349 | 353 | ||
354 | if (commit_transaction->t_synchronous_commit) | ||
355 | write_op = WRITE_SYNC; | ||
350 | spin_lock(&commit_transaction->t_handle_lock); | 356 | spin_lock(&commit_transaction->t_handle_lock); |
351 | while (commit_transaction->t_updates) { | 357 | while (commit_transaction->t_updates) { |
352 | DEFINE_WAIT(wait); | 358 | DEFINE_WAIT(wait); |
@@ -431,7 +437,8 @@ void journal_commit_transaction(journal_t *journal) | |||
431 | * Now start flushing things to disk, in the order they appear | 437 | * Now start flushing things to disk, in the order they appear |
432 | * on the transaction lists. Data blocks go first. | 438 | * on the transaction lists. Data blocks go first. |
433 | */ | 439 | */ |
434 | err = journal_submit_data_buffers(journal, commit_transaction); | 440 | err = journal_submit_data_buffers(journal, commit_transaction, |
441 | write_op); | ||
435 | 442 | ||
436 | /* | 443 | /* |
437 | * Wait for all previously submitted IO to complete. | 444 | * Wait for all previously submitted IO to complete. |
@@ -660,7 +667,7 @@ start_journal_io: | |||
660 | clear_buffer_dirty(bh); | 667 | clear_buffer_dirty(bh); |
661 | set_buffer_uptodate(bh); | 668 | set_buffer_uptodate(bh); |
662 | bh->b_end_io = journal_end_buffer_io_sync; | 669 | bh->b_end_io = journal_end_buffer_io_sync; |
663 | submit_bh(WRITE, bh); | 670 | submit_bh(write_op, bh); |
664 | } | 671 | } |
665 | cond_resched(); | 672 | cond_resched(); |
666 | 673 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index e79c07812afa..737f7246a4b5 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -637,6 +637,8 @@ struct journal_head *journal_get_descriptor_buffer(journal_t *journal) | |||
637 | return NULL; | 637 | return NULL; |
638 | 638 | ||
639 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 639 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); |
640 | if (!bh) | ||
641 | return NULL; | ||
640 | lock_buffer(bh); | 642 | lock_buffer(bh); |
641 | memset(bh->b_data, 0, journal->j_blocksize); | 643 | memset(bh->b_data, 0, journal->j_blocksize); |
642 | set_buffer_uptodate(bh); | 644 | set_buffer_uptodate(bh); |
@@ -733,9 +735,7 @@ journal_t * journal_init_dev(struct block_device *bdev, | |||
733 | if (!journal->j_wbuf) { | 735 | if (!journal->j_wbuf) { |
734 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", | 736 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", |
735 | __func__); | 737 | __func__); |
736 | kfree(journal); | 738 | goto out_err; |
737 | journal = NULL; | ||
738 | goto out; | ||
739 | } | 739 | } |
740 | journal->j_dev = bdev; | 740 | journal->j_dev = bdev; |
741 | journal->j_fs_dev = fs_dev; | 741 | journal->j_fs_dev = fs_dev; |
@@ -743,11 +743,19 @@ journal_t * journal_init_dev(struct block_device *bdev, | |||
743 | journal->j_maxlen = len; | 743 | journal->j_maxlen = len; |
744 | 744 | ||
745 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 745 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
746 | J_ASSERT(bh != NULL); | 746 | if (!bh) { |
747 | printk(KERN_ERR | ||
748 | "%s: Cannot get buffer for journal superblock\n", | ||
749 | __func__); | ||
750 | goto out_err; | ||
751 | } | ||
747 | journal->j_sb_buffer = bh; | 752 | journal->j_sb_buffer = bh; |
748 | journal->j_superblock = (journal_superblock_t *)bh->b_data; | 753 | journal->j_superblock = (journal_superblock_t *)bh->b_data; |
749 | out: | 754 | |
750 | return journal; | 755 | return journal; |
756 | out_err: | ||
757 | kfree(journal); | ||
758 | return NULL; | ||
751 | } | 759 | } |
752 | 760 | ||
753 | /** | 761 | /** |
@@ -787,8 +795,7 @@ journal_t * journal_init_inode (struct inode *inode) | |||
787 | if (!journal->j_wbuf) { | 795 | if (!journal->j_wbuf) { |
788 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", | 796 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", |
789 | __func__); | 797 | __func__); |
790 | kfree(journal); | 798 | goto out_err; |
791 | return NULL; | ||
792 | } | 799 | } |
793 | 800 | ||
794 | err = journal_bmap(journal, 0, &blocknr); | 801 | err = journal_bmap(journal, 0, &blocknr); |
@@ -796,16 +803,23 @@ journal_t * journal_init_inode (struct inode *inode) | |||
796 | if (err) { | 803 | if (err) { |
797 | printk(KERN_ERR "%s: Cannnot locate journal superblock\n", | 804 | printk(KERN_ERR "%s: Cannnot locate journal superblock\n", |
798 | __func__); | 805 | __func__); |
799 | kfree(journal); | 806 | goto out_err; |
800 | return NULL; | ||
801 | } | 807 | } |
802 | 808 | ||
803 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 809 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); |
804 | J_ASSERT(bh != NULL); | 810 | if (!bh) { |
811 | printk(KERN_ERR | ||
812 | "%s: Cannot get buffer for journal superblock\n", | ||
813 | __func__); | ||
814 | goto out_err; | ||
815 | } | ||
805 | journal->j_sb_buffer = bh; | 816 | journal->j_sb_buffer = bh; |
806 | journal->j_superblock = (journal_superblock_t *)bh->b_data; | 817 | journal->j_superblock = (journal_superblock_t *)bh->b_data; |
807 | 818 | ||
808 | return journal; | 819 | return journal; |
820 | out_err: | ||
821 | kfree(journal); | ||
822 | return NULL; | ||
809 | } | 823 | } |
810 | 824 | ||
811 | /* | 825 | /* |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index e6a117431277..ed886e6db399 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -1440,6 +1440,8 @@ int journal_stop(handle_t *handle) | |||
1440 | } | 1440 | } |
1441 | } | 1441 | } |
1442 | 1442 | ||
1443 | if (handle->h_sync) | ||
1444 | transaction->t_synchronous_commit = 1; | ||
1443 | current->journal_info = NULL; | 1445 | current->journal_info = NULL; |
1444 | spin_lock(&journal->j_state_lock); | 1446 | spin_lock(&journal->j_state_lock); |
1445 | spin_lock(&transaction->t_handle_lock); | 1447 | spin_lock(&transaction->t_handle_lock); |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index d98713777a1b..77ccf8cb0823 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -336,7 +336,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) | |||
336 | return PTR_ERR(acl); | 336 | return PTR_ERR(acl); |
337 | 337 | ||
338 | if (!acl) { | 338 | if (!acl) { |
339 | *i_mode &= ~current->fs->umask; | 339 | *i_mode &= ~current_umask(); |
340 | } else { | 340 | } else { |
341 | if (S_ISDIR(*i_mode)) | 341 | if (S_ISDIR(*i_mode)) |
342 | jffs2_iset_acl(inode, &f->i_acl_default, acl); | 342 | jffs2_iset_acl(inode, &f->i_acl_default, acl); |
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index a166c1669e82..06ca1b8d2054 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c | |||
@@ -182,7 +182,7 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) | |||
182 | cleanup: | 182 | cleanup: |
183 | posix_acl_release(acl); | 183 | posix_acl_release(acl); |
184 | } else | 184 | } else |
185 | inode->i_mode &= ~current->fs->umask; | 185 | inode->i_mode &= ~current_umask(); |
186 | 186 | ||
187 | JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | | 187 | JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | |
188 | inode->i_mode; | 188 | inode->i_mode; |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 618865b3128b..daad3c2740db 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -321,15 +321,20 @@ out: | |||
321 | 321 | ||
322 | static int minix_statfs(struct dentry *dentry, struct kstatfs *buf) | 322 | static int minix_statfs(struct dentry *dentry, struct kstatfs *buf) |
323 | { | 323 | { |
324 | struct minix_sb_info *sbi = minix_sb(dentry->d_sb); | 324 | struct super_block *sb = dentry->d_sb; |
325 | buf->f_type = dentry->d_sb->s_magic; | 325 | struct minix_sb_info *sbi = minix_sb(sb); |
326 | buf->f_bsize = dentry->d_sb->s_blocksize; | 326 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
327 | buf->f_type = sb->s_magic; | ||
328 | buf->f_bsize = sb->s_blocksize; | ||
327 | buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size; | 329 | buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size; |
328 | buf->f_bfree = minix_count_free_blocks(sbi); | 330 | buf->f_bfree = minix_count_free_blocks(sbi); |
329 | buf->f_bavail = buf->f_bfree; | 331 | buf->f_bavail = buf->f_bfree; |
330 | buf->f_files = sbi->s_ninodes; | 332 | buf->f_files = sbi->s_ninodes; |
331 | buf->f_ffree = minix_count_free_inodes(sbi); | 333 | buf->f_ffree = minix_count_free_inodes(sbi); |
332 | buf->f_namelen = sbi->s_namelen; | 334 | buf->f_namelen = sbi->s_namelen; |
335 | buf->f_fsid.val[0] = (u32)id; | ||
336 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
337 | |||
333 | return 0; | 338 | return 0; |
334 | } | 339 | } |
335 | 340 | ||
diff --git a/fs/mpage.c b/fs/mpage.c index 16c3ef37eae3..680ba60863ff 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err) | |||
82 | bio_put(bio); | 82 | bio_put(bio); |
83 | } | 83 | } |
84 | 84 | ||
85 | struct bio *mpage_bio_submit(int rw, struct bio *bio) | 85 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) |
86 | { | 86 | { |
87 | bio->bi_end_io = mpage_end_io_read; | 87 | bio->bi_end_io = mpage_end_io_read; |
88 | if (rw == WRITE) | 88 | if (rw == WRITE) |
@@ -90,7 +90,6 @@ struct bio *mpage_bio_submit(int rw, struct bio *bio) | |||
90 | submit_bio(rw, bio); | 90 | submit_bio(rw, bio); |
91 | return NULL; | 91 | return NULL; |
92 | } | 92 | } |
93 | EXPORT_SYMBOL(mpage_bio_submit); | ||
94 | 93 | ||
95 | static struct bio * | 94 | static struct bio * |
96 | mpage_alloc(struct block_device *bdev, | 95 | mpage_alloc(struct block_device *bdev, |
@@ -439,7 +438,14 @@ EXPORT_SYMBOL(mpage_readpage); | |||
439 | * just allocate full-size (16-page) BIOs. | 438 | * just allocate full-size (16-page) BIOs. |
440 | */ | 439 | */ |
441 | 440 | ||
442 | int __mpage_writepage(struct page *page, struct writeback_control *wbc, | 441 | struct mpage_data { |
442 | struct bio *bio; | ||
443 | sector_t last_block_in_bio; | ||
444 | get_block_t *get_block; | ||
445 | unsigned use_writepage; | ||
446 | }; | ||
447 | |||
448 | static int __mpage_writepage(struct page *page, struct writeback_control *wbc, | ||
443 | void *data) | 449 | void *data) |
444 | { | 450 | { |
445 | struct mpage_data *mpd = data; | 451 | struct mpage_data *mpd = data; |
@@ -648,7 +654,6 @@ out: | |||
648 | mpd->bio = bio; | 654 | mpd->bio = bio; |
649 | return ret; | 655 | return ret; |
650 | } | 656 | } |
651 | EXPORT_SYMBOL(__mpage_writepage); | ||
652 | 657 | ||
653 | /** | 658 | /** |
654 | * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them | 659 | * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them |
diff --git a/fs/namei.c b/fs/namei.c index d040ce11785d..b8433ebfae05 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/file.h> | 32 | #include <linux/file.h> |
33 | #include <linux/fcntl.h> | 33 | #include <linux/fcntl.h> |
34 | #include <linux/device_cgroup.h> | 34 | #include <linux/device_cgroup.h> |
35 | #include <linux/fs_struct.h> | ||
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
36 | 37 | ||
37 | #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) | 38 | #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) |
@@ -1578,7 +1579,7 @@ static int __open_namei_create(struct nameidata *nd, struct path *path, | |||
1578 | struct dentry *dir = nd->path.dentry; | 1579 | struct dentry *dir = nd->path.dentry; |
1579 | 1580 | ||
1580 | if (!IS_POSIXACL(dir->d_inode)) | 1581 | if (!IS_POSIXACL(dir->d_inode)) |
1581 | mode &= ~current->fs->umask; | 1582 | mode &= ~current_umask(); |
1582 | error = security_path_mknod(&nd->path, path->dentry, mode, 0); | 1583 | error = security_path_mknod(&nd->path, path->dentry, mode, 0); |
1583 | if (error) | 1584 | if (error) |
1584 | goto out_unlock; | 1585 | goto out_unlock; |
@@ -1989,7 +1990,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, | |||
1989 | goto out_unlock; | 1990 | goto out_unlock; |
1990 | } | 1991 | } |
1991 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) | 1992 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) |
1992 | mode &= ~current->fs->umask; | 1993 | mode &= ~current_umask(); |
1993 | error = may_mknod(mode); | 1994 | error = may_mknod(mode); |
1994 | if (error) | 1995 | if (error) |
1995 | goto out_dput; | 1996 | goto out_dput; |
@@ -2067,7 +2068,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) | |||
2067 | goto out_unlock; | 2068 | goto out_unlock; |
2068 | 2069 | ||
2069 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) | 2070 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) |
2070 | mode &= ~current->fs->umask; | 2071 | mode &= ~current_umask(); |
2071 | error = mnt_want_write(nd.path.mnt); | 2072 | error = mnt_want_write(nd.path.mnt); |
2072 | if (error) | 2073 | if (error) |
2073 | goto out_dput; | 2074 | goto out_dput; |
@@ -2897,10 +2898,3 @@ EXPORT_SYMBOL(vfs_symlink); | |||
2897 | EXPORT_SYMBOL(vfs_unlink); | 2898 | EXPORT_SYMBOL(vfs_unlink); |
2898 | EXPORT_SYMBOL(dentry_unhash); | 2899 | EXPORT_SYMBOL(dentry_unhash); |
2899 | EXPORT_SYMBOL(generic_readlink); | 2900 | EXPORT_SYMBOL(generic_readlink); |
2900 | |||
2901 | /* to be mentioned only in INIT_TASK */ | ||
2902 | struct fs_struct init_fs = { | ||
2903 | .count = ATOMIC_INIT(1), | ||
2904 | .lock = __RW_LOCK_UNLOCKED(init_fs.lock), | ||
2905 | .umask = 0022, | ||
2906 | }; | ||
diff --git a/fs/namespace.c b/fs/namespace.c index 0a42e0e96027..c6f54e4c4290 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/ramfs.h> | 27 | #include <linux/ramfs.h> |
28 | #include <linux/log2.h> | 28 | #include <linux/log2.h> |
29 | #include <linux/idr.h> | 29 | #include <linux/idr.h> |
30 | #include <linux/fs_struct.h> | ||
30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
31 | #include <asm/unistd.h> | 32 | #include <asm/unistd.h> |
32 | #include "pnode.h" | 33 | #include "pnode.h" |
@@ -2093,66 +2094,6 @@ out1: | |||
2093 | } | 2094 | } |
2094 | 2095 | ||
2095 | /* | 2096 | /* |
2096 | * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. | ||
2097 | * It can block. Requires the big lock held. | ||
2098 | */ | ||
2099 | void set_fs_root(struct fs_struct *fs, struct path *path) | ||
2100 | { | ||
2101 | struct path old_root; | ||
2102 | |||
2103 | write_lock(&fs->lock); | ||
2104 | old_root = fs->root; | ||
2105 | fs->root = *path; | ||
2106 | path_get(path); | ||
2107 | write_unlock(&fs->lock); | ||
2108 | if (old_root.dentry) | ||
2109 | path_put(&old_root); | ||
2110 | } | ||
2111 | |||
2112 | /* | ||
2113 | * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. | ||
2114 | * It can block. Requires the big lock held. | ||
2115 | */ | ||
2116 | void set_fs_pwd(struct fs_struct *fs, struct path *path) | ||
2117 | { | ||
2118 | struct path old_pwd; | ||
2119 | |||
2120 | write_lock(&fs->lock); | ||
2121 | old_pwd = fs->pwd; | ||
2122 | fs->pwd = *path; | ||
2123 | path_get(path); | ||
2124 | write_unlock(&fs->lock); | ||
2125 | |||
2126 | if (old_pwd.dentry) | ||
2127 | path_put(&old_pwd); | ||
2128 | } | ||
2129 | |||
2130 | static void chroot_fs_refs(struct path *old_root, struct path *new_root) | ||
2131 | { | ||
2132 | struct task_struct *g, *p; | ||
2133 | struct fs_struct *fs; | ||
2134 | |||
2135 | read_lock(&tasklist_lock); | ||
2136 | do_each_thread(g, p) { | ||
2137 | task_lock(p); | ||
2138 | fs = p->fs; | ||
2139 | if (fs) { | ||
2140 | atomic_inc(&fs->count); | ||
2141 | task_unlock(p); | ||
2142 | if (fs->root.dentry == old_root->dentry | ||
2143 | && fs->root.mnt == old_root->mnt) | ||
2144 | set_fs_root(fs, new_root); | ||
2145 | if (fs->pwd.dentry == old_root->dentry | ||
2146 | && fs->pwd.mnt == old_root->mnt) | ||
2147 | set_fs_pwd(fs, new_root); | ||
2148 | put_fs_struct(fs); | ||
2149 | } else | ||
2150 | task_unlock(p); | ||
2151 | } while_each_thread(g, p); | ||
2152 | read_unlock(&tasklist_lock); | ||
2153 | } | ||
2154 | |||
2155 | /* | ||
2156 | * pivot_root Semantics: | 2097 | * pivot_root Semantics: |
2157 | * Moves the root file system of the current process to the directory put_old, | 2098 | * Moves the root file system of the current process to the directory put_old, |
2158 | * makes new_root as the new root file system of the current process, and sets | 2099 | * makes new_root as the new root file system of the current process, and sets |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 36fe20d6eba2..e67f3ec07736 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -84,3 +84,11 @@ config ROOT_NFS | |||
84 | <file:Documentation/filesystems/nfsroot.txt>. | 84 | <file:Documentation/filesystems/nfsroot.txt>. |
85 | 85 | ||
86 | Most people say N here. | 86 | Most people say N here. |
87 | |||
88 | config NFS_FSCACHE | ||
89 | bool "Provide NFS client caching support (EXPERIMENTAL)" | ||
90 | depends on EXPERIMENTAL | ||
91 | depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y | ||
92 | help | ||
93 | Say Y here if you want NFS data to be cached locally on disc through | ||
94 | the general filesystem cache manager | ||
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ac6170c594a3..845159814de2 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -15,3 +15,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
15 | callback.o callback_xdr.o callback_proc.o \ | 15 | callback.o callback_xdr.o callback_proc.o \ |
16 | nfs4namespace.o | 16 | nfs4namespace.o |
17 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 17 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
18 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index aba38017bdef..75c9cd2aa119 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "delegation.h" | 45 | #include "delegation.h" |
46 | #include "iostat.h" | 46 | #include "iostat.h" |
47 | #include "internal.h" | 47 | #include "internal.h" |
48 | #include "fscache.h" | ||
48 | 49 | ||
49 | #define NFSDBG_FACILITY NFSDBG_CLIENT | 50 | #define NFSDBG_FACILITY NFSDBG_CLIENT |
50 | 51 | ||
@@ -154,6 +155,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
154 | if (!IS_ERR(cred)) | 155 | if (!IS_ERR(cred)) |
155 | clp->cl_machine_cred = cred; | 156 | clp->cl_machine_cred = cred; |
156 | 157 | ||
158 | nfs_fscache_get_client_cookie(clp); | ||
159 | |||
157 | return clp; | 160 | return clp; |
158 | 161 | ||
159 | error_3: | 162 | error_3: |
@@ -187,6 +190,8 @@ static void nfs_free_client(struct nfs_client *clp) | |||
187 | 190 | ||
188 | nfs4_shutdown_client(clp); | 191 | nfs4_shutdown_client(clp); |
189 | 192 | ||
193 | nfs_fscache_release_client_cookie(clp); | ||
194 | |||
190 | /* -EIO all pending I/O */ | 195 | /* -EIO all pending I/O */ |
191 | if (!IS_ERR(clp->cl_rpcclient)) | 196 | if (!IS_ERR(clp->cl_rpcclient)) |
192 | rpc_shutdown_client(clp->cl_rpcclient); | 197 | rpc_shutdown_client(clp->cl_rpcclient); |
@@ -760,6 +765,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
760 | 765 | ||
761 | /* Initialise the client representation from the mount data */ | 766 | /* Initialise the client representation from the mount data */ |
762 | server->flags = data->flags; | 767 | server->flags = data->flags; |
768 | server->options = data->options; | ||
763 | 769 | ||
764 | if (data->rsize) | 770 | if (data->rsize) |
765 | server->rsize = nfs_block_size(data->rsize, NULL); | 771 | server->rsize = nfs_block_size(data->rsize, NULL); |
@@ -1148,6 +1154,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1148 | /* Initialise the client representation from the mount data */ | 1154 | /* Initialise the client representation from the mount data */ |
1149 | server->flags = data->flags; | 1155 | server->flags = data->flags; |
1150 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1156 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
1157 | server->options = data->options; | ||
1151 | 1158 | ||
1152 | /* Get a client record */ | 1159 | /* Get a client record */ |
1153 | error = nfs4_set_client(server, | 1160 | error = nfs4_set_client(server, |
@@ -1559,7 +1566,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1559 | 1566 | ||
1560 | /* display header on line 1 */ | 1567 | /* display header on line 1 */ |
1561 | if (v == &nfs_volume_list) { | 1568 | if (v == &nfs_volume_list) { |
1562 | seq_puts(m, "NV SERVER PORT DEV FSID\n"); | 1569 | seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); |
1563 | return 0; | 1570 | return 0; |
1564 | } | 1571 | } |
1565 | /* display one transport per line on subsequent lines */ | 1572 | /* display one transport per line on subsequent lines */ |
@@ -1573,12 +1580,13 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1573 | (unsigned long long) server->fsid.major, | 1580 | (unsigned long long) server->fsid.major, |
1574 | (unsigned long long) server->fsid.minor); | 1581 | (unsigned long long) server->fsid.minor); |
1575 | 1582 | ||
1576 | seq_printf(m, "v%u %s %s %-7s %-17s\n", | 1583 | seq_printf(m, "v%u %s %s %-7s %-17s %s\n", |
1577 | clp->rpc_ops->version, | 1584 | clp->rpc_ops->version, |
1578 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), | 1585 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
1579 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), | 1586 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), |
1580 | dev, | 1587 | dev, |
1581 | fsid); | 1588 | fsid, |
1589 | nfs_server_fscache_state(server)); | ||
1582 | 1590 | ||
1583 | return 0; | 1591 | return 0; |
1584 | } | 1592 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0abf3f331f56..3523b895eb4b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "delegation.h" | 35 | #include "delegation.h" |
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | ||
38 | 39 | ||
39 | #define NFSDBG_FACILITY NFSDBG_FILE | 40 | #define NFSDBG_FACILITY NFSDBG_FILE |
40 | 41 | ||
@@ -409,6 +410,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
409 | return copied; | 410 | return copied; |
410 | } | 411 | } |
411 | 412 | ||
413 | /* | ||
414 | * Partially or wholly invalidate a page | ||
415 | * - Release the private state associated with a page if undergoing complete | ||
416 | * page invalidation | ||
417 | * - Called if either PG_private or PG_fscache is set on the page | ||
418 | * - Caller holds page lock | ||
419 | */ | ||
412 | static void nfs_invalidate_page(struct page *page, unsigned long offset) | 420 | static void nfs_invalidate_page(struct page *page, unsigned long offset) |
413 | { | 421 | { |
414 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); | 422 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); |
@@ -417,23 +425,43 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) | |||
417 | return; | 425 | return; |
418 | /* Cancel any unstarted writes on this page */ | 426 | /* Cancel any unstarted writes on this page */ |
419 | nfs_wb_page_cancel(page->mapping->host, page); | 427 | nfs_wb_page_cancel(page->mapping->host, page); |
428 | |||
429 | nfs_fscache_invalidate_page(page, page->mapping->host); | ||
420 | } | 430 | } |
421 | 431 | ||
432 | /* | ||
433 | * Attempt to release the private state associated with a page | ||
434 | * - Called if either PG_private or PG_fscache is set on the page | ||
435 | * - Caller holds page lock | ||
436 | * - Return true (may release page) or false (may not) | ||
437 | */ | ||
422 | static int nfs_release_page(struct page *page, gfp_t gfp) | 438 | static int nfs_release_page(struct page *page, gfp_t gfp) |
423 | { | 439 | { |
424 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); | 440 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); |
425 | 441 | ||
426 | /* If PagePrivate() is set, then the page is not freeable */ | 442 | /* If PagePrivate() is set, then the page is not freeable */ |
427 | return 0; | 443 | if (PagePrivate(page)) |
444 | return 0; | ||
445 | return nfs_fscache_release_page(page, gfp); | ||
428 | } | 446 | } |
429 | 447 | ||
448 | /* | ||
449 | * Attempt to clear the private state associated with a page when an error | ||
450 | * occurs that requires the cached contents of an inode to be written back or | ||
451 | * destroyed | ||
452 | * - Called if either PG_private or fscache is set on the page | ||
453 | * - Caller holds page lock | ||
454 | * - Return 0 if successful, -error otherwise | ||
455 | */ | ||
430 | static int nfs_launder_page(struct page *page) | 456 | static int nfs_launder_page(struct page *page) |
431 | { | 457 | { |
432 | struct inode *inode = page->mapping->host; | 458 | struct inode *inode = page->mapping->host; |
459 | struct nfs_inode *nfsi = NFS_I(inode); | ||
433 | 460 | ||
434 | dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", | 461 | dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", |
435 | inode->i_ino, (long long)page_offset(page)); | 462 | inode->i_ino, (long long)page_offset(page)); |
436 | 463 | ||
464 | nfs_fscache_wait_on_page_write(nfsi, page); | ||
437 | return nfs_wb_page(inode, page); | 465 | return nfs_wb_page(inode, page); |
438 | } | 466 | } |
439 | 467 | ||
@@ -451,6 +479,11 @@ const struct address_space_operations nfs_file_aops = { | |||
451 | .launder_page = nfs_launder_page, | 479 | .launder_page = nfs_launder_page, |
452 | }; | 480 | }; |
453 | 481 | ||
482 | /* | ||
483 | * Notification that a PTE pointing to an NFS page is about to be made | ||
484 | * writable, implying that someone is about to modify the page through a | ||
485 | * shared-writable mapping | ||
486 | */ | ||
454 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 487 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
455 | { | 488 | { |
456 | struct page *page = vmf->page; | 489 | struct page *page = vmf->page; |
@@ -465,6 +498,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
465 | filp->f_mapping->host->i_ino, | 498 | filp->f_mapping->host->i_ino, |
466 | (long long)page_offset(page)); | 499 | (long long)page_offset(page)); |
467 | 500 | ||
501 | /* make sure the cache has finished storing the page */ | ||
502 | nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); | ||
503 | |||
468 | lock_page(page); | 504 | lock_page(page); |
469 | mapping = page->mapping; | 505 | mapping = page->mapping; |
470 | if (mapping != dentry->d_inode->i_mapping) | 506 | if (mapping != dentry->d_inode->i_mapping) |
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c new file mode 100644 index 000000000000..5b1006480bc2 --- /dev/null +++ b/fs/nfs/fscache-index.c | |||
@@ -0,0 +1,337 @@ | |||
1 | /* NFS FS-Cache index structure definition | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/nfs_fs.h> | ||
17 | #include <linux/nfs_fs_sb.h> | ||
18 | #include <linux/in6.h> | ||
19 | |||
20 | #include "internal.h" | ||
21 | #include "fscache.h" | ||
22 | |||
23 | #define NFSDBG_FACILITY NFSDBG_FSCACHE | ||
24 | |||
25 | /* | ||
26 | * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks | ||
27 | * the cookie for the top-level index object for NFS into here. The top-level | ||
28 | * index can than have other cache objects inserted into it. | ||
29 | */ | ||
30 | struct fscache_netfs nfs_fscache_netfs = { | ||
31 | .name = "nfs", | ||
32 | .version = 0, | ||
33 | }; | ||
34 | |||
35 | /* | ||
36 | * Register NFS for caching | ||
37 | */ | ||
38 | int nfs_fscache_register(void) | ||
39 | { | ||
40 | return fscache_register_netfs(&nfs_fscache_netfs); | ||
41 | } | ||
42 | |||
43 | /* | ||
44 | * Unregister NFS for caching | ||
45 | */ | ||
46 | void nfs_fscache_unregister(void) | ||
47 | { | ||
48 | fscache_unregister_netfs(&nfs_fscache_netfs); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Layout of the key for an NFS server cache object. | ||
53 | */ | ||
54 | struct nfs_server_key { | ||
55 | uint16_t nfsversion; /* NFS protocol version */ | ||
56 | uint16_t family; /* address family */ | ||
57 | uint16_t port; /* IP port */ | ||
58 | union { | ||
59 | struct in_addr ipv4_addr; /* IPv4 address */ | ||
60 | struct in6_addr ipv6_addr; /* IPv6 address */ | ||
61 | } addr[0]; | ||
62 | }; | ||
63 | |||
64 | /* | ||
65 | * Generate a key to describe a server in the main NFS index | ||
66 | * - We return the length of the key, or 0 if we can't generate one | ||
67 | */ | ||
68 | static uint16_t nfs_server_get_key(const void *cookie_netfs_data, | ||
69 | void *buffer, uint16_t bufmax) | ||
70 | { | ||
71 | const struct nfs_client *clp = cookie_netfs_data; | ||
72 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr; | ||
73 | const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr; | ||
74 | struct nfs_server_key *key = buffer; | ||
75 | uint16_t len = sizeof(struct nfs_server_key); | ||
76 | |||
77 | key->nfsversion = clp->rpc_ops->version; | ||
78 | key->family = clp->cl_addr.ss_family; | ||
79 | |||
80 | memset(key, 0, len); | ||
81 | |||
82 | switch (clp->cl_addr.ss_family) { | ||
83 | case AF_INET: | ||
84 | key->port = sin->sin_port; | ||
85 | key->addr[0].ipv4_addr = sin->sin_addr; | ||
86 | len += sizeof(key->addr[0].ipv4_addr); | ||
87 | break; | ||
88 | |||
89 | case AF_INET6: | ||
90 | key->port = sin6->sin6_port; | ||
91 | key->addr[0].ipv6_addr = sin6->sin6_addr; | ||
92 | len += sizeof(key->addr[0].ipv6_addr); | ||
93 | break; | ||
94 | |||
95 | default: | ||
96 | printk(KERN_WARNING "NFS: Unknown network family '%d'\n", | ||
97 | clp->cl_addr.ss_family); | ||
98 | len = 0; | ||
99 | break; | ||
100 | } | ||
101 | |||
102 | return len; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Define the server object for FS-Cache. This is used to describe a server | ||
107 | * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and | ||
108 | * server address parameters. | ||
109 | */ | ||
110 | const struct fscache_cookie_def nfs_fscache_server_index_def = { | ||
111 | .name = "NFS.server", | ||
112 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
113 | .get_key = nfs_server_get_key, | ||
114 | }; | ||
115 | |||
116 | /* | ||
117 | * Generate a key to describe a superblock key in the main NFS index | ||
118 | */ | ||
119 | static uint16_t nfs_super_get_key(const void *cookie_netfs_data, | ||
120 | void *buffer, uint16_t bufmax) | ||
121 | { | ||
122 | const struct nfs_fscache_key *key; | ||
123 | const struct nfs_server *nfss = cookie_netfs_data; | ||
124 | uint16_t len; | ||
125 | |||
126 | key = nfss->fscache_key; | ||
127 | len = sizeof(key->key) + key->key.uniq_len; | ||
128 | if (len > bufmax) { | ||
129 | len = 0; | ||
130 | } else { | ||
131 | memcpy(buffer, &key->key, sizeof(key->key)); | ||
132 | memcpy(buffer + sizeof(key->key), | ||
133 | key->key.uniquifier, key->key.uniq_len); | ||
134 | } | ||
135 | |||
136 | return len; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Define the superblock object for FS-Cache. This is used to describe a | ||
141 | * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS | ||
142 | * parameters that might cause a separate superblock. | ||
143 | */ | ||
144 | const struct fscache_cookie_def nfs_fscache_super_index_def = { | ||
145 | .name = "NFS.super", | ||
146 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
147 | .get_key = nfs_super_get_key, | ||
148 | }; | ||
149 | |||
150 | /* | ||
151 | * Definition of the auxiliary data attached to NFS inode storage objects | ||
152 | * within the cache. | ||
153 | * | ||
154 | * The contents of this struct are recorded in the on-disk local cache in the | ||
155 | * auxiliary data attached to the data storage object backing an inode. This | ||
156 | * permits coherency to be managed when a new inode binds to an already extant | ||
157 | * cache object. | ||
158 | */ | ||
159 | struct nfs_fscache_inode_auxdata { | ||
160 | struct timespec mtime; | ||
161 | struct timespec ctime; | ||
162 | loff_t size; | ||
163 | u64 change_attr; | ||
164 | }; | ||
165 | |||
166 | /* | ||
167 | * Generate a key to describe an NFS inode in an NFS server's index | ||
168 | */ | ||
169 | static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data, | ||
170 | void *buffer, uint16_t bufmax) | ||
171 | { | ||
172 | const struct nfs_inode *nfsi = cookie_netfs_data; | ||
173 | uint16_t nsize; | ||
174 | |||
175 | /* use the inode's NFS filehandle as the key */ | ||
176 | nsize = nfsi->fh.size; | ||
177 | memcpy(buffer, nfsi->fh.data, nsize); | ||
178 | return nsize; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * Get certain file attributes from the netfs data | ||
183 | * - This function can be absent for an index | ||
184 | * - Not permitted to return an error | ||
185 | * - The netfs data from the cookie being used as the source is presented | ||
186 | */ | ||
187 | static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data, | ||
188 | uint64_t *size) | ||
189 | { | ||
190 | const struct nfs_inode *nfsi = cookie_netfs_data; | ||
191 | |||
192 | *size = nfsi->vfs_inode.i_size; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Get the auxiliary data from netfs data | ||
197 | * - This function can be absent if the index carries no state data | ||
198 | * - Should store the auxiliary data in the buffer | ||
199 | * - Should return the amount of amount stored | ||
200 | * - Not permitted to return an error | ||
201 | * - The netfs data from the cookie being used as the source is presented | ||
202 | */ | ||
203 | static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, | ||
204 | void *buffer, uint16_t bufmax) | ||
205 | { | ||
206 | struct nfs_fscache_inode_auxdata auxdata; | ||
207 | const struct nfs_inode *nfsi = cookie_netfs_data; | ||
208 | |||
209 | memset(&auxdata, 0, sizeof(auxdata)); | ||
210 | auxdata.size = nfsi->vfs_inode.i_size; | ||
211 | auxdata.mtime = nfsi->vfs_inode.i_mtime; | ||
212 | auxdata.ctime = nfsi->vfs_inode.i_ctime; | ||
213 | |||
214 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) | ||
215 | auxdata.change_attr = nfsi->change_attr; | ||
216 | |||
217 | if (bufmax > sizeof(auxdata)) | ||
218 | bufmax = sizeof(auxdata); | ||
219 | |||
220 | memcpy(buffer, &auxdata, bufmax); | ||
221 | return bufmax; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * Consult the netfs about the state of an object | ||
226 | * - This function can be absent if the index carries no state data | ||
227 | * - The netfs data from the cookie being used as the target is | ||
228 | * presented, as is the auxiliary data | ||
229 | */ | ||
230 | static | ||
231 | enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, | ||
232 | const void *data, | ||
233 | uint16_t datalen) | ||
234 | { | ||
235 | struct nfs_fscache_inode_auxdata auxdata; | ||
236 | struct nfs_inode *nfsi = cookie_netfs_data; | ||
237 | |||
238 | if (datalen != sizeof(auxdata)) | ||
239 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
240 | |||
241 | memset(&auxdata, 0, sizeof(auxdata)); | ||
242 | auxdata.size = nfsi->vfs_inode.i_size; | ||
243 | auxdata.mtime = nfsi->vfs_inode.i_mtime; | ||
244 | auxdata.ctime = nfsi->vfs_inode.i_ctime; | ||
245 | |||
246 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) | ||
247 | auxdata.change_attr = nfsi->change_attr; | ||
248 | |||
249 | if (memcmp(data, &auxdata, datalen) != 0) | ||
250 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
251 | |||
252 | return FSCACHE_CHECKAUX_OKAY; | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Indication from FS-Cache that the cookie is no longer cached | ||
257 | * - This function is called when the backing store currently caching a cookie | ||
258 | * is removed | ||
259 | * - The netfs should use this to clean up any markers indicating cached pages | ||
260 | * - This is mandatory for any object that may have data | ||
261 | */ | ||
262 | static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data) | ||
263 | { | ||
264 | struct nfs_inode *nfsi = cookie_netfs_data; | ||
265 | struct pagevec pvec; | ||
266 | pgoff_t first; | ||
267 | int loop, nr_pages; | ||
268 | |||
269 | pagevec_init(&pvec, 0); | ||
270 | first = 0; | ||
271 | |||
272 | dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi); | ||
273 | |||
274 | for (;;) { | ||
275 | /* grab a bunch of pages to unmark */ | ||
276 | nr_pages = pagevec_lookup(&pvec, | ||
277 | nfsi->vfs_inode.i_mapping, | ||
278 | first, | ||
279 | PAGEVEC_SIZE - pagevec_count(&pvec)); | ||
280 | if (!nr_pages) | ||
281 | break; | ||
282 | |||
283 | for (loop = 0; loop < nr_pages; loop++) | ||
284 | ClearPageFsCache(pvec.pages[loop]); | ||
285 | |||
286 | first = pvec.pages[nr_pages - 1]->index + 1; | ||
287 | |||
288 | pvec.nr = nr_pages; | ||
289 | pagevec_release(&pvec); | ||
290 | cond_resched(); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Get an extra reference on a read context. | ||
296 | * - This function can be absent if the completion function doesn't require a | ||
297 | * context. | ||
298 | * - The read context is passed back to NFS in the event that a data read on the | ||
299 | * cache fails with EIO - in which case the server must be contacted to | ||
300 | * retrieve the data, which requires the read context for security. | ||
301 | */ | ||
302 | static void nfs_fh_get_context(void *cookie_netfs_data, void *context) | ||
303 | { | ||
304 | get_nfs_open_context(context); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * Release an extra reference on a read context. | ||
309 | * - This function can be absent if the completion function doesn't require a | ||
310 | * context. | ||
311 | */ | ||
312 | static void nfs_fh_put_context(void *cookie_netfs_data, void *context) | ||
313 | { | ||
314 | if (context) | ||
315 | put_nfs_open_context(context); | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * Define the inode object for FS-Cache. This is used to describe an inode | ||
320 | * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for | ||
321 | * an inode. | ||
322 | * | ||
323 | * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime | ||
324 | * held in the cache auxiliary data for the data storage object with those in | ||
325 | * the inode struct in memory. | ||
326 | */ | ||
327 | const struct fscache_cookie_def nfs_fscache_inode_object_def = { | ||
328 | .name = "NFS.fh", | ||
329 | .type = FSCACHE_COOKIE_TYPE_DATAFILE, | ||
330 | .get_key = nfs_fscache_inode_get_key, | ||
331 | .get_attr = nfs_fscache_inode_get_attr, | ||
332 | .get_aux = nfs_fscache_inode_get_aux, | ||
333 | .check_aux = nfs_fscache_inode_check_aux, | ||
334 | .now_uncached = nfs_fscache_inode_now_uncached, | ||
335 | .get_context = nfs_fh_get_context, | ||
336 | .put_context = nfs_fh_put_context, | ||
337 | }; | ||
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c new file mode 100644 index 000000000000..379be678cb7e --- /dev/null +++ b/fs/nfs/fscache.c | |||
@@ -0,0 +1,523 @@ | |||
1 | /* NFS filesystem cache interface | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/nfs_fs.h> | ||
17 | #include <linux/nfs_fs_sb.h> | ||
18 | #include <linux/in6.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include "internal.h" | ||
22 | #include "iostat.h" | ||
23 | #include "fscache.h" | ||
24 | |||
25 | #define NFSDBG_FACILITY NFSDBG_FSCACHE | ||
26 | |||
27 | static struct rb_root nfs_fscache_keys = RB_ROOT; | ||
28 | static DEFINE_SPINLOCK(nfs_fscache_keys_lock); | ||
29 | |||
30 | /* | ||
31 | * Get the per-client index cookie for an NFS client if the appropriate mount | ||
32 | * flag was set | ||
33 | * - We always try and get an index cookie for the client, but get filehandle | ||
34 | * cookies on a per-superblock basis, depending on the mount flags | ||
35 | */ | ||
36 | void nfs_fscache_get_client_cookie(struct nfs_client *clp) | ||
37 | { | ||
38 | /* create a cache index for looking up filehandles */ | ||
39 | clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, | ||
40 | &nfs_fscache_server_index_def, | ||
41 | clp); | ||
42 | dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", | ||
43 | clp, clp->fscache); | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * Dispose of a per-client cookie | ||
48 | */ | ||
49 | void nfs_fscache_release_client_cookie(struct nfs_client *clp) | ||
50 | { | ||
51 | dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n", | ||
52 | clp, clp->fscache); | ||
53 | |||
54 | fscache_relinquish_cookie(clp->fscache, 0); | ||
55 | clp->fscache = NULL; | ||
56 | } | ||
57 | |||
58 | /* | ||
59 | * Get the cache cookie for an NFS superblock. We have to handle | ||
60 | * uniquification here because the cache doesn't do it for us. | ||
61 | */ | ||
62 | void nfs_fscache_get_super_cookie(struct super_block *sb, | ||
63 | struct nfs_parsed_mount_data *data) | ||
64 | { | ||
65 | struct nfs_fscache_key *key, *xkey; | ||
66 | struct nfs_server *nfss = NFS_SB(sb); | ||
67 | struct rb_node **p, *parent; | ||
68 | const char *uniq = data->fscache_uniq ?: ""; | ||
69 | int diff, ulen; | ||
70 | |||
71 | ulen = strlen(uniq); | ||
72 | key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); | ||
73 | if (!key) | ||
74 | return; | ||
75 | |||
76 | key->nfs_client = nfss->nfs_client; | ||
77 | key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; | ||
78 | key->key.nfs_server.flags = nfss->flags; | ||
79 | key->key.nfs_server.rsize = nfss->rsize; | ||
80 | key->key.nfs_server.wsize = nfss->wsize; | ||
81 | key->key.nfs_server.acregmin = nfss->acregmin; | ||
82 | key->key.nfs_server.acregmax = nfss->acregmax; | ||
83 | key->key.nfs_server.acdirmin = nfss->acdirmin; | ||
84 | key->key.nfs_server.acdirmax = nfss->acdirmax; | ||
85 | key->key.nfs_server.fsid = nfss->fsid; | ||
86 | key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor; | ||
87 | |||
88 | key->key.uniq_len = ulen; | ||
89 | memcpy(key->key.uniquifier, uniq, ulen); | ||
90 | |||
91 | spin_lock(&nfs_fscache_keys_lock); | ||
92 | p = &nfs_fscache_keys.rb_node; | ||
93 | parent = NULL; | ||
94 | while (*p) { | ||
95 | parent = *p; | ||
96 | xkey = rb_entry(parent, struct nfs_fscache_key, node); | ||
97 | |||
98 | if (key->nfs_client < xkey->nfs_client) | ||
99 | goto go_left; | ||
100 | if (key->nfs_client > xkey->nfs_client) | ||
101 | goto go_right; | ||
102 | |||
103 | diff = memcmp(&key->key, &xkey->key, sizeof(key->key)); | ||
104 | if (diff < 0) | ||
105 | goto go_left; | ||
106 | if (diff > 0) | ||
107 | goto go_right; | ||
108 | |||
109 | if (key->key.uniq_len == 0) | ||
110 | goto non_unique; | ||
111 | diff = memcmp(key->key.uniquifier, | ||
112 | xkey->key.uniquifier, | ||
113 | key->key.uniq_len); | ||
114 | if (diff < 0) | ||
115 | goto go_left; | ||
116 | if (diff > 0) | ||
117 | goto go_right; | ||
118 | goto non_unique; | ||
119 | |||
120 | go_left: | ||
121 | p = &(*p)->rb_left; | ||
122 | continue; | ||
123 | go_right: | ||
124 | p = &(*p)->rb_right; | ||
125 | } | ||
126 | |||
127 | rb_link_node(&key->node, parent, p); | ||
128 | rb_insert_color(&key->node, &nfs_fscache_keys); | ||
129 | spin_unlock(&nfs_fscache_keys_lock); | ||
130 | nfss->fscache_key = key; | ||
131 | |||
132 | /* create a cache index for looking up filehandles */ | ||
133 | nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, | ||
134 | &nfs_fscache_super_index_def, | ||
135 | nfss); | ||
136 | dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", | ||
137 | nfss, nfss->fscache); | ||
138 | return; | ||
139 | |||
140 | non_unique: | ||
141 | spin_unlock(&nfs_fscache_keys_lock); | ||
142 | kfree(key); | ||
143 | nfss->fscache_key = NULL; | ||
144 | nfss->fscache = NULL; | ||
145 | printk(KERN_WARNING "NFS:" | ||
146 | " Cache request denied due to non-unique superblock keys\n"); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * release a per-superblock cookie | ||
151 | */ | ||
152 | void nfs_fscache_release_super_cookie(struct super_block *sb) | ||
153 | { | ||
154 | struct nfs_server *nfss = NFS_SB(sb); | ||
155 | |||
156 | dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n", | ||
157 | nfss, nfss->fscache); | ||
158 | |||
159 | fscache_relinquish_cookie(nfss->fscache, 0); | ||
160 | nfss->fscache = NULL; | ||
161 | |||
162 | if (nfss->fscache_key) { | ||
163 | spin_lock(&nfs_fscache_keys_lock); | ||
164 | rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys); | ||
165 | spin_unlock(&nfs_fscache_keys_lock); | ||
166 | kfree(nfss->fscache_key); | ||
167 | nfss->fscache_key = NULL; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Initialise the per-inode cache cookie pointer for an NFS inode. | ||
173 | */ | ||
174 | void nfs_fscache_init_inode_cookie(struct inode *inode) | ||
175 | { | ||
176 | NFS_I(inode)->fscache = NULL; | ||
177 | if (S_ISREG(inode->i_mode)) | ||
178 | set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * Get the per-inode cache cookie for an NFS inode. | ||
183 | */ | ||
184 | static void nfs_fscache_enable_inode_cookie(struct inode *inode) | ||
185 | { | ||
186 | struct super_block *sb = inode->i_sb; | ||
187 | struct nfs_inode *nfsi = NFS_I(inode); | ||
188 | |||
189 | if (nfsi->fscache || !NFS_FSCACHE(inode)) | ||
190 | return; | ||
191 | |||
192 | if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { | ||
193 | nfsi->fscache = fscache_acquire_cookie( | ||
194 | NFS_SB(sb)->fscache, | ||
195 | &nfs_fscache_inode_object_def, | ||
196 | nfsi); | ||
197 | |||
198 | dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", | ||
199 | sb, nfsi, nfsi->fscache); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Release a per-inode cookie. | ||
205 | */ | ||
206 | void nfs_fscache_release_inode_cookie(struct inode *inode) | ||
207 | { | ||
208 | struct nfs_inode *nfsi = NFS_I(inode); | ||
209 | |||
210 | dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", | ||
211 | nfsi, nfsi->fscache); | ||
212 | |||
213 | fscache_relinquish_cookie(nfsi->fscache, 0); | ||
214 | nfsi->fscache = NULL; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Retire a per-inode cookie, destroying the data attached to it. | ||
219 | */ | ||
220 | void nfs_fscache_zap_inode_cookie(struct inode *inode) | ||
221 | { | ||
222 | struct nfs_inode *nfsi = NFS_I(inode); | ||
223 | |||
224 | dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", | ||
225 | nfsi, nfsi->fscache); | ||
226 | |||
227 | fscache_relinquish_cookie(nfsi->fscache, 1); | ||
228 | nfsi->fscache = NULL; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Turn off the cache with regard to a per-inode cookie if opened for writing, | ||
233 | * invalidating all the pages in the page cache relating to the associated | ||
234 | * inode to clear the per-page caching. | ||
235 | */ | ||
236 | static void nfs_fscache_disable_inode_cookie(struct inode *inode) | ||
237 | { | ||
238 | clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); | ||
239 | |||
240 | if (NFS_I(inode)->fscache) { | ||
241 | dfprintk(FSCACHE, | ||
242 | "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); | ||
243 | |||
244 | /* Need to invalidate any mapped pages that were read in before | ||
245 | * turning off the cache. | ||
246 | */ | ||
247 | if (inode->i_mapping && inode->i_mapping->nrpages) | ||
248 | invalidate_inode_pages2(inode->i_mapping); | ||
249 | |||
250 | nfs_fscache_zap_inode_cookie(inode); | ||
251 | } | ||
252 | } | ||
253 | |||
/*
 * wait_on_bit() sleep function for uninterruptible waiting.
 * - NOTE(review): returning 0 signals the waiter should continue waiting
 *   (per the wait_on_bit() action-function contract) — confirm against the
 *   kernel version this targets.
 */
static int nfs_fscache_wait_bit(void *flags)
{
	schedule();
	return 0;
}
262 | |||
/*
 * Lock against someone else trying to also acquire or relinquish a cookie
 * on this inode.  Implemented as a bit lock on NFS_INO_FSCACHE_LOCK.
 */
static inline void nfs_fscache_inode_lock(struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	/* if the bit was already set, sleep (uninterruptibly) until the
	 * holder clears it and wakes us, then try to take it again */
	while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags))
		wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK,
			    nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE);
}
274 | |||
/*
 * Unlock cookie management lock.
 * - The barrier before the clear orders the holder's cookie updates ahead of
 *   the release; the barrier after orders the clear ahead of the wake-up so
 *   woken waiters observe the bit clear.
 */
static inline void nfs_fscache_inode_unlock(struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	smp_mb__before_clear_bit();
	clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK);
}
287 | |||
288 | /* | ||
289 | * Decide if we should enable or disable local caching for this inode. | ||
290 | * - For now, with NFS, only regular files that are open read-only will be able | ||
291 | * to use the cache. | ||
292 | * - May be invoked multiple times in parallel by parallel nfs_open() functions. | ||
293 | */ | ||
294 | void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) | ||
295 | { | ||
296 | if (NFS_FSCACHE(inode)) { | ||
297 | nfs_fscache_inode_lock(inode); | ||
298 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) | ||
299 | nfs_fscache_disable_inode_cookie(inode); | ||
300 | else | ||
301 | nfs_fscache_enable_inode_cookie(inode); | ||
302 | nfs_fscache_inode_unlock(inode); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Replace a per-inode cookie due to revalidation detecting a file having | ||
308 | * changed on the server. | ||
309 | */ | ||
310 | void nfs_fscache_reset_inode_cookie(struct inode *inode) | ||
311 | { | ||
312 | struct nfs_inode *nfsi = NFS_I(inode); | ||
313 | struct nfs_server *nfss = NFS_SERVER(inode); | ||
314 | struct fscache_cookie *old = nfsi->fscache; | ||
315 | |||
316 | nfs_fscache_inode_lock(inode); | ||
317 | if (nfsi->fscache) { | ||
318 | /* retire the current fscache cache and get a new one */ | ||
319 | fscache_relinquish_cookie(nfsi->fscache, 1); | ||
320 | |||
321 | nfsi->fscache = fscache_acquire_cookie( | ||
322 | nfss->nfs_client->fscache, | ||
323 | &nfs_fscache_inode_object_def, | ||
324 | nfsi); | ||
325 | |||
326 | dfprintk(FSCACHE, | ||
327 | "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", | ||
328 | nfss, nfsi, old, nfsi->fscache); | ||
329 | } | ||
330 | nfs_fscache_inode_unlock(inode); | ||
331 | } | ||
332 | |||
/*
 * Release the caching state associated with a page, if the page isn't busy
 * interacting with the cache.
 * - Returns true (can release page) or false (page busy).
 */
int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
	struct fscache_cookie *cookie = nfsi->fscache;

	/* NOTE(review): assumes a cookie is always attached by the time this
	 * is reached — presumably the caller only invokes it for pages with
	 * PG_fscache set; verify against the releasepage hook. */
	BUG_ON(!cookie);

	if (fscache_check_page_write(cookie, page)) {
		/* the cache is still writing this page out; we may only wait
		 * for it to finish if the allocation context allows blocking */
		if (!(gfp & __GFP_WAIT))
			return 0;
		fscache_wait_on_page_write(cookie, page);
	}

	if (PageFsCache(page)) {
		dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
			 cookie, page, nfsi);

		fscache_uncache_page(cookie, page);
		nfs_add_fscache_stats(page->mapping->host,
				      NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
	}

	return 1;
}
362 | |||
/*
 * Release the caching state associated with a page if undergoing complete page
 * invalidation.  The caller (nfs_fscache_invalidate_page()) has already
 * checked PageFsCache(), so a cookie must be attached.
 */
void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct fscache_cookie *cookie = nfsi->fscache;

	BUG_ON(!cookie);

	dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
		 cookie, page, nfsi);

	/* an in-flight cache write must complete before the page can go away */
	fscache_wait_on_page_write(cookie, page);

	BUG_ON(!PageLocked(page));
	fscache_uncache_page(cookie, page);
	nfs_add_fscache_stats(page->mapping->host,
			      NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
}
384 | |||
/*
 * Handle completion of a page being read from the cache.
 * - Called in process (keventd) context.
 * - @context is the nfs_open_context attached when the read was dispatched.
 */
static void nfs_readpage_from_fscache_complete(struct page *page,
					       void *context,
					       int error)
{
	dfprintk(FSCACHE,
		 "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
		 page, context, error);

	/* if the read completes with an error, we just unlock the page and let
	 * the VM reissue the readpage */
	if (!error) {
		SetPageUptodate(page);
		unlock_page(page);
	} else {
		/* fall back to reading from the server; if even that cannot
		 * be started, unlock so the VM can retry */
		error = nfs_readpage_async(context, page->mapping->host, page);
		if (error)
			unlock_page(page);
	}
}
408 | |||
/*
 * Retrieve a page from fscache.
 *
 * Returns 0 if a read was dispatched to the cache (completion will be
 * signalled via nfs_readpage_from_fscache_complete()), 1 if the caller
 * should fetch the page from the server instead (inode/page not cached),
 * or a negative error code.
 */
int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
				struct inode *inode, struct page *page)
{
	int ret;

	dfprintk(FSCACHE,
		 "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
		 NFS_I(inode)->fscache, page, page->index, page->flags, inode);

	ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
					 page,
					 nfs_readpage_from_fscache_complete,
					 ctx,
					 GFP_KERNEL);

	switch (ret) {
	case 0: /* read BIO submitted (page in fscache) */
		dfprintk(FSCACHE,
			 "NFS: readpage_from_fscache: BIO submitted\n");
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
		return ret;

	case -ENOBUFS: /* inode not in cache */
	case -ENODATA: /* page not in cache */
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
		dfprintk(FSCACHE,
			 "NFS: readpage_from_fscache %d\n", ret);
		return 1;	/* tell the caller to read from the server */

	default:
		dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
	}
	return ret;
}
447 | |||
448 | /* | ||
449 | * Retrieve a set of pages from fscache | ||
450 | */ | ||
451 | int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, | ||
452 | struct inode *inode, | ||
453 | struct address_space *mapping, | ||
454 | struct list_head *pages, | ||
455 | unsigned *nr_pages) | ||
456 | { | ||
457 | int ret, npages = *nr_pages; | ||
458 | |||
459 | dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", | ||
460 | NFS_I(inode)->fscache, npages, inode); | ||
461 | |||
462 | ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, | ||
463 | mapping, pages, nr_pages, | ||
464 | nfs_readpage_from_fscache_complete, | ||
465 | ctx, | ||
466 | mapping_gfp_mask(mapping)); | ||
467 | if (*nr_pages < npages) | ||
468 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, | ||
469 | npages); | ||
470 | if (*nr_pages > 0) | ||
471 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, | ||
472 | *nr_pages); | ||
473 | |||
474 | switch (ret) { | ||
475 | case 0: /* read submitted to the cache for all pages */ | ||
476 | BUG_ON(!list_empty(pages)); | ||
477 | BUG_ON(*nr_pages != 0); | ||
478 | dfprintk(FSCACHE, | ||
479 | "NFS: nfs_getpages_from_fscache: submitted\n"); | ||
480 | |||
481 | return ret; | ||
482 | |||
483 | case -ENOBUFS: /* some pages aren't cached and can't be */ | ||
484 | case -ENODATA: /* some pages aren't cached */ | ||
485 | dfprintk(FSCACHE, | ||
486 | "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); | ||
487 | return 1; | ||
488 | |||
489 | default: | ||
490 | dfprintk(FSCACHE, | ||
491 | "NFS: nfs_getpages_from_fscache: ret %d\n", ret); | ||
492 | } | ||
493 | |||
494 | return ret; | ||
495 | } | ||
496 | |||
/*
 * Store a newly fetched page in fscache.
 * - PG_fscache must be set on the page.
 * - @sync is passed through for tracing only; the write itself is issued with
 *   GFP_KERNEL regardless.
 */
void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
{
	int ret;

	dfprintk(FSCACHE,
		 "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
		 NFS_I(inode)->fscache, page, page->index, page->flags, sync);

	ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL);
	dfprintk(FSCACHE,
		 "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
		 page, page->index, page->flags, ret);

	if (ret != 0) {
		/* the cache refused the page: drop its caching state so it is
		 * not left looking as if a write is in progress */
		fscache_uncache_page(NFS_I(inode)->fscache, page);
		nfs_add_fscache_stats(inode,
				      NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
	} else {
		nfs_add_fscache_stats(inode,
				      NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
	}
}
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h new file mode 100644 index 000000000000..6e809bb0ff08 --- /dev/null +++ b/fs/nfs/fscache.h | |||
@@ -0,0 +1,220 @@ | |||
1 | /* NFS filesystem cache interface definitions | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #ifndef _NFS_FSCACHE_H | ||
13 | #define _NFS_FSCACHE_H | ||
14 | |||
15 | #include <linux/nfs_fs.h> | ||
16 | #include <linux/nfs_mount.h> | ||
17 | #include <linux/nfs4_mount.h> | ||
18 | #include <linux/fscache.h> | ||
19 | |||
20 | #ifdef CONFIG_NFS_FSCACHE | ||
21 | |||
/*
 * set of NFS FS-Cache objects that form a superblock key
 * - an rbtree of these (nfs_fscache_keys) is kept so that no two mounted
 *   superblocks end up with the same cache key
 */
struct nfs_fscache_key {
	struct rb_node		node;		/* link in nfs_fscache_keys */
	struct nfs_client	*nfs_client;	/* the server */

	/* the elements of the unique key - as used by nfs_compare_super() and
	 * nfs_compare_mount_options() to distinguish superblocks */
	struct {
		struct {
			unsigned long	s_flags;	/* various flags
							 * (& NFS_MS_MASK) */
		} super;

		struct {
			struct nfs_fsid fsid;
			int		flags;
			unsigned int	rsize;		/* read size */
			unsigned int	wsize;		/* write size */
			unsigned int	acregmin;	/* attr cache timeouts */
			unsigned int	acregmax;
			unsigned int	acdirmin;
			unsigned int	acdirmax;
		} nfs_server;

		struct {
			rpc_authflavor_t au_flavor;
		} rpc_auth;

		/* uniquifier - can be used if nfs_server.flags includes
		 * NFS_MOUNT_UNSHARED */
		u8 uniq_len;			/* length of uniquifier[] */
		char uniquifier[0];		/* trailing variable-length data */
	} key;
};
58 | |||
59 | /* | ||
60 | * fscache-index.c | ||
61 | */ | ||
62 | extern struct fscache_netfs nfs_fscache_netfs; | ||
63 | extern const struct fscache_cookie_def nfs_fscache_server_index_def; | ||
64 | extern const struct fscache_cookie_def nfs_fscache_super_index_def; | ||
65 | extern const struct fscache_cookie_def nfs_fscache_inode_object_def; | ||
66 | |||
67 | extern int nfs_fscache_register(void); | ||
68 | extern void nfs_fscache_unregister(void); | ||
69 | |||
70 | /* | ||
71 | * fscache.c | ||
72 | */ | ||
73 | extern void nfs_fscache_get_client_cookie(struct nfs_client *); | ||
74 | extern void nfs_fscache_release_client_cookie(struct nfs_client *); | ||
75 | |||
76 | extern void nfs_fscache_get_super_cookie(struct super_block *, | ||
77 | struct nfs_parsed_mount_data *); | ||
78 | extern void nfs_fscache_release_super_cookie(struct super_block *); | ||
79 | |||
80 | extern void nfs_fscache_init_inode_cookie(struct inode *); | ||
81 | extern void nfs_fscache_release_inode_cookie(struct inode *); | ||
82 | extern void nfs_fscache_zap_inode_cookie(struct inode *); | ||
83 | extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); | ||
84 | extern void nfs_fscache_reset_inode_cookie(struct inode *); | ||
85 | |||
86 | extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); | ||
87 | extern int nfs_fscache_release_page(struct page *, gfp_t); | ||
88 | |||
89 | extern int __nfs_readpage_from_fscache(struct nfs_open_context *, | ||
90 | struct inode *, struct page *); | ||
91 | extern int __nfs_readpages_from_fscache(struct nfs_open_context *, | ||
92 | struct inode *, struct address_space *, | ||
93 | struct list_head *, unsigned *); | ||
94 | extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int); | ||
95 | |||
/*
 * wait for a page to complete writing to the cache
 * - no-op unless the page is flagged as being written (PG_fscache)
 */
static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
						  struct page *page)
{
	if (PageFsCache(page))
		fscache_wait_on_page_write(nfsi->fscache, page);
}
105 | |||
/*
 * release the caching state associated with a page if undergoing complete page
 * invalidation
 * - only calls into fscache.c when the page actually carries cache state
 */
static inline void nfs_fscache_invalidate_page(struct page *page,
					       struct inode *inode)
{
	if (PageFsCache(page))
		__nfs_fscache_invalidate_page(page, inode);
}
116 | |||
/*
 * Retrieve a page from an inode data storage object.
 * - returns -ENOBUFS if the inode has no cache cookie, so the caller falls
 *   through to reading from the server
 */
static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
					    struct inode *inode,
					    struct page *page)
{
	if (NFS_I(inode)->fscache)
		return __nfs_readpage_from_fscache(ctx, inode, page);
	return -ENOBUFS;
}
128 | |||
/*
 * Retrieve a set of pages from an inode data storage object.
 * - returns -ENOBUFS if the inode has no cache cookie, so the caller falls
 *   through to reading from the server
 */
static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
					     struct inode *inode,
					     struct address_space *mapping,
					     struct list_head *pages,
					     unsigned *nr_pages)
{
	if (NFS_I(inode)->fscache)
		return __nfs_readpages_from_fscache(ctx, inode, mapping, pages,
						    nr_pages);
	return -ENOBUFS;
}
143 | |||
/*
 * Store a page newly fetched from the server in an inode data storage object
 * in the cache.
 * - no-op unless the page carries cache state (PG_fscache)
 */
static inline void nfs_readpage_to_fscache(struct inode *inode,
					   struct page *page,
					   int sync)
{
	if (PageFsCache(page))
		__nfs_readpage_to_fscache(inode, page, sync);
}
155 | |||
/*
 * indicate the client caching state as readable text
 * - returns fixed-width strings ("yes" / "no ") so the output columns line up
 *   in the mountstats display
 */
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
	if (server->fscache && (server->options & NFS_OPTION_FSCACHE))
		return "yes";
	return "no ";
}
165 | |||
166 | |||
#else /* CONFIG_NFS_FSCACHE */
/*
 * Stub versions for when FS-Cache support is compiled out: cookie management
 * becomes a no-op, pages are always releasable, and cache reads report
 * -ENOBUFS so the callers fall straight through to the server.
 */
static inline int nfs_fscache_register(void) { return 0; }
static inline void nfs_fscache_unregister(void) {}

static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}

static inline void nfs_fscache_get_super_cookie(
	struct super_block *sb,
	struct nfs_parsed_mount_data *data)
{
}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}

static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {}
static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {}
static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
						struct file *filp) {}
static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {}

static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
	return 1; /* True: may release page */
}
static inline void nfs_fscache_invalidate_page(struct page *page,
					       struct inode *inode) {}
static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
						  struct page *page) {}

static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
					    struct inode *inode,
					    struct page *page)
{
	return -ENOBUFS;
}
static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
					     struct inode *inode,
					     struct address_space *mapping,
					     struct list_head *pages,
					     unsigned *nr_pages)
{
	return -ENOBUFS;
}
static inline void nfs_readpage_to_fscache(struct inode *inode,
					   struct page *page, int sync) {}

static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
	return "no ";
}
218 | |||
219 | #endif /* CONFIG_NFS_FSCACHE */ | ||
220 | #endif /* _NFS_FSCACHE_H */ | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a834d1d850b7..64f87194d390 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "delegation.h" | 46 | #include "delegation.h" |
47 | #include "iostat.h" | 47 | #include "iostat.h" |
48 | #include "internal.h" | 48 | #include "internal.h" |
49 | #include "fscache.h" | ||
49 | 50 | ||
50 | #define NFSDBG_FACILITY NFSDBG_VFS | 51 | #define NFSDBG_FACILITY NFSDBG_VFS |
51 | 52 | ||
@@ -121,6 +122,7 @@ void nfs_clear_inode(struct inode *inode) | |||
121 | BUG_ON(!list_empty(&NFS_I(inode)->open_files)); | 122 | BUG_ON(!list_empty(&NFS_I(inode)->open_files)); |
122 | nfs_zap_acl_cache(inode); | 123 | nfs_zap_acl_cache(inode); |
123 | nfs_access_zap_cache(inode); | 124 | nfs_access_zap_cache(inode); |
125 | nfs_fscache_release_inode_cookie(inode); | ||
124 | } | 126 | } |
125 | 127 | ||
126 | /** | 128 | /** |
@@ -355,6 +357,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
355 | nfsi->attrtimeo_timestamp = now; | 357 | nfsi->attrtimeo_timestamp = now; |
356 | nfsi->access_cache = RB_ROOT; | 358 | nfsi->access_cache = RB_ROOT; |
357 | 359 | ||
360 | nfs_fscache_init_inode_cookie(inode); | ||
361 | |||
358 | unlock_new_inode(inode); | 362 | unlock_new_inode(inode); |
359 | } else | 363 | } else |
360 | nfs_refresh_inode(inode, fattr); | 364 | nfs_refresh_inode(inode, fattr); |
@@ -686,6 +690,7 @@ int nfs_open(struct inode *inode, struct file *filp) | |||
686 | ctx->mode = filp->f_mode; | 690 | ctx->mode = filp->f_mode; |
687 | nfs_file_set_open_context(filp, ctx); | 691 | nfs_file_set_open_context(filp, ctx); |
688 | put_nfs_open_context(ctx); | 692 | put_nfs_open_context(ctx); |
693 | nfs_fscache_set_inode_cookie(inode, filp); | ||
689 | return 0; | 694 | return 0; |
690 | } | 695 | } |
691 | 696 | ||
@@ -786,6 +791,7 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa | |||
786 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); | 791 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); |
787 | spin_unlock(&inode->i_lock); | 792 | spin_unlock(&inode->i_lock); |
788 | nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); | 793 | nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); |
794 | nfs_fscache_reset_inode_cookie(inode); | ||
789 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", | 795 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", |
790 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 796 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
791 | return 0; | 797 | return 0; |
@@ -1030,6 +1036,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1030 | spin_lock(&inode->i_lock); | 1036 | spin_lock(&inode->i_lock); |
1031 | status = nfs_refresh_inode_locked(inode, fattr); | 1037 | status = nfs_refresh_inode_locked(inode, fattr); |
1032 | spin_unlock(&inode->i_lock); | 1038 | spin_unlock(&inode->i_lock); |
1039 | |||
1033 | return status; | 1040 | return status; |
1034 | } | 1041 | } |
1035 | 1042 | ||
@@ -1436,6 +1443,10 @@ static int __init init_nfs_fs(void) | |||
1436 | { | 1443 | { |
1437 | int err; | 1444 | int err; |
1438 | 1445 | ||
1446 | err = nfs_fscache_register(); | ||
1447 | if (err < 0) | ||
1448 | goto out7; | ||
1449 | |||
1439 | err = nfsiod_start(); | 1450 | err = nfsiod_start(); |
1440 | if (err) | 1451 | if (err) |
1441 | goto out6; | 1452 | goto out6; |
@@ -1488,6 +1499,8 @@ out4: | |||
1488 | out5: | 1499 | out5: |
1489 | nfsiod_stop(); | 1500 | nfsiod_stop(); |
1490 | out6: | 1501 | out6: |
1502 | nfs_fscache_unregister(); | ||
1503 | out7: | ||
1491 | return err; | 1504 | return err; |
1492 | } | 1505 | } |
1493 | 1506 | ||
@@ -1498,6 +1511,7 @@ static void __exit exit_nfs_fs(void) | |||
1498 | nfs_destroy_readpagecache(); | 1511 | nfs_destroy_readpagecache(); |
1499 | nfs_destroy_inodecache(); | 1512 | nfs_destroy_inodecache(); |
1500 | nfs_destroy_nfspagecache(); | 1513 | nfs_destroy_nfspagecache(); |
1514 | nfs_fscache_unregister(); | ||
1501 | #ifdef CONFIG_PROC_FS | 1515 | #ifdef CONFIG_PROC_FS |
1502 | rpc_proc_unregister("nfs"); | 1516 | rpc_proc_unregister("nfs"); |
1503 | #endif | 1517 | #endif |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2041f68ff1cc..e4d6a8348adf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #include <linux/mount.h> | 5 | #include <linux/mount.h> |
6 | #include <linux/security.h> | 6 | #include <linux/security.h> |
7 | 7 | ||
8 | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | ||
9 | |||
8 | struct nfs_string; | 10 | struct nfs_string; |
9 | 11 | ||
10 | /* Maximum number of readahead requests | 12 | /* Maximum number of readahead requests |
@@ -37,10 +39,12 @@ struct nfs_parsed_mount_data { | |||
37 | int acregmin, acregmax, | 39 | int acregmin, acregmax, |
38 | acdirmin, acdirmax; | 40 | acdirmin, acdirmax; |
39 | int namlen; | 41 | int namlen; |
42 | unsigned int options; | ||
40 | unsigned int bsize; | 43 | unsigned int bsize; |
41 | unsigned int auth_flavor_len; | 44 | unsigned int auth_flavor_len; |
42 | rpc_authflavor_t auth_flavors[1]; | 45 | rpc_authflavor_t auth_flavors[1]; |
43 | char *client_address; | 46 | char *client_address; |
47 | char *fscache_uniq; | ||
44 | 48 | ||
45 | struct { | 49 | struct { |
46 | struct sockaddr_storage address; | 50 | struct sockaddr_storage address; |
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index a36952810032..a2ab2529b5ca 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h | |||
@@ -16,6 +16,9 @@ | |||
16 | 16 | ||
17 | struct nfs_iostats { | 17 | struct nfs_iostats { |
18 | unsigned long long bytes[__NFSIOS_BYTESMAX]; | 18 | unsigned long long bytes[__NFSIOS_BYTESMAX]; |
19 | #ifdef CONFIG_NFS_FSCACHE | ||
20 | unsigned long long fscache[__NFSIOS_FSCACHEMAX]; | ||
21 | #endif | ||
19 | unsigned long events[__NFSIOS_COUNTSMAX]; | 22 | unsigned long events[__NFSIOS_COUNTSMAX]; |
20 | } ____cacheline_aligned; | 23 | } ____cacheline_aligned; |
21 | 24 | ||
@@ -57,6 +60,21 @@ static inline void nfs_add_stats(const struct inode *inode, | |||
57 | nfs_add_server_stats(NFS_SERVER(inode), stat, addend); | 60 | nfs_add_server_stats(NFS_SERVER(inode), stat, addend); |
58 | } | 61 | } |
59 | 62 | ||
63 | #ifdef CONFIG_NFS_FSCACHE | ||
64 | static inline void nfs_add_fscache_stats(struct inode *inode, | ||
65 | enum nfs_stat_fscachecounters stat, | ||
66 | unsigned long addend) | ||
67 | { | ||
68 | struct nfs_iostats *iostats; | ||
69 | int cpu; | ||
70 | |||
71 | cpu = get_cpu(); | ||
72 | iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); | ||
73 | iostats->fscache[stat] += addend; | ||
74 | put_cpu_no_resched(); | ||
75 | } | ||
76 | #endif | ||
77 | |||
60 | static inline struct nfs_iostats *nfs_alloc_iostats(void) | 78 | static inline struct nfs_iostats *nfs_alloc_iostats(void) |
61 | { | 79 | { |
62 | return alloc_percpu(struct nfs_iostats); | 80 | return alloc_percpu(struct nfs_iostats); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index b82fe6847f14..d0cc5ce0edfe 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -328,7 +328,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
328 | data->arg.create.verifier[1] = current->pid; | 328 | data->arg.create.verifier[1] = current->pid; |
329 | } | 329 | } |
330 | 330 | ||
331 | sattr->ia_mode &= ~current->fs->umask; | 331 | sattr->ia_mode &= ~current_umask(); |
332 | 332 | ||
333 | for (;;) { | 333 | for (;;) { |
334 | status = nfs3_do_create(dir, dentry, data); | 334 | status = nfs3_do_create(dir, dentry, data); |
@@ -528,7 +528,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) | |||
528 | 528 | ||
529 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); | 529 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); |
530 | 530 | ||
531 | sattr->ia_mode &= ~current->fs->umask; | 531 | sattr->ia_mode &= ~current_umask(); |
532 | 532 | ||
533 | data = nfs3_alloc_createdata(); | 533 | data = nfs3_alloc_createdata(); |
534 | if (data == NULL) | 534 | if (data == NULL) |
@@ -639,7 +639,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
639 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, | 639 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, |
640 | MAJOR(rdev), MINOR(rdev)); | 640 | MAJOR(rdev), MINOR(rdev)); |
641 | 641 | ||
642 | sattr->ia_mode &= ~current->fs->umask; | 642 | sattr->ia_mode &= ~current_umask(); |
643 | 643 | ||
644 | data = nfs3_alloc_createdata(); | 644 | data = nfs3_alloc_createdata(); |
645 | if (data == NULL) | 645 | if (data == NULL) |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 97bacccff579..a4d242680299 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -1501,7 +1501,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
1501 | attr.ia_mode = nd->intent.open.create_mode; | 1501 | attr.ia_mode = nd->intent.open.create_mode; |
1502 | attr.ia_valid = ATTR_MODE; | 1502 | attr.ia_valid = ATTR_MODE; |
1503 | if (!IS_POSIXACL(dir)) | 1503 | if (!IS_POSIXACL(dir)) |
1504 | attr.ia_mode &= ~current->fs->umask; | 1504 | attr.ia_mode &= ~current_umask(); |
1505 | } else { | 1505 | } else { |
1506 | attr.ia_valid = 0; | 1506 | attr.ia_valid = 0; |
1507 | BUG_ON(nd->intent.open.flags & O_CREAT); | 1507 | BUG_ON(nd->intent.open.flags & O_CREAT); |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f856004bb7fa..4ace3c50a8eb 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include "internal.h" | 25 | #include "internal.h" |
26 | #include "iostat.h" | 26 | #include "iostat.h" |
27 | #include "fscache.h" | ||
27 | 28 | ||
28 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 29 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
29 | 30 | ||
@@ -111,8 +112,8 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) | |||
111 | } | 112 | } |
112 | } | 113 | } |
113 | 114 | ||
114 | static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | 115 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, |
115 | struct page *page) | 116 | struct page *page) |
116 | { | 117 | { |
117 | LIST_HEAD(one_request); | 118 | LIST_HEAD(one_request); |
118 | struct nfs_page *new; | 119 | struct nfs_page *new; |
@@ -139,6 +140,11 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
139 | 140 | ||
140 | static void nfs_readpage_release(struct nfs_page *req) | 141 | static void nfs_readpage_release(struct nfs_page *req) |
141 | { | 142 | { |
143 | struct inode *d_inode = req->wb_context->path.dentry->d_inode; | ||
144 | |||
145 | if (PageUptodate(req->wb_page)) | ||
146 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | ||
147 | |||
142 | unlock_page(req->wb_page); | 148 | unlock_page(req->wb_page); |
143 | 149 | ||
144 | dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", | 150 | dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", |
@@ -510,8 +516,15 @@ int nfs_readpage(struct file *file, struct page *page) | |||
510 | } else | 516 | } else |
511 | ctx = get_nfs_open_context(nfs_file_open_context(file)); | 517 | ctx = get_nfs_open_context(nfs_file_open_context(file)); |
512 | 518 | ||
519 | if (!IS_SYNC(inode)) { | ||
520 | error = nfs_readpage_from_fscache(ctx, inode, page); | ||
521 | if (error == 0) | ||
522 | goto out; | ||
523 | } | ||
524 | |||
513 | error = nfs_readpage_async(ctx, inode, page); | 525 | error = nfs_readpage_async(ctx, inode, page); |
514 | 526 | ||
527 | out: | ||
515 | put_nfs_open_context(ctx); | 528 | put_nfs_open_context(ctx); |
516 | return error; | 529 | return error; |
517 | out_unlock: | 530 | out_unlock: |
@@ -584,6 +597,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
584 | return -EBADF; | 597 | return -EBADF; |
585 | } else | 598 | } else |
586 | desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); | 599 | desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); |
600 | |||
601 | /* attempt to read as many of the pages as possible from the cache | ||
602 | * - this returns -ENOBUFS immediately if the cookie is negative | ||
603 | */ | ||
604 | ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, | ||
605 | pages, &nr_pages); | ||
606 | if (ret == 0) | ||
607 | goto read_complete; /* all pages were read */ | ||
608 | |||
587 | if (rsize < PAGE_CACHE_SIZE) | 609 | if (rsize < PAGE_CACHE_SIZE) |
588 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 610 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
589 | else | 611 | else |
@@ -594,6 +616,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
594 | nfs_pageio_complete(&pgio); | 616 | nfs_pageio_complete(&pgio); |
595 | npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 617 | npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
596 | nfs_add_stats(inode, NFSIOS_READPAGES, npages); | 618 | nfs_add_stats(inode, NFSIOS_READPAGES, npages); |
619 | read_complete: | ||
597 | put_nfs_open_context(desc.ctx); | 620 | put_nfs_open_context(desc.ctx); |
598 | out: | 621 | out: |
599 | return ret; | 622 | return ret; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0942fcbbad3c..82eaadbff408 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "delegation.h" | 60 | #include "delegation.h" |
61 | #include "iostat.h" | 61 | #include "iostat.h" |
62 | #include "internal.h" | 62 | #include "internal.h" |
63 | #include "fscache.h" | ||
63 | 64 | ||
64 | #define NFSDBG_FACILITY NFSDBG_VFS | 65 | #define NFSDBG_FACILITY NFSDBG_VFS |
65 | 66 | ||
@@ -76,6 +77,7 @@ enum { | |||
76 | Opt_rdirplus, Opt_nordirplus, | 77 | Opt_rdirplus, Opt_nordirplus, |
77 | Opt_sharecache, Opt_nosharecache, | 78 | Opt_sharecache, Opt_nosharecache, |
78 | Opt_resvport, Opt_noresvport, | 79 | Opt_resvport, Opt_noresvport, |
80 | Opt_fscache, Opt_nofscache, | ||
79 | 81 | ||
80 | /* Mount options that take integer arguments */ | 82 | /* Mount options that take integer arguments */ |
81 | Opt_port, | 83 | Opt_port, |
@@ -93,6 +95,7 @@ enum { | |||
93 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 95 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
94 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 96 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
95 | Opt_lookupcache, | 97 | Opt_lookupcache, |
98 | Opt_fscache_uniq, | ||
96 | 99 | ||
97 | /* Special mount options */ | 100 | /* Special mount options */ |
98 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 101 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
@@ -132,6 +135,9 @@ static const match_table_t nfs_mount_option_tokens = { | |||
132 | { Opt_nosharecache, "nosharecache" }, | 135 | { Opt_nosharecache, "nosharecache" }, |
133 | { Opt_resvport, "resvport" }, | 136 | { Opt_resvport, "resvport" }, |
134 | { Opt_noresvport, "noresvport" }, | 137 | { Opt_noresvport, "noresvport" }, |
138 | { Opt_fscache, "fsc" }, | ||
139 | { Opt_fscache_uniq, "fsc=%s" }, | ||
140 | { Opt_nofscache, "nofsc" }, | ||
135 | 141 | ||
136 | { Opt_port, "port=%u" }, | 142 | { Opt_port, "port=%u" }, |
137 | { Opt_rsize, "rsize=%u" }, | 143 | { Opt_rsize, "rsize=%u" }, |
@@ -563,6 +569,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
563 | if (clp->rpc_ops->version == 4) | 569 | if (clp->rpc_ops->version == 4) |
564 | seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); | 570 | seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); |
565 | #endif | 571 | #endif |
572 | if (nfss->options & NFS_OPTION_FSCACHE) | ||
573 | seq_printf(m, ",fsc"); | ||
566 | } | 574 | } |
567 | 575 | ||
568 | /* | 576 | /* |
@@ -641,6 +649,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
641 | totals.events[i] += stats->events[i]; | 649 | totals.events[i] += stats->events[i]; |
642 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) | 650 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) |
643 | totals.bytes[i] += stats->bytes[i]; | 651 | totals.bytes[i] += stats->bytes[i]; |
652 | #ifdef CONFIG_NFS_FSCACHE | ||
653 | for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) | ||
654 | totals.fscache[i] += stats->fscache[i]; | ||
655 | #endif | ||
644 | 656 | ||
645 | preempt_enable(); | 657 | preempt_enable(); |
646 | } | 658 | } |
@@ -651,6 +663,13 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
651 | seq_printf(m, "\n\tbytes:\t"); | 663 | seq_printf(m, "\n\tbytes:\t"); |
652 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) | 664 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) |
653 | seq_printf(m, "%Lu ", totals.bytes[i]); | 665 | seq_printf(m, "%Lu ", totals.bytes[i]); |
666 | #ifdef CONFIG_NFS_FSCACHE | ||
667 | if (nfss->options & NFS_OPTION_FSCACHE) { | ||
668 | seq_printf(m, "\n\tfsc:\t"); | ||
669 | for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) | ||
670 | seq_printf(m, "%Lu ", totals.bytes[i]); | ||
671 | } | ||
672 | #endif | ||
654 | seq_printf(m, "\n"); | 673 | seq_printf(m, "\n"); |
655 | 674 | ||
656 | rpc_print_iostats(m, nfss->client); | 675 | rpc_print_iostats(m, nfss->client); |
@@ -1044,6 +1063,24 @@ static int nfs_parse_mount_options(char *raw, | |||
1044 | case Opt_noresvport: | 1063 | case Opt_noresvport: |
1045 | mnt->flags |= NFS_MOUNT_NORESVPORT; | 1064 | mnt->flags |= NFS_MOUNT_NORESVPORT; |
1046 | break; | 1065 | break; |
1066 | case Opt_fscache: | ||
1067 | mnt->options |= NFS_OPTION_FSCACHE; | ||
1068 | kfree(mnt->fscache_uniq); | ||
1069 | mnt->fscache_uniq = NULL; | ||
1070 | break; | ||
1071 | case Opt_nofscache: | ||
1072 | mnt->options &= ~NFS_OPTION_FSCACHE; | ||
1073 | kfree(mnt->fscache_uniq); | ||
1074 | mnt->fscache_uniq = NULL; | ||
1075 | break; | ||
1076 | case Opt_fscache_uniq: | ||
1077 | string = match_strdup(args); | ||
1078 | if (!string) | ||
1079 | goto out_nomem; | ||
1080 | kfree(mnt->fscache_uniq); | ||
1081 | mnt->fscache_uniq = string; | ||
1082 | mnt->options |= NFS_OPTION_FSCACHE; | ||
1083 | break; | ||
1047 | 1084 | ||
1048 | /* | 1085 | /* |
1049 | * options that take numeric values | 1086 | * options that take numeric values |
@@ -1870,8 +1907,6 @@ static void nfs_clone_super(struct super_block *sb, | |||
1870 | nfs_initialise_sb(sb); | 1907 | nfs_initialise_sb(sb); |
1871 | } | 1908 | } |
1872 | 1909 | ||
1873 | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | ||
1874 | |||
1875 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) | 1910 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) |
1876 | { | 1911 | { |
1877 | const struct nfs_server *a = s->s_fs_info; | 1912 | const struct nfs_server *a = s->s_fs_info; |
@@ -2036,6 +2071,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2036 | if (!s->s_root) { | 2071 | if (!s->s_root) { |
2037 | /* initial superblock/root creation */ | 2072 | /* initial superblock/root creation */ |
2038 | nfs_fill_super(s, data); | 2073 | nfs_fill_super(s, data); |
2074 | nfs_fscache_get_super_cookie(s, data); | ||
2039 | } | 2075 | } |
2040 | 2076 | ||
2041 | mntroot = nfs_get_root(s, mntfh); | 2077 | mntroot = nfs_get_root(s, mntfh); |
@@ -2056,6 +2092,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2056 | out: | 2092 | out: |
2057 | kfree(data->nfs_server.hostname); | 2093 | kfree(data->nfs_server.hostname); |
2058 | kfree(data->mount_server.hostname); | 2094 | kfree(data->mount_server.hostname); |
2095 | kfree(data->fscache_uniq); | ||
2059 | security_free_mnt_opts(&data->lsm_opts); | 2096 | security_free_mnt_opts(&data->lsm_opts); |
2060 | out_free_fh: | 2097 | out_free_fh: |
2061 | kfree(mntfh); | 2098 | kfree(mntfh); |
@@ -2083,6 +2120,7 @@ static void nfs_kill_super(struct super_block *s) | |||
2083 | 2120 | ||
2084 | bdi_unregister(&server->backing_dev_info); | 2121 | bdi_unregister(&server->backing_dev_info); |
2085 | kill_anon_super(s); | 2122 | kill_anon_super(s); |
2123 | nfs_fscache_release_super_cookie(s); | ||
2086 | nfs_free_server(server); | 2124 | nfs_free_server(server); |
2087 | } | 2125 | } |
2088 | 2126 | ||
@@ -2390,6 +2428,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
2390 | if (!s->s_root) { | 2428 | if (!s->s_root) { |
2391 | /* initial superblock/root creation */ | 2429 | /* initial superblock/root creation */ |
2392 | nfs4_fill_super(s); | 2430 | nfs4_fill_super(s); |
2431 | nfs_fscache_get_super_cookie(s, data); | ||
2393 | } | 2432 | } |
2394 | 2433 | ||
2395 | mntroot = nfs4_get_root(s, mntfh); | 2434 | mntroot = nfs4_get_root(s, mntfh); |
@@ -2411,6 +2450,7 @@ out: | |||
2411 | kfree(data->client_address); | 2450 | kfree(data->client_address); |
2412 | kfree(data->nfs_server.export_path); | 2451 | kfree(data->nfs_server.export_path); |
2413 | kfree(data->nfs_server.hostname); | 2452 | kfree(data->nfs_server.hostname); |
2453 | kfree(data->fscache_uniq); | ||
2414 | security_free_mnt_opts(&data->lsm_opts); | 2454 | security_free_mnt_opts(&data->lsm_opts); |
2415 | out_free_fh: | 2455 | out_free_fh: |
2416 | kfree(mntfh); | 2456 | kfree(mntfh); |
@@ -2437,6 +2477,7 @@ static void nfs4_kill_super(struct super_block *sb) | |||
2437 | kill_anon_super(sb); | 2477 | kill_anon_super(sb); |
2438 | 2478 | ||
2439 | nfs4_renewd_prepare_shutdown(server); | 2479 | nfs4_renewd_prepare_shutdown(server); |
2480 | nfs_fscache_release_super_cookie(sb); | ||
2440 | nfs_free_server(server); | 2481 | nfs_free_server(server); |
2441 | } | 2482 | } |
2442 | 2483 | ||
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index bc3567bab8c4..7c09852be713 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -403,7 +403,6 @@ static int | |||
403 | nfsd(void *vrqstp) | 403 | nfsd(void *vrqstp) |
404 | { | 404 | { |
405 | struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; | 405 | struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; |
406 | struct fs_struct *fsp; | ||
407 | int err, preverr = 0; | 406 | int err, preverr = 0; |
408 | 407 | ||
409 | /* Lock module and set up kernel thread */ | 408 | /* Lock module and set up kernel thread */ |
@@ -412,13 +411,11 @@ nfsd(void *vrqstp) | |||
412 | /* At this point, the thread shares current->fs | 411 | /* At this point, the thread shares current->fs |
413 | * with the init process. We need to create files with a | 412 | * with the init process. We need to create files with a |
414 | * umask of 0 instead of init's umask. */ | 413 | * umask of 0 instead of init's umask. */ |
415 | fsp = copy_fs_struct(current->fs); | 414 | if (unshare_fs_struct() < 0) { |
416 | if (!fsp) { | ||
417 | printk("Unable to start nfsd thread: out of memory\n"); | 415 | printk("Unable to start nfsd thread: out of memory\n"); |
418 | goto out; | 416 | goto out; |
419 | } | 417 | } |
420 | exit_fs(current); | 418 | |
421 | current->fs = fsp; | ||
422 | current->fs->umask = 0; | 419 | current->fs->umask = 0; |
423 | 420 | ||
424 | /* | 421 | /* |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 12dfb44c22e5..fbeaec762103 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -296,7 +296,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
296 | return PTR_ERR(acl); | 296 | return PTR_ERR(acl); |
297 | } | 297 | } |
298 | if (!acl) | 298 | if (!acl) |
299 | inode->i_mode &= ~current->fs->umask; | 299 | inode->i_mode &= ~current_umask(); |
300 | } | 300 | } |
301 | if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { | 301 | if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { |
302 | struct posix_acl *clone; | 302 | struct posix_acl *clone; |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 19e3a96aa02c..678a067d9251 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { | |||
294 | .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters, | 294 | .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters, |
295 | }; | 295 | }; |
296 | 296 | ||
297 | static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
298 | u64 blkno) | ||
299 | { | ||
300 | struct ocfs2_dx_root_block *dx_root = et->et_object; | ||
301 | |||
302 | dx_root->dr_last_eb_blk = cpu_to_le64(blkno); | ||
303 | } | ||
304 | |||
305 | static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
306 | { | ||
307 | struct ocfs2_dx_root_block *dx_root = et->et_object; | ||
308 | |||
309 | return le64_to_cpu(dx_root->dr_last_eb_blk); | ||
310 | } | ||
311 | |||
312 | static void ocfs2_dx_root_update_clusters(struct inode *inode, | ||
313 | struct ocfs2_extent_tree *et, | ||
314 | u32 clusters) | ||
315 | { | ||
316 | struct ocfs2_dx_root_block *dx_root = et->et_object; | ||
317 | |||
318 | le32_add_cpu(&dx_root->dr_clusters, clusters); | ||
319 | } | ||
320 | |||
321 | static int ocfs2_dx_root_sanity_check(struct inode *inode, | ||
322 | struct ocfs2_extent_tree *et) | ||
323 | { | ||
324 | struct ocfs2_dx_root_block *dx_root = et->et_object; | ||
325 | |||
326 | BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root)); | ||
327 | |||
328 | return 0; | ||
329 | } | ||
330 | |||
331 | static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et) | ||
332 | { | ||
333 | struct ocfs2_dx_root_block *dx_root = et->et_object; | ||
334 | |||
335 | et->et_root_el = &dx_root->dr_list; | ||
336 | } | ||
337 | |||
338 | static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = { | ||
339 | .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk, | ||
340 | .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk, | ||
341 | .eo_update_clusters = ocfs2_dx_root_update_clusters, | ||
342 | .eo_sanity_check = ocfs2_dx_root_sanity_check, | ||
343 | .eo_fill_root_el = ocfs2_dx_root_fill_root_el, | ||
344 | }; | ||
345 | |||
297 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | 346 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, |
298 | struct inode *inode, | 347 | struct inode *inode, |
299 | struct buffer_head *bh, | 348 | struct buffer_head *bh, |
@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | |||
339 | &ocfs2_xattr_value_et_ops); | 388 | &ocfs2_xattr_value_et_ops); |
340 | } | 389 | } |
341 | 390 | ||
391 | void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, | ||
392 | struct inode *inode, | ||
393 | struct buffer_head *bh) | ||
394 | { | ||
395 | __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr, | ||
396 | NULL, &ocfs2_dx_root_et_ops); | ||
397 | } | ||
398 | |||
342 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, | 399 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, |
343 | u64 new_last_eb_blk) | 400 | u64 new_last_eb_blk) |
344 | { | 401 | { |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index cceff5c37f47..353254ba29e1 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf; | |||
75 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | 75 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, |
76 | struct inode *inode, | 76 | struct inode *inode, |
77 | struct ocfs2_xattr_value_buf *vb); | 77 | struct ocfs2_xattr_value_buf *vb); |
78 | void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, | ||
79 | struct inode *inode, | ||
80 | struct buffer_head *bh); | ||
78 | 81 | ||
79 | /* | 82 | /* |
80 | * Read an extent block into *bh. If *bh is NULL, a bh will be | 83 | * Read an extent block into *bh. If *bh is NULL, a bh will be |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8e1709a679b7..b2c52b3a1484 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
1956 | } | 1956 | } |
1957 | 1957 | ||
1958 | const struct address_space_operations ocfs2_aops = { | 1958 | const struct address_space_operations ocfs2_aops = { |
1959 | .readpage = ocfs2_readpage, | 1959 | .readpage = ocfs2_readpage, |
1960 | .readpages = ocfs2_readpages, | 1960 | .readpages = ocfs2_readpages, |
1961 | .writepage = ocfs2_writepage, | 1961 | .writepage = ocfs2_writepage, |
1962 | .write_begin = ocfs2_write_begin, | 1962 | .write_begin = ocfs2_write_begin, |
1963 | .write_end = ocfs2_write_end, | 1963 | .write_end = ocfs2_write_end, |
1964 | .bmap = ocfs2_bmap, | 1964 | .bmap = ocfs2_bmap, |
1965 | .sync_page = block_sync_page, | 1965 | .sync_page = block_sync_page, |
1966 | .direct_IO = ocfs2_direct_IO, | 1966 | .direct_IO = ocfs2_direct_IO, |
1967 | .invalidatepage = ocfs2_invalidatepage, | 1967 | .invalidatepage = ocfs2_invalidatepage, |
1968 | .releasepage = ocfs2_releasepage, | 1968 | .releasepage = ocfs2_releasepage, |
1969 | .migratepage = buffer_migrate_page, | 1969 | .migratepage = buffer_migrate_page, |
1970 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1970 | }; | 1971 | }; |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 04697ba7f73e..4f85eceab376 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/random.h> | 33 | #include <linux/random.h> |
34 | #include <linux/crc32.h> | 34 | #include <linux/crc32.h> |
35 | #include <linux/time.h> | 35 | #include <linux/time.h> |
36 | #include <linux/debugfs.h> | ||
36 | 37 | ||
37 | #include "heartbeat.h" | 38 | #include "heartbeat.h" |
38 | #include "tcp.h" | 39 | #include "tcp.h" |
@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | |||
60 | static LIST_HEAD(o2hb_node_events); | 61 | static LIST_HEAD(o2hb_node_events); |
61 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); | 62 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); |
62 | 63 | ||
64 | #define O2HB_DEBUG_DIR "o2hb" | ||
65 | #define O2HB_DEBUG_LIVENODES "livenodes" | ||
66 | static struct dentry *o2hb_debug_dir; | ||
67 | static struct dentry *o2hb_debug_livenodes; | ||
68 | |||
63 | static LIST_HEAD(o2hb_all_regions); | 69 | static LIST_HEAD(o2hb_all_regions); |
64 | 70 | ||
65 | static struct o2hb_callback { | 71 | static struct o2hb_callback { |
@@ -905,7 +911,77 @@ static int o2hb_thread(void *data) | |||
905 | return 0; | 911 | return 0; |
906 | } | 912 | } |
907 | 913 | ||
908 | void o2hb_init(void) | 914 | #ifdef CONFIG_DEBUG_FS |
915 | static int o2hb_debug_open(struct inode *inode, struct file *file) | ||
916 | { | ||
917 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
918 | char *buf = NULL; | ||
919 | int i = -1; | ||
920 | int out = 0; | ||
921 | |||
922 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
923 | if (!buf) | ||
924 | goto bail; | ||
925 | |||
926 | o2hb_fill_node_map(map, sizeof(map)); | ||
927 | |||
928 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) | ||
929 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); | ||
930 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); | ||
931 | |||
932 | i_size_write(inode, out); | ||
933 | |||
934 | file->private_data = buf; | ||
935 | |||
936 | return 0; | ||
937 | bail: | ||
938 | return -ENOMEM; | ||
939 | } | ||
940 | |||
941 | static int o2hb_debug_release(struct inode *inode, struct file *file) | ||
942 | { | ||
943 | kfree(file->private_data); | ||
944 | return 0; | ||
945 | } | ||
946 | |||
947 | static ssize_t o2hb_debug_read(struct file *file, char __user *buf, | ||
948 | size_t nbytes, loff_t *ppos) | ||
949 | { | ||
950 | return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, | ||
951 | i_size_read(file->f_mapping->host)); | ||
952 | } | ||
953 | #else | ||
954 | static int o2hb_debug_open(struct inode *inode, struct file *file) | ||
955 | { | ||
956 | return 0; | ||
957 | } | ||
958 | static int o2hb_debug_release(struct inode *inode, struct file *file) | ||
959 | { | ||
960 | return 0; | ||
961 | } | ||
962 | static ssize_t o2hb_debug_read(struct file *file, char __user *buf, | ||
963 | size_t nbytes, loff_t *ppos) | ||
964 | { | ||
965 | return 0; | ||
966 | } | ||
967 | #endif /* CONFIG_DEBUG_FS */ | ||
968 | |||
969 | static struct file_operations o2hb_debug_fops = { | ||
970 | .open = o2hb_debug_open, | ||
971 | .release = o2hb_debug_release, | ||
972 | .read = o2hb_debug_read, | ||
973 | .llseek = generic_file_llseek, | ||
974 | }; | ||
975 | |||
976 | void o2hb_exit(void) | ||
977 | { | ||
978 | if (o2hb_debug_livenodes) | ||
979 | debugfs_remove(o2hb_debug_livenodes); | ||
980 | if (o2hb_debug_dir) | ||
981 | debugfs_remove(o2hb_debug_dir); | ||
982 | } | ||
983 | |||
984 | int o2hb_init(void) | ||
909 | { | 985 | { |
910 | int i; | 986 | int i; |
911 | 987 | ||
@@ -918,6 +994,24 @@ void o2hb_init(void) | |||
918 | INIT_LIST_HEAD(&o2hb_node_events); | 994 | INIT_LIST_HEAD(&o2hb_node_events); |
919 | 995 | ||
920 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); | 996 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); |
997 | |||
998 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | ||
999 | if (!o2hb_debug_dir) { | ||
1000 | mlog_errno(-ENOMEM); | ||
1001 | return -ENOMEM; | ||
1002 | } | ||
1003 | |||
1004 | o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, | ||
1005 | S_IFREG|S_IRUSR, | ||
1006 | o2hb_debug_dir, NULL, | ||
1007 | &o2hb_debug_fops); | ||
1008 | if (!o2hb_debug_livenodes) { | ||
1009 | mlog_errno(-ENOMEM); | ||
1010 | debugfs_remove(o2hb_debug_dir); | ||
1011 | return -ENOMEM; | ||
1012 | } | ||
1013 | |||
1014 | return 0; | ||
921 | } | 1015 | } |
922 | 1016 | ||
923 | /* if we're already in a callback then we're already serialized by the sem */ | 1017 | /* if we're already in a callback then we're already serialized by the sem */ |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index e511339886b3..2f1649253b49 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid, | |||
75 | struct o2hb_callback_func *hc); | 75 | struct o2hb_callback_func *hc); |
76 | void o2hb_fill_node_map(unsigned long *map, | 76 | void o2hb_fill_node_map(unsigned long *map, |
77 | unsigned bytes); | 77 | unsigned bytes); |
78 | void o2hb_init(void); | 78 | void o2hb_exit(void); |
79 | int o2hb_init(void); | ||
79 | int o2hb_check_node_heartbeating(u8 node_num); | 80 | int o2hb_check_node_heartbeating(u8 node_num); |
80 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 81 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
81 | int o2hb_check_local_node_heartbeating(void); | 82 | int o2hb_check_local_node_heartbeating(void); |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 70e8fa9e2539..7ee6188bc79a 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void) | |||
881 | o2cb_sys_shutdown(); | 881 | o2cb_sys_shutdown(); |
882 | 882 | ||
883 | o2net_exit(); | 883 | o2net_exit(); |
884 | o2hb_exit(); | ||
884 | } | 885 | } |
885 | 886 | ||
886 | static int __init init_o2nm(void) | 887 | static int __init init_o2nm(void) |
@@ -889,11 +890,13 @@ static int __init init_o2nm(void) | |||
889 | 890 | ||
890 | cluster_print_version(); | 891 | cluster_print_version(); |
891 | 892 | ||
892 | o2hb_init(); | 893 | ret = o2hb_init(); |
894 | if (ret) | ||
895 | goto out; | ||
893 | 896 | ||
894 | ret = o2net_init(); | 897 | ret = o2net_init(); |
895 | if (ret) | 898 | if (ret) |
896 | goto out; | 899 | goto out_o2hb; |
897 | 900 | ||
898 | ret = o2net_register_hb_callbacks(); | 901 | ret = o2net_register_hb_callbacks(); |
899 | if (ret) | 902 | if (ret) |
@@ -916,6 +919,8 @@ out_callbacks: | |||
916 | o2net_unregister_hb_callbacks(); | 919 | o2net_unregister_hb_callbacks(); |
917 | out_o2net: | 920 | out_o2net: |
918 | o2net_exit(); | 921 | o2net_exit(); |
922 | out_o2hb: | ||
923 | o2hb_exit(); | ||
919 | out: | 924 | out: |
920 | return ret; | 925 | return ret; |
921 | } | 926 | } |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f2c4098cf337..e71160cda110 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/quotaops.h> | 43 | #include <linux/quotaops.h> |
44 | #include <linux/sort.h> | ||
44 | 45 | ||
45 | #define MLOG_MASK_PREFIX ML_NAMEI | 46 | #define MLOG_MASK_PREFIX ML_NAMEI |
46 | #include <cluster/masklog.h> | 47 | #include <cluster/masklog.h> |
@@ -58,6 +59,7 @@ | |||
58 | #include "namei.h" | 59 | #include "namei.h" |
59 | #include "suballoc.h" | 60 | #include "suballoc.h" |
60 | #include "super.h" | 61 | #include "super.h" |
62 | #include "sysfile.h" | ||
61 | #include "uptodate.h" | 63 | #include "uptodate.h" |
62 | 64 | ||
63 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
@@ -71,11 +73,6 @@ static unsigned char ocfs2_filetype_table[] = { | |||
71 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 73 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
72 | }; | 74 | }; |
73 | 75 | ||
74 | static int ocfs2_extend_dir(struct ocfs2_super *osb, | ||
75 | struct inode *dir, | ||
76 | struct buffer_head *parent_fe_bh, | ||
77 | unsigned int blocks_wanted, | ||
78 | struct buffer_head **new_de_bh); | ||
79 | static int ocfs2_do_extend_dir(struct super_block *sb, | 76 | static int ocfs2_do_extend_dir(struct super_block *sb, |
80 | handle_t *handle, | 77 | handle_t *handle, |
81 | struct inode *dir, | 78 | struct inode *dir, |
@@ -83,22 +80,36 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
83 | struct ocfs2_alloc_context *data_ac, | 80 | struct ocfs2_alloc_context *data_ac, |
84 | struct ocfs2_alloc_context *meta_ac, | 81 | struct ocfs2_alloc_context *meta_ac, |
85 | struct buffer_head **new_bh); | 82 | struct buffer_head **new_bh); |
83 | static int ocfs2_dir_indexed(struct inode *inode); | ||
86 | 84 | ||
87 | /* | 85 | /* |
88 | * These are distinct checks because future versions of the file system will | 86 | * These are distinct checks because future versions of the file system will |
89 | * want to have a trailing dirent structure independent of indexing. | 87 | * want to have a trailing dirent structure independent of indexing. |
90 | */ | 88 | */ |
91 | static int ocfs2_dir_has_trailer(struct inode *dir) | 89 | static int ocfs2_supports_dir_trailer(struct inode *dir) |
92 | { | 90 | { |
91 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
92 | |||
93 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 93 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
94 | return 0; | 94 | return 0; |
95 | 95 | ||
96 | return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb)); | 96 | return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir); |
97 | } | 97 | } |
98 | 98 | ||
99 | static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb) | 99 | /* |
100 | * "new' here refers to the point at which we're creating a new | ||
101 | * directory via "mkdir()", but also when we're expanding an inline | ||
102 | * directory. In either case, we don't yet have the indexing bit set | ||
103 | * on the directory, so the standard checks will fail in when metaecc | ||
104 | * is turned off. Only directory-initialization type functions should | ||
105 | * use this then. Everything else wants ocfs2_supports_dir_trailer() | ||
106 | */ | ||
107 | static int ocfs2_new_dir_wants_trailer(struct inode *dir) | ||
100 | { | 108 | { |
101 | return ocfs2_meta_ecc(osb); | 109 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
110 | |||
111 | return ocfs2_meta_ecc(osb) || | ||
112 | ocfs2_supports_indexed_dirs(osb); | ||
102 | } | 113 | } |
103 | 114 | ||
104 | static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) | 115 | static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) |
@@ -130,7 +141,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir, | |||
130 | { | 141 | { |
131 | unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); | 142 | unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); |
132 | 143 | ||
133 | if (!ocfs2_dir_has_trailer(dir)) | 144 | if (!ocfs2_supports_dir_trailer(dir)) |
134 | return 0; | 145 | return 0; |
135 | 146 | ||
136 | if (offset != toff) | 147 | if (offset != toff) |
@@ -140,7 +151,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir, | |||
140 | } | 151 | } |
141 | 152 | ||
142 | static void ocfs2_init_dir_trailer(struct inode *inode, | 153 | static void ocfs2_init_dir_trailer(struct inode *inode, |
143 | struct buffer_head *bh) | 154 | struct buffer_head *bh, u16 rec_len) |
144 | { | 155 | { |
145 | struct ocfs2_dir_block_trailer *trailer; | 156 | struct ocfs2_dir_block_trailer *trailer; |
146 | 157 | ||
@@ -150,6 +161,153 @@ static void ocfs2_init_dir_trailer(struct inode *inode, | |||
150 | cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); | 161 | cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); |
151 | trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); | 162 | trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); |
152 | trailer->db_blkno = cpu_to_le64(bh->b_blocknr); | 163 | trailer->db_blkno = cpu_to_le64(bh->b_blocknr); |
164 | trailer->db_free_rec_len = cpu_to_le16(rec_len); | ||
165 | } | ||
166 | /* | ||
167 | * Link an unindexed block with a dir trailer structure into the index free | ||
168 | * list. This function will modify dirdata_bh, but assumes you've already | ||
169 | * passed it to the journal. | ||
170 | */ | ||
171 | static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle, | ||
172 | struct buffer_head *dx_root_bh, | ||
173 | struct buffer_head *dirdata_bh) | ||
174 | { | ||
175 | int ret; | ||
176 | struct ocfs2_dx_root_block *dx_root; | ||
177 | struct ocfs2_dir_block_trailer *trailer; | ||
178 | |||
179 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
180 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
181 | if (ret) { | ||
182 | mlog_errno(ret); | ||
183 | goto out; | ||
184 | } | ||
185 | trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); | ||
186 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
187 | |||
188 | trailer->db_free_next = dx_root->dr_free_blk; | ||
189 | dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr); | ||
190 | |||
191 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
192 | |||
193 | out: | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res) | ||
198 | { | ||
199 | return res->dl_prev_leaf_bh == NULL; | ||
200 | } | ||
201 | |||
202 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) | ||
203 | { | ||
204 | brelse(res->dl_dx_root_bh); | ||
205 | brelse(res->dl_leaf_bh); | ||
206 | brelse(res->dl_dx_leaf_bh); | ||
207 | brelse(res->dl_prev_leaf_bh); | ||
208 | } | ||
209 | |||
210 | static int ocfs2_dir_indexed(struct inode *inode) | ||
211 | { | ||
212 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL) | ||
213 | return 1; | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root) | ||
218 | { | ||
219 | return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Hashing code adapted from ext3 | ||
224 | */ | ||
225 | #define DELTA 0x9E3779B9 | ||
226 | |||
227 | static void TEA_transform(__u32 buf[4], __u32 const in[]) | ||
228 | { | ||
229 | __u32 sum = 0; | ||
230 | __u32 b0 = buf[0], b1 = buf[1]; | ||
231 | __u32 a = in[0], b = in[1], c = in[2], d = in[3]; | ||
232 | int n = 16; | ||
233 | |||
234 | do { | ||
235 | sum += DELTA; | ||
236 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | ||
237 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | ||
238 | } while (--n); | ||
239 | |||
240 | buf[0] += b0; | ||
241 | buf[1] += b1; | ||
242 | } | ||
243 | |||
244 | static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | ||
245 | { | ||
246 | __u32 pad, val; | ||
247 | int i; | ||
248 | |||
249 | pad = (__u32)len | ((__u32)len << 8); | ||
250 | pad |= pad << 16; | ||
251 | |||
252 | val = pad; | ||
253 | if (len > num*4) | ||
254 | len = num * 4; | ||
255 | for (i = 0; i < len; i++) { | ||
256 | if ((i % 4) == 0) | ||
257 | val = pad; | ||
258 | val = msg[i] + (val << 8); | ||
259 | if ((i % 4) == 3) { | ||
260 | *buf++ = val; | ||
261 | val = pad; | ||
262 | num--; | ||
263 | } | ||
264 | } | ||
265 | if (--num >= 0) | ||
266 | *buf++ = val; | ||
267 | while (--num >= 0) | ||
268 | *buf++ = pad; | ||
269 | } | ||
270 | |||
271 | static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len, | ||
272 | struct ocfs2_dx_hinfo *hinfo) | ||
273 | { | ||
274 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
275 | const char *p; | ||
276 | __u32 in[8], buf[4]; | ||
277 | |||
278 | /* | ||
279 | * XXX: Is this really necessary, if the index is never looked | ||
280 | * at by readdir? Is a hash value of '0' a bad idea? | ||
281 | */ | ||
282 | if ((len == 1 && !strncmp(".", name, 1)) || | ||
283 | (len == 2 && !strncmp("..", name, 2))) { | ||
284 | buf[0] = buf[1] = 0; | ||
285 | goto out; | ||
286 | } | ||
287 | |||
288 | #ifdef OCFS2_DEBUG_DX_DIRS | ||
289 | /* | ||
290 | * This makes it very easy to debug indexing problems. We | ||
291 | * should never allow this to be selected without hand editing | ||
292 | * this file though. | ||
293 | */ | ||
294 | buf[0] = buf[1] = len; | ||
295 | goto out; | ||
296 | #endif | ||
297 | |||
298 | memcpy(buf, osb->osb_dx_seed, sizeof(buf)); | ||
299 | |||
300 | p = name; | ||
301 | while (len > 0) { | ||
302 | str2hashbuf(p, len, in, 4); | ||
303 | TEA_transform(buf, in); | ||
304 | len -= 16; | ||
305 | p += 16; | ||
306 | } | ||
307 | |||
308 | out: | ||
309 | hinfo->major_hash = buf[0]; | ||
310 | hinfo->minor_hash = buf[1]; | ||
153 | } | 311 | } |
154 | 312 | ||
155 | /* | 313 | /* |
@@ -312,6 +470,52 @@ static int ocfs2_validate_dir_block(struct super_block *sb, | |||
312 | } | 470 | } |
313 | 471 | ||
314 | /* | 472 | /* |
473 | * Validate a directory trailer. | ||
474 | * | ||
475 | * We check the trailer here rather than in ocfs2_validate_dir_block() | ||
476 | * because that function doesn't have the inode to test. | ||
477 | */ | ||
478 | static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh) | ||
479 | { | ||
480 | int rc = 0; | ||
481 | struct ocfs2_dir_block_trailer *trailer; | ||
482 | |||
483 | trailer = ocfs2_trailer_from_bh(bh, dir->i_sb); | ||
484 | if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { | ||
485 | rc = -EINVAL; | ||
486 | ocfs2_error(dir->i_sb, | ||
487 | "Invalid dirblock #%llu: " | ||
488 | "signature = %.*s\n", | ||
489 | (unsigned long long)bh->b_blocknr, 7, | ||
490 | trailer->db_signature); | ||
491 | goto out; | ||
492 | } | ||
493 | if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) { | ||
494 | rc = -EINVAL; | ||
495 | ocfs2_error(dir->i_sb, | ||
496 | "Directory block #%llu has an invalid " | ||
497 | "db_blkno of %llu", | ||
498 | (unsigned long long)bh->b_blocknr, | ||
499 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
500 | goto out; | ||
501 | } | ||
502 | if (le64_to_cpu(trailer->db_parent_dinode) != | ||
503 | OCFS2_I(dir)->ip_blkno) { | ||
504 | rc = -EINVAL; | ||
505 | ocfs2_error(dir->i_sb, | ||
506 | "Directory block #%llu on dinode " | ||
507 | "#%llu has an invalid parent_dinode " | ||
508 | "of %llu", | ||
509 | (unsigned long long)bh->b_blocknr, | ||
510 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
511 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
512 | goto out; | ||
513 | } | ||
514 | out: | ||
515 | return rc; | ||
516 | } | ||
517 | |||
518 | /* | ||
315 | * This function forces all errors to -EIO for consistency with its | 519 | * This function forces all errors to -EIO for consistency with its |
316 | * predecessor, ocfs2_bread(). We haven't audited what returning the | 520 | * predecessor, ocfs2_bread(). We haven't audited what returning the |
317 | * real error codes would do to callers. We log the real codes with | 521 | * real error codes would do to callers. We log the real codes with |
@@ -322,7 +526,6 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
322 | { | 526 | { |
323 | int rc = 0; | 527 | int rc = 0; |
324 | struct buffer_head *tmp = *bh; | 528 | struct buffer_head *tmp = *bh; |
325 | struct ocfs2_dir_block_trailer *trailer; | ||
326 | 529 | ||
327 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, | 530 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, |
328 | ocfs2_validate_dir_block); | 531 | ocfs2_validate_dir_block); |
@@ -331,42 +534,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
331 | goto out; | 534 | goto out; |
332 | } | 535 | } |
333 | 536 | ||
334 | /* | ||
335 | * We check the trailer here rather than in | ||
336 | * ocfs2_validate_dir_block() because that function doesn't have | ||
337 | * the inode to test. | ||
338 | */ | ||
339 | if (!(flags & OCFS2_BH_READAHEAD) && | 537 | if (!(flags & OCFS2_BH_READAHEAD) && |
340 | ocfs2_dir_has_trailer(inode)) { | 538 | ocfs2_supports_dir_trailer(inode)) { |
341 | trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb); | 539 | rc = ocfs2_check_dir_trailer(inode, tmp); |
342 | if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { | 540 | if (rc) { |
343 | rc = -EINVAL; | 541 | if (!*bh) |
344 | ocfs2_error(inode->i_sb, | 542 | brelse(tmp); |
345 | "Invalid dirblock #%llu: " | 543 | mlog_errno(rc); |
346 | "signature = %.*s\n", | ||
347 | (unsigned long long)tmp->b_blocknr, 7, | ||
348 | trailer->db_signature); | ||
349 | goto out; | ||
350 | } | ||
351 | if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) { | ||
352 | rc = -EINVAL; | ||
353 | ocfs2_error(inode->i_sb, | ||
354 | "Directory block #%llu has an invalid " | ||
355 | "db_blkno of %llu", | ||
356 | (unsigned long long)tmp->b_blocknr, | ||
357 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
358 | goto out; | ||
359 | } | ||
360 | if (le64_to_cpu(trailer->db_parent_dinode) != | ||
361 | OCFS2_I(inode)->ip_blkno) { | ||
362 | rc = -EINVAL; | ||
363 | ocfs2_error(inode->i_sb, | ||
364 | "Directory block #%llu on dinode " | ||
365 | "#%llu has an invalid parent_dinode " | ||
366 | "of %llu", | ||
367 | (unsigned long long)tmp->b_blocknr, | ||
368 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
369 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
370 | goto out; | 544 | goto out; |
371 | } | 545 | } |
372 | } | 546 | } |
@@ -379,6 +553,141 @@ out: | |||
379 | return rc ? -EIO : 0; | 553 | return rc ? -EIO : 0; |
380 | } | 554 | } |
381 | 555 | ||
556 | /* | ||
557 | * Read the block at 'phys' which belongs to this directory | ||
558 | * inode. This function does no virtual->physical block translation - | ||
559 | * what's passed in is assumed to be a valid directory block. | ||
560 | */ | ||
561 | static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys, | ||
562 | struct buffer_head **bh) | ||
563 | { | ||
564 | int ret; | ||
565 | struct buffer_head *tmp = *bh; | ||
566 | |||
567 | ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block); | ||
568 | if (ret) { | ||
569 | mlog_errno(ret); | ||
570 | goto out; | ||
571 | } | ||
572 | |||
573 | if (ocfs2_supports_dir_trailer(dir)) { | ||
574 | ret = ocfs2_check_dir_trailer(dir, tmp); | ||
575 | if (ret) { | ||
576 | if (!*bh) | ||
577 | brelse(tmp); | ||
578 | mlog_errno(ret); | ||
579 | goto out; | ||
580 | } | ||
581 | } | ||
582 | |||
583 | if (!ret && !*bh) | ||
584 | *bh = tmp; | ||
585 | out: | ||
586 | return ret; | ||
587 | } | ||
588 | |||
589 | static int ocfs2_validate_dx_root(struct super_block *sb, | ||
590 | struct buffer_head *bh) | ||
591 | { | ||
592 | int ret; | ||
593 | struct ocfs2_dx_root_block *dx_root; | ||
594 | |||
595 | BUG_ON(!buffer_uptodate(bh)); | ||
596 | |||
597 | dx_root = (struct ocfs2_dx_root_block *) bh->b_data; | ||
598 | |||
599 | ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check); | ||
600 | if (ret) { | ||
601 | mlog(ML_ERROR, | ||
602 | "Checksum failed for dir index root block %llu\n", | ||
603 | (unsigned long long)bh->b_blocknr); | ||
604 | return ret; | ||
605 | } | ||
606 | |||
607 | if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) { | ||
608 | ocfs2_error(sb, | ||
609 | "Dir Index Root # %llu has bad signature %.*s", | ||
610 | (unsigned long long)le64_to_cpu(dx_root->dr_blkno), | ||
611 | 7, dx_root->dr_signature); | ||
612 | return -EINVAL; | ||
613 | } | ||
614 | |||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di, | ||
619 | struct buffer_head **dx_root_bh) | ||
620 | { | ||
621 | int ret; | ||
622 | u64 blkno = le64_to_cpu(di->i_dx_root); | ||
623 | struct buffer_head *tmp = *dx_root_bh; | ||
624 | |||
625 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root); | ||
626 | |||
627 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
628 | if (!ret && !*dx_root_bh) | ||
629 | *dx_root_bh = tmp; | ||
630 | |||
631 | return ret; | ||
632 | } | ||
633 | |||
634 | static int ocfs2_validate_dx_leaf(struct super_block *sb, | ||
635 | struct buffer_head *bh) | ||
636 | { | ||
637 | int ret; | ||
638 | struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data; | ||
639 | |||
640 | BUG_ON(!buffer_uptodate(bh)); | ||
641 | |||
642 | ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check); | ||
643 | if (ret) { | ||
644 | mlog(ML_ERROR, | ||
645 | "Checksum failed for dir index leaf block %llu\n", | ||
646 | (unsigned long long)bh->b_blocknr); | ||
647 | return ret; | ||
648 | } | ||
649 | |||
650 | if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) { | ||
651 | ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s", | ||
652 | 7, dx_leaf->dl_signature); | ||
653 | return -EROFS; | ||
654 | } | ||
655 | |||
656 | return 0; | ||
657 | } | ||
658 | |||
659 | static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno, | ||
660 | struct buffer_head **dx_leaf_bh) | ||
661 | { | ||
662 | int ret; | ||
663 | struct buffer_head *tmp = *dx_leaf_bh; | ||
664 | |||
665 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf); | ||
666 | |||
667 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
668 | if (!ret && !*dx_leaf_bh) | ||
669 | *dx_leaf_bh = tmp; | ||
670 | |||
671 | return ret; | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Read a series of dx_leaf blocks. This expects all buffer_head | ||
676 | * pointers to be NULL on function entry. | ||
677 | */ | ||
678 | static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num, | ||
679 | struct buffer_head **dx_leaf_bhs) | ||
680 | { | ||
681 | int ret; | ||
682 | |||
683 | ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0, | ||
684 | ocfs2_validate_dx_leaf); | ||
685 | if (ret) | ||
686 | mlog_errno(ret); | ||
687 | |||
688 | return ret; | ||
689 | } | ||
690 | |||
382 | static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, | 691 | static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, |
383 | struct inode *dir, | 692 | struct inode *dir, |
384 | struct ocfs2_dir_entry **res_dir) | 693 | struct ocfs2_dir_entry **res_dir) |
@@ -480,39 +789,340 @@ cleanup_and_exit: | |||
480 | return ret; | 789 | return ret; |
481 | } | 790 | } |
482 | 791 | ||
792 | static int ocfs2_dx_dir_lookup_rec(struct inode *inode, | ||
793 | struct ocfs2_extent_list *el, | ||
794 | u32 major_hash, | ||
795 | u32 *ret_cpos, | ||
796 | u64 *ret_phys_blkno, | ||
797 | unsigned int *ret_clen) | ||
798 | { | ||
799 | int ret = 0, i, found; | ||
800 | struct buffer_head *eb_bh = NULL; | ||
801 | struct ocfs2_extent_block *eb; | ||
802 | struct ocfs2_extent_rec *rec = NULL; | ||
803 | |||
804 | if (el->l_tree_depth) { | ||
805 | ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh); | ||
806 | if (ret) { | ||
807 | mlog_errno(ret); | ||
808 | goto out; | ||
809 | } | ||
810 | |||
811 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
812 | el = &eb->h_list; | ||
813 | |||
814 | if (el->l_tree_depth) { | ||
815 | ocfs2_error(inode->i_sb, | ||
816 | "Inode %lu has non zero tree depth in " | ||
817 | "btree tree block %llu\n", inode->i_ino, | ||
818 | (unsigned long long)eb_bh->b_blocknr); | ||
819 | ret = -EROFS; | ||
820 | goto out; | ||
821 | } | ||
822 | } | ||
823 | |||
824 | found = 0; | ||
825 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
826 | rec = &el->l_recs[i]; | ||
827 | |||
828 | if (le32_to_cpu(rec->e_cpos) <= major_hash) { | ||
829 | found = 1; | ||
830 | break; | ||
831 | } | ||
832 | } | ||
833 | |||
834 | if (!found) { | ||
835 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
836 | "record (%u, %u, 0) in btree", inode->i_ino, | ||
837 | le32_to_cpu(rec->e_cpos), | ||
838 | ocfs2_rec_clusters(el, rec)); | ||
839 | ret = -EROFS; | ||
840 | goto out; | ||
841 | } | ||
842 | |||
843 | if (ret_phys_blkno) | ||
844 | *ret_phys_blkno = le64_to_cpu(rec->e_blkno); | ||
845 | if (ret_cpos) | ||
846 | *ret_cpos = le32_to_cpu(rec->e_cpos); | ||
847 | if (ret_clen) | ||
848 | *ret_clen = le16_to_cpu(rec->e_leaf_clusters); | ||
849 | |||
850 | out: | ||
851 | brelse(eb_bh); | ||
852 | return ret; | ||
853 | } | ||
854 | |||
855 | /* | ||
856 | * Returns the block index, from the start of the cluster which this | ||
857 | * hash belongs too. | ||
858 | */ | ||
859 | static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb, | ||
860 | u32 minor_hash) | ||
861 | { | ||
862 | return minor_hash & osb->osb_dx_mask; | ||
863 | } | ||
864 | |||
865 | static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb, | ||
866 | struct ocfs2_dx_hinfo *hinfo) | ||
867 | { | ||
868 | return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash); | ||
869 | } | ||
870 | |||
871 | static int ocfs2_dx_dir_lookup(struct inode *inode, | ||
872 | struct ocfs2_extent_list *el, | ||
873 | struct ocfs2_dx_hinfo *hinfo, | ||
874 | u32 *ret_cpos, | ||
875 | u64 *ret_phys_blkno) | ||
876 | { | ||
877 | int ret = 0; | ||
878 | unsigned int cend, uninitialized_var(clen); | ||
879 | u32 uninitialized_var(cpos); | ||
880 | u64 uninitialized_var(blkno); | ||
881 | u32 name_hash = hinfo->major_hash; | ||
882 | |||
883 | ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno, | ||
884 | &clen); | ||
885 | if (ret) { | ||
886 | mlog_errno(ret); | ||
887 | goto out; | ||
888 | } | ||
889 | |||
890 | cend = cpos + clen; | ||
891 | if (name_hash >= cend) { | ||
892 | /* We want the last cluster */ | ||
893 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1); | ||
894 | cpos += clen - 1; | ||
895 | } else { | ||
896 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, | ||
897 | name_hash - cpos); | ||
898 | cpos = name_hash; | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * We now have the cluster which should hold our entry. To | ||
903 | * find the exact block from the start of the cluster to | ||
904 | * search, we take the lower bits of the hash. | ||
905 | */ | ||
906 | blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo); | ||
907 | |||
908 | if (ret_phys_blkno) | ||
909 | *ret_phys_blkno = blkno; | ||
910 | if (ret_cpos) | ||
911 | *ret_cpos = cpos; | ||
912 | |||
913 | out: | ||
914 | |||
915 | return ret; | ||
916 | } | ||
917 | |||
918 | static int ocfs2_dx_dir_search(const char *name, int namelen, | ||
919 | struct inode *dir, | ||
920 | struct ocfs2_dx_root_block *dx_root, | ||
921 | struct ocfs2_dir_lookup_result *res) | ||
922 | { | ||
923 | int ret, i, found; | ||
924 | u64 uninitialized_var(phys); | ||
925 | struct buffer_head *dx_leaf_bh = NULL; | ||
926 | struct ocfs2_dx_leaf *dx_leaf; | ||
927 | struct ocfs2_dx_entry *dx_entry = NULL; | ||
928 | struct buffer_head *dir_ent_bh = NULL; | ||
929 | struct ocfs2_dir_entry *dir_ent = NULL; | ||
930 | struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo; | ||
931 | struct ocfs2_extent_list *dr_el; | ||
932 | struct ocfs2_dx_entry_list *entry_list; | ||
933 | |||
934 | ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo); | ||
935 | |||
936 | if (ocfs2_dx_root_inline(dx_root)) { | ||
937 | entry_list = &dx_root->dr_entries; | ||
938 | goto search; | ||
939 | } | ||
940 | |||
941 | dr_el = &dx_root->dr_list; | ||
942 | |||
943 | ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys); | ||
944 | if (ret) { | ||
945 | mlog_errno(ret); | ||
946 | goto out; | ||
947 | } | ||
948 | |||
949 | mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x " | ||
950 | "returns: %llu\n", | ||
951 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
952 | namelen, name, hinfo->major_hash, hinfo->minor_hash, | ||
953 | (unsigned long long)phys); | ||
954 | |||
955 | ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh); | ||
956 | if (ret) { | ||
957 | mlog_errno(ret); | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data; | ||
962 | |||
963 | mlog(0, "leaf info: num_used: %d, count: %d\n", | ||
964 | le16_to_cpu(dx_leaf->dl_list.de_num_used), | ||
965 | le16_to_cpu(dx_leaf->dl_list.de_count)); | ||
966 | |||
967 | entry_list = &dx_leaf->dl_list; | ||
968 | |||
969 | search: | ||
970 | /* | ||
971 | * Empty leaf is legal, so no need to check for that. | ||
972 | */ | ||
973 | found = 0; | ||
974 | for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) { | ||
975 | dx_entry = &entry_list->de_entries[i]; | ||
976 | |||
977 | if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash) | ||
978 | || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash)) | ||
979 | continue; | ||
980 | |||
981 | /* | ||
982 | * Search unindexed leaf block now. We're not | ||
983 | * guaranteed to find anything. | ||
984 | */ | ||
985 | ret = ocfs2_read_dir_block_direct(dir, | ||
986 | le64_to_cpu(dx_entry->dx_dirent_blk), | ||
987 | &dir_ent_bh); | ||
988 | if (ret) { | ||
989 | mlog_errno(ret); | ||
990 | goto out; | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * XXX: We should check the unindexed block here, | ||
995 | * before using it. | ||
996 | */ | ||
997 | |||
998 | found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen, | ||
999 | 0, dir_ent_bh->b_data, | ||
1000 | dir->i_sb->s_blocksize, &dir_ent); | ||
1001 | if (found == 1) | ||
1002 | break; | ||
1003 | |||
1004 | if (found == -1) { | ||
1005 | /* This means we found a bad directory entry. */ | ||
1006 | ret = -EIO; | ||
1007 | mlog_errno(ret); | ||
1008 | goto out; | ||
1009 | } | ||
1010 | |||
1011 | brelse(dir_ent_bh); | ||
1012 | dir_ent_bh = NULL; | ||
1013 | } | ||
1014 | |||
1015 | if (found <= 0) { | ||
1016 | ret = -ENOENT; | ||
1017 | goto out; | ||
1018 | } | ||
1019 | |||
1020 | res->dl_leaf_bh = dir_ent_bh; | ||
1021 | res->dl_entry = dir_ent; | ||
1022 | res->dl_dx_leaf_bh = dx_leaf_bh; | ||
1023 | res->dl_dx_entry = dx_entry; | ||
1024 | |||
1025 | ret = 0; | ||
1026 | out: | ||
1027 | if (ret) { | ||
1028 | brelse(dx_leaf_bh); | ||
1029 | brelse(dir_ent_bh); | ||
1030 | } | ||
1031 | return ret; | ||
1032 | } | ||
1033 | |||
1034 | static int ocfs2_find_entry_dx(const char *name, int namelen, | ||
1035 | struct inode *dir, | ||
1036 | struct ocfs2_dir_lookup_result *lookup) | ||
1037 | { | ||
1038 | int ret; | ||
1039 | struct buffer_head *di_bh = NULL; | ||
1040 | struct ocfs2_dinode *di; | ||
1041 | struct buffer_head *dx_root_bh = NULL; | ||
1042 | struct ocfs2_dx_root_block *dx_root; | ||
1043 | |||
1044 | ret = ocfs2_read_inode_block(dir, &di_bh); | ||
1045 | if (ret) { | ||
1046 | mlog_errno(ret); | ||
1047 | goto out; | ||
1048 | } | ||
1049 | |||
1050 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1051 | |||
1052 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
1053 | if (ret) { | ||
1054 | mlog_errno(ret); | ||
1055 | goto out; | ||
1056 | } | ||
1057 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
1058 | |||
1059 | ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup); | ||
1060 | if (ret) { | ||
1061 | if (ret != -ENOENT) | ||
1062 | mlog_errno(ret); | ||
1063 | goto out; | ||
1064 | } | ||
1065 | |||
1066 | lookup->dl_dx_root_bh = dx_root_bh; | ||
1067 | dx_root_bh = NULL; | ||
1068 | out: | ||
1069 | brelse(di_bh); | ||
1070 | brelse(dx_root_bh); | ||
1071 | return ret; | ||
1072 | } | ||
1073 | |||
483 | /* | 1074 | /* |
484 | * Try to find an entry of the provided name within 'dir'. | 1075 | * Try to find an entry of the provided name within 'dir'. |
485 | * | 1076 | * |
486 | * If nothing was found, NULL is returned. Otherwise, a buffer_head | 1077 | * If nothing was found, -ENOENT is returned. Otherwise, zero is |
487 | * and pointer to the dir entry are passed back. | 1078 | * returned and the struct 'res' will contain information useful to |
1079 | * other directory manipulation functions. | ||
488 | * | 1080 | * |
489 | * Caller can NOT assume anything about the contents of the | 1081 | * Caller can NOT assume anything about the contents of the |
490 | * buffer_head - it is passed back only so that it can be passed into | 1082 | * buffer_heads - they are passed back only so that it can be passed |
491 | * any one of the manipulation functions (add entry, delete entry, | 1083 | * into any one of the manipulation functions (add entry, delete |
492 | * etc). As an example, bh in the extent directory case is a data | 1084 | * entry, etc). As an example, bh in the extent directory case is a |
493 | * block, in the inline-data case it actually points to an inode. | 1085 | * data block, in the inline-data case it actually points to an inode, |
1086 | * in the indexed directory case, multiple buffers are involved. | ||
494 | */ | 1087 | */ |
495 | struct buffer_head *ocfs2_find_entry(const char *name, int namelen, | 1088 | int ocfs2_find_entry(const char *name, int namelen, |
496 | struct inode *dir, | 1089 | struct inode *dir, struct ocfs2_dir_lookup_result *lookup) |
497 | struct ocfs2_dir_entry **res_dir) | ||
498 | { | 1090 | { |
499 | *res_dir = NULL; | 1091 | struct buffer_head *bh; |
1092 | struct ocfs2_dir_entry *res_dir = NULL; | ||
500 | 1093 | ||
1094 | if (ocfs2_dir_indexed(dir)) | ||
1095 | return ocfs2_find_entry_dx(name, namelen, dir, lookup); | ||
1096 | |||
1097 | /* | ||
1098 | * The unindexed dir code only uses part of the lookup | ||
1099 | * structure, so there's no reason to push it down further | ||
1100 | * than this. | ||
1101 | */ | ||
501 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1102 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
502 | return ocfs2_find_entry_id(name, namelen, dir, res_dir); | 1103 | bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); |
1104 | else | ||
1105 | bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir); | ||
1106 | |||
1107 | if (bh == NULL) | ||
1108 | return -ENOENT; | ||
503 | 1109 | ||
504 | return ocfs2_find_entry_el(name, namelen, dir, res_dir); | 1110 | lookup->dl_leaf_bh = bh; |
1111 | lookup->dl_entry = res_dir; | ||
1112 | return 0; | ||
505 | } | 1113 | } |
506 | 1114 | ||
507 | /* | 1115 | /* |
508 | * Update inode number and type of a previously found directory entry. | 1116 | * Update inode number and type of a previously found directory entry. |
509 | */ | 1117 | */ |
510 | int ocfs2_update_entry(struct inode *dir, handle_t *handle, | 1118 | int ocfs2_update_entry(struct inode *dir, handle_t *handle, |
511 | struct buffer_head *de_bh, struct ocfs2_dir_entry *de, | 1119 | struct ocfs2_dir_lookup_result *res, |
512 | struct inode *new_entry_inode) | 1120 | struct inode *new_entry_inode) |
513 | { | 1121 | { |
514 | int ret; | 1122 | int ret; |
515 | ocfs2_journal_access_func access = ocfs2_journal_access_db; | 1123 | ocfs2_journal_access_func access = ocfs2_journal_access_db; |
1124 | struct ocfs2_dir_entry *de = res->dl_entry; | ||
1125 | struct buffer_head *de_bh = res->dl_leaf_bh; | ||
516 | 1126 | ||
517 | /* | 1127 | /* |
518 | * The same code works fine for both inline-data and extent | 1128 | * The same code works fine for both inline-data and extent |
@@ -538,6 +1148,10 @@ out: | |||
538 | return ret; | 1148 | return ret; |
539 | } | 1149 | } |
540 | 1150 | ||
1151 | /* | ||
1152 | * __ocfs2_delete_entry deletes a directory entry by merging it with the | ||
1153 | * previous entry | ||
1154 | */ | ||
541 | static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, | 1155 | static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, |
542 | struct ocfs2_dir_entry *de_del, | 1156 | struct ocfs2_dir_entry *de_del, |
543 | struct buffer_head *bh, char *first_de, | 1157 | struct buffer_head *bh, char *first_de, |
@@ -587,6 +1201,181 @@ bail: | |||
587 | return status; | 1201 | return status; |
588 | } | 1202 | } |
589 | 1203 | ||
1204 | static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de) | ||
1205 | { | ||
1206 | unsigned int hole; | ||
1207 | |||
1208 | if (le64_to_cpu(de->inode) == 0) | ||
1209 | hole = le16_to_cpu(de->rec_len); | ||
1210 | else | ||
1211 | hole = le16_to_cpu(de->rec_len) - | ||
1212 | OCFS2_DIR_REC_LEN(de->name_len); | ||
1213 | |||
1214 | return hole; | ||
1215 | } | ||
1216 | |||
1217 | static int ocfs2_find_max_rec_len(struct super_block *sb, | ||
1218 | struct buffer_head *dirblock_bh) | ||
1219 | { | ||
1220 | int size, this_hole, largest_hole = 0; | ||
1221 | char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data; | ||
1222 | struct ocfs2_dir_entry *de; | ||
1223 | |||
1224 | trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb); | ||
1225 | size = ocfs2_dir_trailer_blk_off(sb); | ||
1226 | limit = start + size; | ||
1227 | de_buf = start; | ||
1228 | de = (struct ocfs2_dir_entry *)de_buf; | ||
1229 | do { | ||
1230 | if (de_buf != trailer) { | ||
1231 | this_hole = ocfs2_figure_dirent_hole(de); | ||
1232 | if (this_hole > largest_hole) | ||
1233 | largest_hole = this_hole; | ||
1234 | } | ||
1235 | |||
1236 | de_buf += le16_to_cpu(de->rec_len); | ||
1237 | de = (struct ocfs2_dir_entry *)de_buf; | ||
1238 | } while (de_buf < limit); | ||
1239 | |||
1240 | if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) | ||
1241 | return largest_hole; | ||
1242 | return 0; | ||
1243 | } | ||
1244 | |||
1245 | static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, | ||
1246 | int index) | ||
1247 | { | ||
1248 | int num_used = le16_to_cpu(entry_list->de_num_used); | ||
1249 | |||
1250 | if (num_used == 1 || index == (num_used - 1)) | ||
1251 | goto clear; | ||
1252 | |||
1253 | memmove(&entry_list->de_entries[index], | ||
1254 | &entry_list->de_entries[index + 1], | ||
1255 | (num_used - index - 1)*sizeof(struct ocfs2_dx_entry)); | ||
1256 | clear: | ||
1257 | num_used--; | ||
1258 | memset(&entry_list->de_entries[num_used], 0, | ||
1259 | sizeof(struct ocfs2_dx_entry)); | ||
1260 | entry_list->de_num_used = cpu_to_le16(num_used); | ||
1261 | } | ||
1262 | |||
1263 | static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | ||
1264 | struct ocfs2_dir_lookup_result *lookup) | ||
1265 | { | ||
1266 | int ret, index, max_rec_len, add_to_free_list = 0; | ||
1267 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
1268 | struct buffer_head *leaf_bh = lookup->dl_leaf_bh; | ||
1269 | struct ocfs2_dx_leaf *dx_leaf; | ||
1270 | struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; | ||
1271 | struct ocfs2_dir_block_trailer *trailer; | ||
1272 | struct ocfs2_dx_root_block *dx_root; | ||
1273 | struct ocfs2_dx_entry_list *entry_list; | ||
1274 | |||
1275 | /* | ||
1276 | * This function gets a bit messy because we might have to | ||
1277 | * modify the root block, regardless of whether the indexed | ||
1278 | * entries are stored inline. | ||
1279 | */ | ||
1280 | |||
1281 | /* | ||
1282 | * *Only* set 'entry_list' here, based on where we're looking | ||
1283 | * for the indexed entries. Later, we might still want to | ||
1284 | * journal both blocks, based on free list state. | ||
1285 | */ | ||
1286 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
1287 | if (ocfs2_dx_root_inline(dx_root)) { | ||
1288 | entry_list = &dx_root->dr_entries; | ||
1289 | } else { | ||
1290 | dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data; | ||
1291 | entry_list = &dx_leaf->dl_list; | ||
1292 | } | ||
1293 | |||
1294 | /* Neither of these are a disk corruption - that should have | ||
1295 | * been caught by lookup, before we got here. */ | ||
1296 | BUG_ON(le16_to_cpu(entry_list->de_count) <= 0); | ||
1297 | BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0); | ||
1298 | |||
1299 | index = (char *)dx_entry - (char *)entry_list->de_entries; | ||
1300 | index /= sizeof(*dx_entry); | ||
1301 | |||
1302 | if (index >= le16_to_cpu(entry_list->de_num_used)) { | ||
1303 | mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n", | ||
1304 | (unsigned long long)OCFS2_I(dir)->ip_blkno, index, | ||
1305 | entry_list, dx_entry); | ||
1306 | return -EIO; | ||
1307 | } | ||
1308 | |||
1309 | /* | ||
1310 | * We know that removal of this dirent will leave enough room | ||
1311 | * for a new one, so add this block to the free list if it | ||
1312 | * isn't already there. | ||
1313 | */ | ||
1314 | trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb); | ||
1315 | if (trailer->db_free_rec_len == 0) | ||
1316 | add_to_free_list = 1; | ||
1317 | |||
1318 | /* | ||
1319 | * Add the block holding our index into the journal before | ||
1320 | * removing the unindexed entry. If we get an error return | ||
1321 | * from __ocfs2_delete_entry(), then it hasn't removed the | ||
1322 | * entry yet. Likewise, successful return means we *must* | ||
1323 | * remove the indexed entry. | ||
1324 | * | ||
1325 | * We're also careful to journal the root tree block here as | ||
1326 | * the entry count needs to be updated. Also, we might be | ||
1327 | * adding to the start of the free list. | ||
1328 | */ | ||
1329 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
1330 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1331 | if (ret) { | ||
1332 | mlog_errno(ret); | ||
1333 | goto out; | ||
1334 | } | ||
1335 | |||
1336 | if (!ocfs2_dx_root_inline(dx_root)) { | ||
1337 | ret = ocfs2_journal_access_dl(handle, dir, | ||
1338 | lookup->dl_dx_leaf_bh, | ||
1339 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1340 | if (ret) { | ||
1341 | mlog_errno(ret); | ||
1342 | goto out; | ||
1343 | } | ||
1344 | } | ||
1345 | |||
1346 | mlog(0, "Dir %llu: delete entry at index: %d\n", | ||
1347 | (unsigned long long)OCFS2_I(dir)->ip_blkno, index); | ||
1348 | |||
1349 | ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry, | ||
1350 | leaf_bh, leaf_bh->b_data, leaf_bh->b_size); | ||
1351 | if (ret) { | ||
1352 | mlog_errno(ret); | ||
1353 | goto out; | ||
1354 | } | ||
1355 | |||
1356 | max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh); | ||
1357 | trailer->db_free_rec_len = cpu_to_le16(max_rec_len); | ||
1358 | if (add_to_free_list) { | ||
1359 | trailer->db_free_next = dx_root->dr_free_blk; | ||
1360 | dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr); | ||
1361 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1362 | } | ||
1363 | |||
1364 | /* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */ | ||
1365 | ocfs2_journal_dirty(handle, leaf_bh); | ||
1366 | |||
1367 | le32_add_cpu(&dx_root->dr_num_entries, -1); | ||
1368 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1369 | |||
1370 | ocfs2_dx_list_remove_entry(entry_list, index); | ||
1371 | |||
1372 | if (!ocfs2_dx_root_inline(dx_root)) | ||
1373 | ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh); | ||
1374 | |||
1375 | out: | ||
1376 | return ret; | ||
1377 | } | ||
1378 | |||
590 | static inline int ocfs2_delete_entry_id(handle_t *handle, | 1379 | static inline int ocfs2_delete_entry_id(handle_t *handle, |
591 | struct inode *dir, | 1380 | struct inode *dir, |
592 | struct ocfs2_dir_entry *de_del, | 1381 | struct ocfs2_dir_entry *de_del, |
@@ -624,18 +1413,22 @@ static inline int ocfs2_delete_entry_el(handle_t *handle, | |||
624 | } | 1413 | } |
625 | 1414 | ||
626 | /* | 1415 | /* |
627 | * ocfs2_delete_entry deletes a directory entry by merging it with the | 1416 | * Delete a directory entry. Hide the details of directory |
628 | * previous entry | 1417 | * implementation from the caller. |
629 | */ | 1418 | */ |
630 | int ocfs2_delete_entry(handle_t *handle, | 1419 | int ocfs2_delete_entry(handle_t *handle, |
631 | struct inode *dir, | 1420 | struct inode *dir, |
632 | struct ocfs2_dir_entry *de_del, | 1421 | struct ocfs2_dir_lookup_result *res) |
633 | struct buffer_head *bh) | ||
634 | { | 1422 | { |
1423 | if (ocfs2_dir_indexed(dir)) | ||
1424 | return ocfs2_delete_entry_dx(handle, dir, res); | ||
1425 | |||
635 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1426 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
636 | return ocfs2_delete_entry_id(handle, dir, de_del, bh); | 1427 | return ocfs2_delete_entry_id(handle, dir, res->dl_entry, |
1428 | res->dl_leaf_bh); | ||
637 | 1429 | ||
638 | return ocfs2_delete_entry_el(handle, dir, de_del, bh); | 1430 | return ocfs2_delete_entry_el(handle, dir, res->dl_entry, |
1431 | res->dl_leaf_bh); | ||
639 | } | 1432 | } |
640 | 1433 | ||
641 | /* | 1434 | /* |
@@ -663,18 +1456,166 @@ static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de, | |||
663 | return 0; | 1456 | return 0; |
664 | } | 1457 | } |
665 | 1458 | ||
1459 | static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf, | ||
1460 | struct ocfs2_dx_entry *dx_new_entry) | ||
1461 | { | ||
1462 | int i; | ||
1463 | |||
1464 | i = le16_to_cpu(dx_leaf->dl_list.de_num_used); | ||
1465 | dx_leaf->dl_list.de_entries[i] = *dx_new_entry; | ||
1466 | |||
1467 | le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1); | ||
1468 | } | ||
1469 | |||
1470 | static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list, | ||
1471 | struct ocfs2_dx_hinfo *hinfo, | ||
1472 | u64 dirent_blk) | ||
1473 | { | ||
1474 | int i; | ||
1475 | struct ocfs2_dx_entry *dx_entry; | ||
1476 | |||
1477 | i = le16_to_cpu(entry_list->de_num_used); | ||
1478 | dx_entry = &entry_list->de_entries[i]; | ||
1479 | |||
1480 | memset(dx_entry, 0, sizeof(*dx_entry)); | ||
1481 | dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash); | ||
1482 | dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash); | ||
1483 | dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk); | ||
1484 | |||
1485 | le16_add_cpu(&entry_list->de_num_used, 1); | ||
1486 | } | ||
1487 | |||
1488 | static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, | ||
1489 | struct ocfs2_dx_hinfo *hinfo, | ||
1490 | u64 dirent_blk, | ||
1491 | struct buffer_head *dx_leaf_bh) | ||
1492 | { | ||
1493 | int ret; | ||
1494 | struct ocfs2_dx_leaf *dx_leaf; | ||
1495 | |||
1496 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | ||
1497 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1498 | if (ret) { | ||
1499 | mlog_errno(ret); | ||
1500 | goto out; | ||
1501 | } | ||
1502 | |||
1503 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
1504 | ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk); | ||
1505 | ocfs2_journal_dirty(handle, dx_leaf_bh); | ||
1506 | |||
1507 | out: | ||
1508 | return ret; | ||
1509 | } | ||
1510 | |||
1511 | static void ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle, | ||
1512 | struct ocfs2_dx_hinfo *hinfo, | ||
1513 | u64 dirent_blk, | ||
1514 | struct ocfs2_dx_root_block *dx_root) | ||
1515 | { | ||
1516 | ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk); | ||
1517 | } | ||
1518 | |||
1519 | static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle, | ||
1520 | struct ocfs2_dir_lookup_result *lookup) | ||
1521 | { | ||
1522 | int ret = 0; | ||
1523 | struct ocfs2_dx_root_block *dx_root; | ||
1524 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
1525 | |||
1526 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
1527 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1528 | if (ret) { | ||
1529 | mlog_errno(ret); | ||
1530 | goto out; | ||
1531 | } | ||
1532 | |||
1533 | dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data; | ||
1534 | if (ocfs2_dx_root_inline(dx_root)) { | ||
1535 | ocfs2_dx_inline_root_insert(dir, handle, | ||
1536 | &lookup->dl_hinfo, | ||
1537 | lookup->dl_leaf_bh->b_blocknr, | ||
1538 | dx_root); | ||
1539 | } else { | ||
1540 | ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo, | ||
1541 | lookup->dl_leaf_bh->b_blocknr, | ||
1542 | lookup->dl_dx_leaf_bh); | ||
1543 | if (ret) | ||
1544 | goto out; | ||
1545 | } | ||
1546 | |||
1547 | le32_add_cpu(&dx_root->dr_num_entries, 1); | ||
1548 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1549 | |||
1550 | out: | ||
1551 | return ret; | ||
1552 | } | ||
1553 | |||
1554 | static void ocfs2_remove_block_from_free_list(struct inode *dir, | ||
1555 | handle_t *handle, | ||
1556 | struct ocfs2_dir_lookup_result *lookup) | ||
1557 | { | ||
1558 | struct ocfs2_dir_block_trailer *trailer, *prev; | ||
1559 | struct ocfs2_dx_root_block *dx_root; | ||
1560 | struct buffer_head *bh; | ||
1561 | |||
1562 | trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb); | ||
1563 | |||
1564 | if (ocfs2_free_list_at_root(lookup)) { | ||
1565 | bh = lookup->dl_dx_root_bh; | ||
1566 | dx_root = (struct ocfs2_dx_root_block *)bh->b_data; | ||
1567 | dx_root->dr_free_blk = trailer->db_free_next; | ||
1568 | } else { | ||
1569 | bh = lookup->dl_prev_leaf_bh; | ||
1570 | prev = ocfs2_trailer_from_bh(bh, dir->i_sb); | ||
1571 | prev->db_free_next = trailer->db_free_next; | ||
1572 | } | ||
1573 | |||
1574 | trailer->db_free_rec_len = cpu_to_le16(0); | ||
1575 | trailer->db_free_next = cpu_to_le64(0); | ||
1576 | |||
1577 | ocfs2_journal_dirty(handle, bh); | ||
1578 | ocfs2_journal_dirty(handle, lookup->dl_leaf_bh); | ||
1579 | } | ||
1580 | |||
1581 | /* | ||
1582 | * This expects that a journal write has been reserved on | ||
1583 | * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh | ||
1584 | */ | ||
1585 | static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle, | ||
1586 | struct ocfs2_dir_lookup_result *lookup) | ||
1587 | { | ||
1588 | int max_rec_len; | ||
1589 | struct ocfs2_dir_block_trailer *trailer; | ||
1590 | |||
1591 | /* Walk dl_leaf_bh to figure out what the new free rec_len is. */ | ||
1592 | max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh); | ||
1593 | if (max_rec_len) { | ||
1594 | /* | ||
1595 | * There's still room in this block, so no need to remove it | ||
1596 | * from the free list. In this case, we just want to update | ||
1597 | * the rec len accounting. | ||
1598 | */ | ||
1599 | trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb); | ||
1600 | trailer->db_free_rec_len = cpu_to_le16(max_rec_len); | ||
1601 | ocfs2_journal_dirty(handle, lookup->dl_leaf_bh); | ||
1602 | } else { | ||
1603 | ocfs2_remove_block_from_free_list(dir, handle, lookup); | ||
1604 | } | ||
1605 | } | ||
1606 | |||
666 | /* we don't always have a dentry for what we want to add, so people | 1607 | /* we don't always have a dentry for what we want to add, so people |
667 | * like orphan dir can call this instead. | 1608 | * like orphan dir can call this instead. |
668 | * | 1609 | * |
669 | * If you pass me insert_bh, I'll skip the search of the other dir | 1610 | * The lookup context must have been filled from |
670 | * blocks and put the record in there. | 1611 | * ocfs2_prepare_dir_for_insert. |
671 | */ | 1612 | */ |
672 | int __ocfs2_add_entry(handle_t *handle, | 1613 | int __ocfs2_add_entry(handle_t *handle, |
673 | struct inode *dir, | 1614 | struct inode *dir, |
674 | const char *name, int namelen, | 1615 | const char *name, int namelen, |
675 | struct inode *inode, u64 blkno, | 1616 | struct inode *inode, u64 blkno, |
676 | struct buffer_head *parent_fe_bh, | 1617 | struct buffer_head *parent_fe_bh, |
677 | struct buffer_head *insert_bh) | 1618 | struct ocfs2_dir_lookup_result *lookup) |
678 | { | 1619 | { |
679 | unsigned long offset; | 1620 | unsigned long offset; |
680 | unsigned short rec_len; | 1621 | unsigned short rec_len; |
@@ -683,6 +1624,7 @@ int __ocfs2_add_entry(handle_t *handle, | |||
683 | struct super_block *sb = dir->i_sb; | 1624 | struct super_block *sb = dir->i_sb; |
684 | int retval, status; | 1625 | int retval, status; |
685 | unsigned int size = sb->s_blocksize; | 1626 | unsigned int size = sb->s_blocksize; |
1627 | struct buffer_head *insert_bh = lookup->dl_leaf_bh; | ||
686 | char *data_start = insert_bh->b_data; | 1628 | char *data_start = insert_bh->b_data; |
687 | 1629 | ||
688 | mlog_entry_void(); | 1630 | mlog_entry_void(); |
@@ -690,7 +1632,31 @@ int __ocfs2_add_entry(handle_t *handle, | |||
690 | if (!namelen) | 1632 | if (!namelen) |
691 | return -EINVAL; | 1633 | return -EINVAL; |
692 | 1634 | ||
693 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 1635 | if (ocfs2_dir_indexed(dir)) { |
1636 | struct buffer_head *bh; | ||
1637 | |||
1638 | /* | ||
1639 | * An indexed dir may require that we update the free space | ||
1640 | * list. Reserve a write to the previous node in the list so | ||
1641 | * that we don't fail later. | ||
1642 | * | ||
1643 | * XXX: This can be either a dx_root_block, or an unindexed | ||
1644 | * directory tree leaf block. | ||
1645 | */ | ||
1646 | if (ocfs2_free_list_at_root(lookup)) { | ||
1647 | bh = lookup->dl_dx_root_bh; | ||
1648 | retval = ocfs2_journal_access_dr(handle, dir, bh, | ||
1649 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1650 | } else { | ||
1651 | bh = lookup->dl_prev_leaf_bh; | ||
1652 | retval = ocfs2_journal_access_db(handle, dir, bh, | ||
1653 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1654 | } | ||
1655 | if (retval) { | ||
1656 | mlog_errno(retval); | ||
1657 | return retval; | ||
1658 | } | ||
1659 | } else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
694 | data_start = di->id2.i_data.id_data; | 1660 | data_start = di->id2.i_data.id_data; |
695 | size = i_size_read(dir); | 1661 | size = i_size_read(dir); |
696 | 1662 | ||
@@ -737,10 +1703,22 @@ int __ocfs2_add_entry(handle_t *handle, | |||
737 | status = ocfs2_journal_access_di(handle, dir, | 1703 | status = ocfs2_journal_access_di(handle, dir, |
738 | insert_bh, | 1704 | insert_bh, |
739 | OCFS2_JOURNAL_ACCESS_WRITE); | 1705 | OCFS2_JOURNAL_ACCESS_WRITE); |
740 | else | 1706 | else { |
741 | status = ocfs2_journal_access_db(handle, dir, | 1707 | status = ocfs2_journal_access_db(handle, dir, |
742 | insert_bh, | 1708 | insert_bh, |
743 | OCFS2_JOURNAL_ACCESS_WRITE); | 1709 | OCFS2_JOURNAL_ACCESS_WRITE); |
1710 | |||
1711 | if (ocfs2_dir_indexed(dir)) { | ||
1712 | status = ocfs2_dx_dir_insert(dir, | ||
1713 | handle, | ||
1714 | lookup); | ||
1715 | if (status) { | ||
1716 | mlog_errno(status); | ||
1717 | goto bail; | ||
1718 | } | ||
1719 | } | ||
1720 | } | ||
1721 | |||
744 | /* By now the buffer is marked for journaling */ | 1722 | /* By now the buffer is marked for journaling */ |
745 | offset += le16_to_cpu(de->rec_len); | 1723 | offset += le16_to_cpu(de->rec_len); |
746 | if (le64_to_cpu(de->inode)) { | 1724 | if (le64_to_cpu(de->inode)) { |
@@ -761,6 +1739,9 @@ int __ocfs2_add_entry(handle_t *handle, | |||
761 | de->name_len = namelen; | 1739 | de->name_len = namelen; |
762 | memcpy(de->name, name, namelen); | 1740 | memcpy(de->name, name, namelen); |
763 | 1741 | ||
1742 | if (ocfs2_dir_indexed(dir)) | ||
1743 | ocfs2_recalc_free_list(dir, handle, lookup); | ||
1744 | |||
764 | dir->i_version++; | 1745 | dir->i_version++; |
765 | status = ocfs2_journal_dirty(handle, insert_bh); | 1746 | status = ocfs2_journal_dirty(handle, insert_bh); |
766 | retval = 0; | 1747 | retval = 0; |
@@ -870,6 +1851,10 @@ out: | |||
870 | return 0; | 1851 | return 0; |
871 | } | 1852 | } |
872 | 1853 | ||
1854 | /* | ||
1855 | * NOTE: This function can be called against unindexed directories, | ||
1856 | * and indexed ones. | ||
1857 | */ | ||
873 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, | 1858 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, |
874 | u64 *f_version, | 1859 | u64 *f_version, |
875 | loff_t *f_pos, void *priv, | 1860 | loff_t *f_pos, void *priv, |
@@ -1071,31 +2056,22 @@ int ocfs2_find_files_on_disk(const char *name, | |||
1071 | int namelen, | 2056 | int namelen, |
1072 | u64 *blkno, | 2057 | u64 *blkno, |
1073 | struct inode *inode, | 2058 | struct inode *inode, |
1074 | struct buffer_head **dirent_bh, | 2059 | struct ocfs2_dir_lookup_result *lookup) |
1075 | struct ocfs2_dir_entry **dirent) | ||
1076 | { | 2060 | { |
1077 | int status = -ENOENT; | 2061 | int status = -ENOENT; |
1078 | 2062 | ||
1079 | mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", | 2063 | mlog(0, "name=%.*s, blkno=%p, inode=%llu\n", namelen, name, blkno, |
1080 | namelen, name, blkno, inode, dirent_bh, dirent); | 2064 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1081 | 2065 | ||
1082 | *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); | 2066 | status = ocfs2_find_entry(name, namelen, inode, lookup); |
1083 | if (!*dirent_bh || !*dirent) { | 2067 | if (status) |
1084 | status = -ENOENT; | ||
1085 | goto leave; | 2068 | goto leave; |
1086 | } | ||
1087 | 2069 | ||
1088 | *blkno = le64_to_cpu((*dirent)->inode); | 2070 | *blkno = le64_to_cpu(lookup->dl_entry->inode); |
1089 | 2071 | ||
1090 | status = 0; | 2072 | status = 0; |
1091 | leave: | 2073 | leave: |
1092 | if (status < 0) { | ||
1093 | *dirent = NULL; | ||
1094 | brelse(*dirent_bh); | ||
1095 | *dirent_bh = NULL; | ||
1096 | } | ||
1097 | 2074 | ||
1098 | mlog_exit(status); | ||
1099 | return status; | 2075 | return status; |
1100 | } | 2076 | } |
1101 | 2077 | ||
@@ -1107,11 +2083,10 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, | |||
1107 | int namelen, u64 *blkno) | 2083 | int namelen, u64 *blkno) |
1108 | { | 2084 | { |
1109 | int ret; | 2085 | int ret; |
1110 | struct buffer_head *bh = NULL; | 2086 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
1111 | struct ocfs2_dir_entry *dirent = NULL; | ||
1112 | 2087 | ||
1113 | ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &bh, &dirent); | 2088 | ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &lookup); |
1114 | brelse(bh); | 2089 | ocfs2_free_dir_lookup_result(&lookup); |
1115 | 2090 | ||
1116 | return ret; | 2091 | return ret; |
1117 | } | 2092 | } |
@@ -1128,20 +2103,18 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
1128 | int namelen) | 2103 | int namelen) |
1129 | { | 2104 | { |
1130 | int ret; | 2105 | int ret; |
1131 | struct buffer_head *dirent_bh = NULL; | 2106 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
1132 | struct ocfs2_dir_entry *dirent = NULL; | ||
1133 | 2107 | ||
1134 | mlog_entry("dir %llu, name '%.*s'\n", | 2108 | mlog_entry("dir %llu, name '%.*s'\n", |
1135 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); | 2109 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); |
1136 | 2110 | ||
1137 | ret = -EEXIST; | 2111 | ret = -EEXIST; |
1138 | dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); | 2112 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) |
1139 | if (dirent_bh) | ||
1140 | goto bail; | 2113 | goto bail; |
1141 | 2114 | ||
1142 | ret = 0; | 2115 | ret = 0; |
1143 | bail: | 2116 | bail: |
1144 | brelse(dirent_bh); | 2117 | ocfs2_free_dir_lookup_result(&lookup); |
1145 | 2118 | ||
1146 | mlog_exit(ret); | 2119 | mlog_exit(ret); |
1147 | return ret; | 2120 | return ret; |
@@ -1151,6 +2124,7 @@ struct ocfs2_empty_dir_priv { | |||
1151 | unsigned seen_dot; | 2124 | unsigned seen_dot; |
1152 | unsigned seen_dot_dot; | 2125 | unsigned seen_dot_dot; |
1153 | unsigned seen_other; | 2126 | unsigned seen_other; |
2127 | unsigned dx_dir; | ||
1154 | }; | 2128 | }; |
1155 | static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | 2129 | static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, |
1156 | loff_t pos, u64 ino, unsigned type) | 2130 | loff_t pos, u64 ino, unsigned type) |
@@ -1160,6 +2134,13 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | |||
1160 | /* | 2134 | /* |
1161 | * Check the positions of "." and ".." records to be sure | 2135 | * Check the positions of "." and ".." records to be sure |
1162 | * they're in the correct place. | 2136 | * they're in the correct place. |
2137 | * | ||
2138 | * Indexed directories don't need to proceed past the first | ||
2139 | * two entries, so we end the scan after seeing '..'. Despite | ||
2140 | * that, we allow the scan to proceed In the event that we | ||
2141 | * have a corrupted indexed directory (no dot or dot dot | ||
2142 | * entries). This allows us to double check for existing | ||
2143 | * entries which might not have been found in the index. | ||
1163 | */ | 2144 | */ |
1164 | if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) { | 2145 | if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) { |
1165 | p->seen_dot = 1; | 2146 | p->seen_dot = 1; |
@@ -1169,16 +2150,57 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | |||
1169 | if (name_len == 2 && !strncmp("..", name, 2) && | 2150 | if (name_len == 2 && !strncmp("..", name, 2) && |
1170 | pos == OCFS2_DIR_REC_LEN(1)) { | 2151 | pos == OCFS2_DIR_REC_LEN(1)) { |
1171 | p->seen_dot_dot = 1; | 2152 | p->seen_dot_dot = 1; |
2153 | |||
2154 | if (p->dx_dir && p->seen_dot) | ||
2155 | return 1; | ||
2156 | |||
1172 | return 0; | 2157 | return 0; |
1173 | } | 2158 | } |
1174 | 2159 | ||
1175 | p->seen_other = 1; | 2160 | p->seen_other = 1; |
1176 | return 1; | 2161 | return 1; |
1177 | } | 2162 | } |
2163 | |||
2164 | static int ocfs2_empty_dir_dx(struct inode *inode, | ||
2165 | struct ocfs2_empty_dir_priv *priv) | ||
2166 | { | ||
2167 | int ret; | ||
2168 | struct buffer_head *di_bh = NULL; | ||
2169 | struct buffer_head *dx_root_bh = NULL; | ||
2170 | struct ocfs2_dinode *di; | ||
2171 | struct ocfs2_dx_root_block *dx_root; | ||
2172 | |||
2173 | priv->dx_dir = 1; | ||
2174 | |||
2175 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
2176 | if (ret) { | ||
2177 | mlog_errno(ret); | ||
2178 | goto out; | ||
2179 | } | ||
2180 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2181 | |||
2182 | ret = ocfs2_read_dx_root(inode, di, &dx_root_bh); | ||
2183 | if (ret) { | ||
2184 | mlog_errno(ret); | ||
2185 | goto out; | ||
2186 | } | ||
2187 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2188 | |||
2189 | if (le32_to_cpu(dx_root->dr_num_entries) != 2) | ||
2190 | priv->seen_other = 1; | ||
2191 | |||
2192 | out: | ||
2193 | brelse(di_bh); | ||
2194 | brelse(dx_root_bh); | ||
2195 | return ret; | ||
2196 | } | ||
2197 | |||
1178 | /* | 2198 | /* |
1179 | * routine to check that the specified directory is empty (for rmdir) | 2199 | * routine to check that the specified directory is empty (for rmdir) |
1180 | * | 2200 | * |
1181 | * Returns 1 if dir is empty, zero otherwise. | 2201 | * Returns 1 if dir is empty, zero otherwise. |
2202 | * | ||
2203 | * XXX: This is a performance problem for unindexed directories. | ||
1182 | */ | 2204 | */ |
1183 | int ocfs2_empty_dir(struct inode *inode) | 2205 | int ocfs2_empty_dir(struct inode *inode) |
1184 | { | 2206 | { |
@@ -1188,6 +2210,16 @@ int ocfs2_empty_dir(struct inode *inode) | |||
1188 | 2210 | ||
1189 | memset(&priv, 0, sizeof(priv)); | 2211 | memset(&priv, 0, sizeof(priv)); |
1190 | 2212 | ||
2213 | if (ocfs2_dir_indexed(inode)) { | ||
2214 | ret = ocfs2_empty_dir_dx(inode, &priv); | ||
2215 | if (ret) | ||
2216 | mlog_errno(ret); | ||
2217 | /* | ||
2218 | * We still run ocfs2_dir_foreach to get the checks | ||
2219 | * for "." and "..". | ||
2220 | */ | ||
2221 | } | ||
2222 | |||
1191 | ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); | 2223 | ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); |
1192 | if (ret) | 2224 | if (ret) |
1193 | mlog_errno(ret); | 2225 | mlog_errno(ret); |
@@ -1280,7 +2312,8 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1280 | struct inode *parent, | 2312 | struct inode *parent, |
1281 | struct inode *inode, | 2313 | struct inode *inode, |
1282 | struct buffer_head *fe_bh, | 2314 | struct buffer_head *fe_bh, |
1283 | struct ocfs2_alloc_context *data_ac) | 2315 | struct ocfs2_alloc_context *data_ac, |
2316 | struct buffer_head **ret_new_bh) | ||
1284 | { | 2317 | { |
1285 | int status; | 2318 | int status; |
1286 | unsigned int size = osb->sb->s_blocksize; | 2319 | unsigned int size = osb->sb->s_blocksize; |
@@ -1289,7 +2322,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1289 | 2322 | ||
1290 | mlog_entry_void(); | 2323 | mlog_entry_void(); |
1291 | 2324 | ||
1292 | if (ocfs2_supports_dir_trailer(osb)) | 2325 | if (ocfs2_new_dir_wants_trailer(inode)) |
1293 | size = ocfs2_dir_trailer_blk_off(parent->i_sb); | 2326 | size = ocfs2_dir_trailer_blk_off(parent->i_sb); |
1294 | 2327 | ||
1295 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, | 2328 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, |
@@ -1310,8 +2343,19 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1310 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); | 2343 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); |
1311 | 2344 | ||
1312 | de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); | 2345 | de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); |
1313 | if (ocfs2_supports_dir_trailer(osb)) | 2346 | if (ocfs2_new_dir_wants_trailer(inode)) { |
1314 | ocfs2_init_dir_trailer(inode, new_bh); | 2347 | int size = le16_to_cpu(de->rec_len); |
2348 | |||
2349 | /* | ||
2350 | * Figure out the size of the hole left over after | ||
2351 | * insertion of '.' and '..'. The trailer wants this | ||
2352 | * information. | ||
2353 | */ | ||
2354 | size -= OCFS2_DIR_REC_LEN(2); | ||
2355 | size -= sizeof(struct ocfs2_dir_block_trailer); | ||
2356 | |||
2357 | ocfs2_init_dir_trailer(inode, new_bh, size); | ||
2358 | } | ||
1315 | 2359 | ||
1316 | status = ocfs2_journal_dirty(handle, new_bh); | 2360 | status = ocfs2_journal_dirty(handle, new_bh); |
1317 | if (status < 0) { | 2361 | if (status < 0) { |
@@ -1329,6 +2373,10 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1329 | } | 2373 | } |
1330 | 2374 | ||
1331 | status = 0; | 2375 | status = 0; |
2376 | if (ret_new_bh) { | ||
2377 | *ret_new_bh = new_bh; | ||
2378 | new_bh = NULL; | ||
2379 | } | ||
1332 | bail: | 2380 | bail: |
1333 | brelse(new_bh); | 2381 | brelse(new_bh); |
1334 | 2382 | ||
@@ -1336,20 +2384,427 @@ bail: | |||
1336 | return status; | 2384 | return status; |
1337 | } | 2385 | } |
1338 | 2386 | ||
2387 | static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | ||
2388 | handle_t *handle, struct inode *dir, | ||
2389 | struct buffer_head *di_bh, | ||
2390 | struct buffer_head *dirdata_bh, | ||
2391 | struct ocfs2_alloc_context *meta_ac, | ||
2392 | int dx_inline, u32 num_entries, | ||
2393 | struct buffer_head **ret_dx_root_bh) | ||
2394 | { | ||
2395 | int ret; | ||
2396 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2397 | u16 dr_suballoc_bit; | ||
2398 | u64 dr_blkno; | ||
2399 | unsigned int num_bits; | ||
2400 | struct buffer_head *dx_root_bh = NULL; | ||
2401 | struct ocfs2_dx_root_block *dx_root; | ||
2402 | struct ocfs2_dir_block_trailer *trailer = | ||
2403 | ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); | ||
2404 | |||
2405 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, | ||
2406 | &num_bits, &dr_blkno); | ||
2407 | if (ret) { | ||
2408 | mlog_errno(ret); | ||
2409 | goto out; | ||
2410 | } | ||
2411 | |||
2412 | mlog(0, "Dir %llu, attach new index block: %llu\n", | ||
2413 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
2414 | (unsigned long long)dr_blkno); | ||
2415 | |||
2416 | dx_root_bh = sb_getblk(osb->sb, dr_blkno); | ||
2417 | if (dx_root_bh == NULL) { | ||
2418 | ret = -EIO; | ||
2419 | goto out; | ||
2420 | } | ||
2421 | ocfs2_set_new_buffer_uptodate(dir, dx_root_bh); | ||
2422 | |||
2423 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
2424 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2425 | if (ret < 0) { | ||
2426 | mlog_errno(ret); | ||
2427 | goto out; | ||
2428 | } | ||
2429 | |||
2430 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2431 | memset(dx_root, 0, osb->sb->s_blocksize); | ||
2432 | strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); | ||
2433 | dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
2434 | dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); | ||
2435 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2436 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); | ||
2437 | dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); | ||
2438 | dx_root->dr_num_entries = cpu_to_le32(num_entries); | ||
2439 | if (le16_to_cpu(trailer->db_free_rec_len)) | ||
2440 | dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr); | ||
2441 | else | ||
2442 | dx_root->dr_free_blk = cpu_to_le64(0); | ||
2443 | |||
2444 | if (dx_inline) { | ||
2445 | dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; | ||
2446 | dx_root->dr_entries.de_count = | ||
2447 | cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb)); | ||
2448 | } else { | ||
2449 | dx_root->dr_list.l_count = | ||
2450 | cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); | ||
2451 | } | ||
2452 | |||
2453 | ret = ocfs2_journal_dirty(handle, dx_root_bh); | ||
2454 | if (ret) | ||
2455 | mlog_errno(ret); | ||
2456 | |||
2457 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | ||
2458 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2459 | if (ret) { | ||
2460 | mlog_errno(ret); | ||
2461 | goto out; | ||
2462 | } | ||
2463 | |||
2464 | di->i_dx_root = cpu_to_le64(dr_blkno); | ||
2465 | |||
2466 | OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; | ||
2467 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | ||
2468 | |||
2469 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
2470 | if (ret) | ||
2471 | mlog_errno(ret); | ||
2472 | |||
2473 | *ret_dx_root_bh = dx_root_bh; | ||
2474 | dx_root_bh = NULL; | ||
2475 | |||
2476 | out: | ||
2477 | brelse(dx_root_bh); | ||
2478 | return ret; | ||
2479 | } | ||
2480 | |||
2481 | static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb, | ||
2482 | handle_t *handle, struct inode *dir, | ||
2483 | struct buffer_head **dx_leaves, | ||
2484 | int num_dx_leaves, u64 start_blk) | ||
2485 | { | ||
2486 | int ret, i; | ||
2487 | struct ocfs2_dx_leaf *dx_leaf; | ||
2488 | struct buffer_head *bh; | ||
2489 | |||
2490 | for (i = 0; i < num_dx_leaves; i++) { | ||
2491 | bh = sb_getblk(osb->sb, start_blk + i); | ||
2492 | if (bh == NULL) { | ||
2493 | ret = -EIO; | ||
2494 | goto out; | ||
2495 | } | ||
2496 | dx_leaves[i] = bh; | ||
2497 | |||
2498 | ocfs2_set_new_buffer_uptodate(dir, bh); | ||
2499 | |||
2500 | ret = ocfs2_journal_access_dl(handle, dir, bh, | ||
2501 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2502 | if (ret < 0) { | ||
2503 | mlog_errno(ret); | ||
2504 | goto out; | ||
2505 | } | ||
2506 | |||
2507 | dx_leaf = (struct ocfs2_dx_leaf *) bh->b_data; | ||
2508 | |||
2509 | memset(dx_leaf, 0, osb->sb->s_blocksize); | ||
2510 | strcpy(dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE); | ||
2511 | dx_leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2512 | dx_leaf->dl_blkno = cpu_to_le64(bh->b_blocknr); | ||
2513 | dx_leaf->dl_list.de_count = | ||
2514 | cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb)); | ||
2515 | |||
2516 | mlog(0, | ||
2517 | "Dir %llu, format dx_leaf: %llu, entry count: %u\n", | ||
2518 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
2519 | (unsigned long long)bh->b_blocknr, | ||
2520 | le16_to_cpu(dx_leaf->dl_list.de_count)); | ||
2521 | |||
2522 | ocfs2_journal_dirty(handle, bh); | ||
2523 | } | ||
2524 | |||
2525 | ret = 0; | ||
2526 | out: | ||
2527 | return ret; | ||
2528 | } | ||
2529 | |||
2530 | /* | ||
2531 | * Allocates and formats a new cluster for use in an indexed dir | ||
2532 | * leaf. This version will not do the extent insert, so that it can be | ||
2533 | * used by operations which need careful ordering. | ||
2534 | */ | ||
2535 | static int __ocfs2_dx_dir_new_cluster(struct inode *dir, | ||
2536 | u32 cpos, handle_t *handle, | ||
2537 | struct ocfs2_alloc_context *data_ac, | ||
2538 | struct buffer_head **dx_leaves, | ||
2539 | int num_dx_leaves, u64 *ret_phys_blkno) | ||
2540 | { | ||
2541 | int ret; | ||
2542 | u32 phys, num; | ||
2543 | u64 phys_blkno; | ||
2544 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2545 | |||
2546 | /* | ||
2547 | * XXX: For create, this should claim cluster for the index | ||
2548 | * *before* the unindexed insert so that we have a better | ||
2549 | * chance of contiguousness as the directory grows in number | ||
2550 | * of entries. | ||
2551 | */ | ||
2552 | ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num); | ||
2553 | if (ret) { | ||
2554 | mlog_errno(ret); | ||
2555 | goto out; | ||
2556 | } | ||
2557 | |||
2558 | /* | ||
2559 | * Format the new cluster first. That way, we're inserting | ||
2560 | * valid data. | ||
2561 | */ | ||
2562 | phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys); | ||
2563 | ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves, | ||
2564 | num_dx_leaves, phys_blkno); | ||
2565 | if (ret) { | ||
2566 | mlog_errno(ret); | ||
2567 | goto out; | ||
2568 | } | ||
2569 | |||
2570 | *ret_phys_blkno = phys_blkno; | ||
2571 | out: | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2575 | static int ocfs2_dx_dir_new_cluster(struct inode *dir, | ||
2576 | struct ocfs2_extent_tree *et, | ||
2577 | u32 cpos, handle_t *handle, | ||
2578 | struct ocfs2_alloc_context *data_ac, | ||
2579 | struct ocfs2_alloc_context *meta_ac, | ||
2580 | struct buffer_head **dx_leaves, | ||
2581 | int num_dx_leaves) | ||
2582 | { | ||
2583 | int ret; | ||
2584 | u64 phys_blkno; | ||
2585 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2586 | |||
2587 | ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, | ||
2588 | num_dx_leaves, &phys_blkno); | ||
2589 | if (ret) { | ||
2590 | mlog_errno(ret); | ||
2591 | goto out; | ||
2592 | } | ||
2593 | |||
2594 | ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0, | ||
2595 | meta_ac); | ||
2596 | if (ret) | ||
2597 | mlog_errno(ret); | ||
2598 | out: | ||
2599 | return ret; | ||
2600 | } | ||
2601 | |||
2602 | static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb, | ||
2603 | int *ret_num_leaves) | ||
2604 | { | ||
2605 | int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1); | ||
2606 | struct buffer_head **dx_leaves; | ||
2607 | |||
2608 | dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *), | ||
2609 | GFP_NOFS); | ||
2610 | if (dx_leaves && ret_num_leaves) | ||
2611 | *ret_num_leaves = num_dx_leaves; | ||
2612 | |||
2613 | return dx_leaves; | ||
2614 | } | ||
2615 | |||
2616 | static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, | ||
2617 | handle_t *handle, | ||
2618 | struct inode *parent, | ||
2619 | struct inode *inode, | ||
2620 | struct buffer_head *di_bh, | ||
2621 | struct ocfs2_alloc_context *data_ac, | ||
2622 | struct ocfs2_alloc_context *meta_ac) | ||
2623 | { | ||
2624 | int ret; | ||
2625 | struct buffer_head *leaf_bh = NULL; | ||
2626 | struct buffer_head *dx_root_bh = NULL; | ||
2627 | struct ocfs2_dx_hinfo hinfo; | ||
2628 | struct ocfs2_dx_root_block *dx_root; | ||
2629 | struct ocfs2_dx_entry_list *entry_list; | ||
2630 | |||
2631 | /* | ||
2632 | * Our strategy is to create the directory as though it were | ||
2633 | * unindexed, then add the index block. This works with very | ||
2634 | * little complication since the state of a new directory is a | ||
2635 | * very well known quantity. | ||
2636 | * | ||
2637 | * Essentially, we have two dirents ("." and ".."), in the 1st | ||
2638 | * block which need indexing. These are easily inserted into | ||
2639 | * the index block. | ||
2640 | */ | ||
2641 | |||
2642 | ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh, | ||
2643 | data_ac, &leaf_bh); | ||
2644 | if (ret) { | ||
2645 | mlog_errno(ret); | ||
2646 | goto out; | ||
2647 | } | ||
2648 | |||
2649 | ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, leaf_bh, | ||
2650 | meta_ac, 1, 2, &dx_root_bh); | ||
2651 | if (ret) { | ||
2652 | mlog_errno(ret); | ||
2653 | goto out; | ||
2654 | } | ||
2655 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2656 | entry_list = &dx_root->dr_entries; | ||
2657 | |||
2658 | /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */ | ||
2659 | ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo); | ||
2660 | ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); | ||
2661 | |||
2662 | ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); | ||
2663 | ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); | ||
2664 | |||
2665 | out: | ||
2666 | brelse(dx_root_bh); | ||
2667 | brelse(leaf_bh); | ||
2668 | return ret; | ||
2669 | } | ||
2670 | |||
1339 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, | 2671 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, |
1340 | handle_t *handle, | 2672 | handle_t *handle, |
1341 | struct inode *parent, | 2673 | struct inode *parent, |
1342 | struct inode *inode, | 2674 | struct inode *inode, |
1343 | struct buffer_head *fe_bh, | 2675 | struct buffer_head *fe_bh, |
1344 | struct ocfs2_alloc_context *data_ac) | 2676 | struct ocfs2_alloc_context *data_ac, |
2677 | struct ocfs2_alloc_context *meta_ac) | ||
2678 | |||
1345 | { | 2679 | { |
1346 | BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); | 2680 | BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); |
1347 | 2681 | ||
1348 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 2682 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
1349 | return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); | 2683 | return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); |
1350 | 2684 | ||
2685 | if (ocfs2_supports_indexed_dirs(osb)) | ||
2686 | return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh, | ||
2687 | data_ac, meta_ac); | ||
2688 | |||
1351 | return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, | 2689 | return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, |
1352 | data_ac); | 2690 | data_ac, NULL); |
2691 | } | ||
2692 | |||
2693 | static int ocfs2_dx_dir_index_block(struct inode *dir, | ||
2694 | handle_t *handle, | ||
2695 | struct buffer_head **dx_leaves, | ||
2696 | int num_dx_leaves, | ||
2697 | u32 *num_dx_entries, | ||
2698 | struct buffer_head *dirent_bh) | ||
2699 | { | ||
2700 | int ret, namelen, i; | ||
2701 | char *de_buf, *limit; | ||
2702 | struct ocfs2_dir_entry *de; | ||
2703 | struct buffer_head *dx_leaf_bh; | ||
2704 | struct ocfs2_dx_hinfo hinfo; | ||
2705 | u64 dirent_blk = dirent_bh->b_blocknr; | ||
2706 | |||
2707 | de_buf = dirent_bh->b_data; | ||
2708 | limit = de_buf + dir->i_sb->s_blocksize; | ||
2709 | |||
2710 | while (de_buf < limit) { | ||
2711 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2712 | |||
2713 | namelen = de->name_len; | ||
2714 | if (!namelen || !de->inode) | ||
2715 | goto inc; | ||
2716 | |||
2717 | ocfs2_dx_dir_name_hash(dir, de->name, namelen, &hinfo); | ||
2718 | |||
2719 | i = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hinfo); | ||
2720 | dx_leaf_bh = dx_leaves[i]; | ||
2721 | |||
2722 | ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &hinfo, | ||
2723 | dirent_blk, dx_leaf_bh); | ||
2724 | if (ret) { | ||
2725 | mlog_errno(ret); | ||
2726 | goto out; | ||
2727 | } | ||
2728 | |||
2729 | *num_dx_entries = *num_dx_entries + 1; | ||
2730 | |||
2731 | inc: | ||
2732 | de_buf += le16_to_cpu(de->rec_len); | ||
2733 | } | ||
2734 | |||
2735 | out: | ||
2736 | return ret; | ||
2737 | } | ||
2738 | |||
2739 | /* | ||
2740 | * XXX: This expects dx_root_bh to already be part of the transaction. | ||
2741 | */ | ||
2742 | static void ocfs2_dx_dir_index_root_block(struct inode *dir, | ||
2743 | struct buffer_head *dx_root_bh, | ||
2744 | struct buffer_head *dirent_bh) | ||
2745 | { | ||
2746 | char *de_buf, *limit; | ||
2747 | struct ocfs2_dx_root_block *dx_root; | ||
2748 | struct ocfs2_dir_entry *de; | ||
2749 | struct ocfs2_dx_hinfo hinfo; | ||
2750 | u64 dirent_blk = dirent_bh->b_blocknr; | ||
2751 | |||
2752 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2753 | |||
2754 | de_buf = dirent_bh->b_data; | ||
2755 | limit = de_buf + dir->i_sb->s_blocksize; | ||
2756 | |||
2757 | while (de_buf < limit) { | ||
2758 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2759 | |||
2760 | if (!de->name_len || !de->inode) | ||
2761 | goto inc; | ||
2762 | |||
2763 | ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo); | ||
2764 | |||
2765 | mlog(0, | ||
2766 | "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n", | ||
2767 | (unsigned long long)dir->i_ino, hinfo.major_hash, | ||
2768 | hinfo.minor_hash, | ||
2769 | le16_to_cpu(dx_root->dr_entries.de_num_used), | ||
2770 | de->name_len, de->name); | ||
2771 | |||
2772 | ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo, | ||
2773 | dirent_blk); | ||
2774 | |||
2775 | le32_add_cpu(&dx_root->dr_num_entries, 1); | ||
2776 | inc: | ||
2777 | de_buf += le16_to_cpu(de->rec_len); | ||
2778 | } | ||
2779 | } | ||
2780 | |||
2781 | /* | ||
2782 | * Count the number of inline directory entries in di_bh and compare | ||
2783 | * them against the number of entries we can hold in an inline dx root | ||
2784 | * block. | ||
2785 | */ | ||
2786 | static int ocfs2_new_dx_should_be_inline(struct inode *dir, | ||
2787 | struct buffer_head *di_bh) | ||
2788 | { | ||
2789 | int dirent_count = 0; | ||
2790 | char *de_buf, *limit; | ||
2791 | struct ocfs2_dir_entry *de; | ||
2792 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2793 | |||
2794 | de_buf = di->id2.i_data.id_data; | ||
2795 | limit = de_buf + i_size_read(dir); | ||
2796 | |||
2797 | while (de_buf < limit) { | ||
2798 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2799 | |||
2800 | if (de->name_len && de->inode) | ||
2801 | dirent_count++; | ||
2802 | |||
2803 | de_buf += le16_to_cpu(de->rec_len); | ||
2804 | } | ||
2805 | |||
2806 | /* We are careful to leave room for one extra record. */ | ||
2807 | return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb); | ||
1353 | } | 2808 | } |
1354 | 2809 | ||
1355 | /* | 2810 | /* |
@@ -1358,18 +2813,26 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb, | |||
1358 | * expansion from an inline directory to one with extents. The first dir block | 2813 | * expansion from an inline directory to one with extents. The first dir block |
1359 | * in that case is taken from the inline data portion of the inode block. | 2814 | * in that case is taken from the inline data portion of the inode block. |
1360 | * | 2815 | * |
2816 | * This will also return the largest amount of contiguous space for a dirent | ||
2817 | * in the block. That value is *not* necessarily the last dirent, even after | ||
2818 | * expansion. The directory indexing code wants this value for free space | ||
2819 | * accounting. We do this here since we're already walking the entire dir | ||
2820 | * block. | ||
2821 | * | ||
1361 | * We add the dir trailer if this filesystem wants it. | 2822 | * We add the dir trailer if this filesystem wants it. |
1362 | */ | 2823 | */ |
1363 | static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | 2824 | static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size, |
1364 | struct super_block *sb) | 2825 | struct inode *dir) |
1365 | { | 2826 | { |
2827 | struct super_block *sb = dir->i_sb; | ||
1366 | struct ocfs2_dir_entry *de; | 2828 | struct ocfs2_dir_entry *de; |
1367 | struct ocfs2_dir_entry *prev_de; | 2829 | struct ocfs2_dir_entry *prev_de; |
1368 | char *de_buf, *limit; | 2830 | char *de_buf, *limit; |
1369 | unsigned int new_size = sb->s_blocksize; | 2831 | unsigned int new_size = sb->s_blocksize; |
1370 | unsigned int bytes; | 2832 | unsigned int bytes, this_hole; |
2833 | unsigned int largest_hole = 0; | ||
1371 | 2834 | ||
1372 | if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) | 2835 | if (ocfs2_new_dir_wants_trailer(dir)) |
1373 | new_size = ocfs2_dir_trailer_blk_off(sb); | 2836 | new_size = ocfs2_dir_trailer_blk_off(sb); |
1374 | 2837 | ||
1375 | bytes = new_size - old_size; | 2838 | bytes = new_size - old_size; |
@@ -1378,12 +2841,26 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | |||
1378 | de_buf = start; | 2841 | de_buf = start; |
1379 | de = (struct ocfs2_dir_entry *)de_buf; | 2842 | de = (struct ocfs2_dir_entry *)de_buf; |
1380 | do { | 2843 | do { |
2844 | this_hole = ocfs2_figure_dirent_hole(de); | ||
2845 | if (this_hole > largest_hole) | ||
2846 | largest_hole = this_hole; | ||
2847 | |||
1381 | prev_de = de; | 2848 | prev_de = de; |
1382 | de_buf += le16_to_cpu(de->rec_len); | 2849 | de_buf += le16_to_cpu(de->rec_len); |
1383 | de = (struct ocfs2_dir_entry *)de_buf; | 2850 | de = (struct ocfs2_dir_entry *)de_buf; |
1384 | } while (de_buf < limit); | 2851 | } while (de_buf < limit); |
1385 | 2852 | ||
1386 | le16_add_cpu(&prev_de->rec_len, bytes); | 2853 | le16_add_cpu(&prev_de->rec_len, bytes); |
2854 | |||
2855 | /* We need to double check this after modification of the final | ||
2856 | * dirent. */ | ||
2857 | this_hole = ocfs2_figure_dirent_hole(prev_de); | ||
2858 | if (this_hole > largest_hole) | ||
2859 | largest_hole = this_hole; | ||
2860 | |||
2861 | if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) | ||
2862 | return largest_hole; | ||
2863 | return 0; | ||
1387 | } | 2864 | } |
1388 | 2865 | ||
1389 | /* | 2866 | /* |
@@ -1396,29 +2873,61 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | |||
1396 | */ | 2873 | */ |
1397 | static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | 2874 | static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, |
1398 | unsigned int blocks_wanted, | 2875 | unsigned int blocks_wanted, |
2876 | struct ocfs2_dir_lookup_result *lookup, | ||
1399 | struct buffer_head **first_block_bh) | 2877 | struct buffer_head **first_block_bh) |
1400 | { | 2878 | { |
1401 | u32 alloc, bit_off, len; | 2879 | u32 alloc, dx_alloc, bit_off, len, num_dx_entries = 0; |
1402 | struct super_block *sb = dir->i_sb; | 2880 | struct super_block *sb = dir->i_sb; |
1403 | int ret, credits = ocfs2_inline_to_extents_credits(sb); | 2881 | int ret, i, num_dx_leaves = 0, dx_inline = 0, |
1404 | u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; | 2882 | credits = ocfs2_inline_to_extents_credits(sb); |
2883 | u64 dx_insert_blkno, blkno, | ||
2884 | bytes = blocks_wanted << sb->s_blocksize_bits; | ||
1405 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 2885 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
1406 | struct ocfs2_inode_info *oi = OCFS2_I(dir); | 2886 | struct ocfs2_inode_info *oi = OCFS2_I(dir); |
1407 | struct ocfs2_alloc_context *data_ac; | 2887 | struct ocfs2_alloc_context *data_ac; |
2888 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1408 | struct buffer_head *dirdata_bh = NULL; | 2889 | struct buffer_head *dirdata_bh = NULL; |
2890 | struct buffer_head *dx_root_bh = NULL; | ||
2891 | struct buffer_head **dx_leaves = NULL; | ||
1409 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 2892 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1410 | handle_t *handle; | 2893 | handle_t *handle; |
1411 | struct ocfs2_extent_tree et; | 2894 | struct ocfs2_extent_tree et; |
1412 | int did_quota = 0; | 2895 | struct ocfs2_extent_tree dx_et; |
2896 | int did_quota = 0, bytes_allocated = 0; | ||
1413 | 2897 | ||
1414 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | 2898 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); |
1415 | 2899 | ||
1416 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 2900 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
2901 | dx_alloc = 0; | ||
2902 | |||
2903 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2904 | credits += ocfs2_add_dir_index_credits(sb); | ||
2905 | |||
2906 | dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh); | ||
2907 | if (!dx_inline) { | ||
2908 | /* Add one more cluster for an index leaf */ | ||
2909 | dx_alloc++; | ||
2910 | dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb, | ||
2911 | &num_dx_leaves); | ||
2912 | if (!dx_leaves) { | ||
2913 | ret = -ENOMEM; | ||
2914 | mlog_errno(ret); | ||
2915 | goto out; | ||
2916 | } | ||
2917 | } | ||
2918 | |||
2919 | /* This gets us the dx_root */ | ||
2920 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
2921 | if (ret) { | ||
2922 | mlog_errno(ret); | ||
2923 | goto out; | ||
2924 | } | ||
2925 | } | ||
1417 | 2926 | ||
1418 | /* | 2927 | /* |
1419 | * We should never need more than 2 clusters for this - | 2928 | * We should never need more than 2 clusters for the unindexed |
1420 | * maximum dirent size is far less than one block. In fact, | 2929 | * tree - maximum dirent size is far less than one block. In |
1421 | * the only time we'd need more than one cluster is if | 2930 | * fact, the only time we'd need more than one cluster is if |
1422 | * blocksize == clustersize and the dirent won't fit in the | 2931 | * blocksize == clustersize and the dirent won't fit in the |
1423 | * extra space that the expansion to a single block gives. As | 2932 | * extra space that the expansion to a single block gives. As |
1424 | * of today, that only happens on 4k/4k file systems. | 2933 | * of today, that only happens on 4k/4k file systems. |
@@ -1435,7 +2944,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1435 | 2944 | ||
1436 | /* | 2945 | /* |
1437 | * Prepare for worst case allocation scenario of two separate | 2946 | * Prepare for worst case allocation scenario of two separate |
1438 | * extents. | 2947 | * extents in the unindexed tree. |
1439 | */ | 2948 | */ |
1440 | if (alloc == 2) | 2949 | if (alloc == 2) |
1441 | credits += OCFS2_SUBALLOC_ALLOC; | 2950 | credits += OCFS2_SUBALLOC_ALLOC; |
@@ -1448,11 +2957,29 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1448 | } | 2957 | } |
1449 | 2958 | ||
1450 | if (vfs_dq_alloc_space_nodirty(dir, | 2959 | if (vfs_dq_alloc_space_nodirty(dir, |
1451 | ocfs2_clusters_to_bytes(osb->sb, alloc))) { | 2960 | ocfs2_clusters_to_bytes(osb->sb, |
2961 | alloc + dx_alloc))) { | ||
1452 | ret = -EDQUOT; | 2962 | ret = -EDQUOT; |
1453 | goto out_commit; | 2963 | goto out_commit; |
1454 | } | 2964 | } |
1455 | did_quota = 1; | 2965 | did_quota = 1; |
2966 | |||
2967 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | ||
2968 | /* | ||
2969 | * Allocate our index cluster first, to maximize the | ||
2970 | * possibility that unindexed leaves grow | ||
2971 | * contiguously. | ||
2972 | */ | ||
2973 | ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, | ||
2974 | dx_leaves, num_dx_leaves, | ||
2975 | &dx_insert_blkno); | ||
2976 | if (ret) { | ||
2977 | mlog_errno(ret); | ||
2978 | goto out_commit; | ||
2979 | } | ||
2980 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
2981 | } | ||
2982 | |||
1456 | /* | 2983 | /* |
1457 | * Try to claim as many clusters as the bitmap can give though | 2984 | * Try to claim as many clusters as the bitmap can give though |
1458 | * if we only get one now, that's enough to continue. The rest | 2985 | * if we only get one now, that's enough to continue. The rest |
@@ -1463,6 +2990,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1463 | mlog_errno(ret); | 2990 | mlog_errno(ret); |
1464 | goto out_commit; | 2991 | goto out_commit; |
1465 | } | 2992 | } |
2993 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
1466 | 2994 | ||
1467 | /* | 2995 | /* |
1468 | * Operations are carefully ordered so that we set up the new | 2996 | * Operations are carefully ordered so that we set up the new |
@@ -1489,9 +3017,16 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1489 | memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); | 3017 | memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); |
1490 | memset(dirdata_bh->b_data + i_size_read(dir), 0, | 3018 | memset(dirdata_bh->b_data + i_size_read(dir), 0, |
1491 | sb->s_blocksize - i_size_read(dir)); | 3019 | sb->s_blocksize - i_size_read(dir)); |
1492 | ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb); | 3020 | i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir); |
1493 | if (ocfs2_supports_dir_trailer(osb)) | 3021 | if (ocfs2_new_dir_wants_trailer(dir)) { |
1494 | ocfs2_init_dir_trailer(dir, dirdata_bh); | 3022 | /* |
3023 | * Prepare the dir trailer up front. It will otherwise look | ||
3024 | * like a valid dirent. Even if inserting the index fails | ||
3025 | * (unlikely), then all we'll have done is given first dir | ||
3026 | * block a small amount of fragmentation. | ||
3027 | */ | ||
3028 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); | ||
3029 | } | ||
1495 | 3030 | ||
1496 | ret = ocfs2_journal_dirty(handle, dirdata_bh); | 3031 | ret = ocfs2_journal_dirty(handle, dirdata_bh); |
1497 | if (ret) { | 3032 | if (ret) { |
@@ -1499,6 +3034,24 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1499 | goto out_commit; | 3034 | goto out_commit; |
1500 | } | 3035 | } |
1501 | 3036 | ||
3037 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | ||
3038 | /* | ||
3039 | * Dx dirs with an external cluster need to do this up | ||
3040 | * front. Inline dx root's get handled later, after | ||
3041 | * we've allocated our root block. We get passed back | ||
3042 | * a total number of items so that dr_num_entries can | ||
3043 | * be correctly set once the dx_root has been | ||
3044 | * allocated. | ||
3045 | */ | ||
3046 | ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves, | ||
3047 | num_dx_leaves, &num_dx_entries, | ||
3048 | dirdata_bh); | ||
3049 | if (ret) { | ||
3050 | mlog_errno(ret); | ||
3051 | goto out_commit; | ||
3052 | } | ||
3053 | } | ||
3054 | |||
1502 | /* | 3055 | /* |
1503 | * Set extent, i_size, etc on the directory. After this, the | 3056 | * Set extent, i_size, etc on the directory. After this, the |
1504 | * inode should contain the same exact dirents as before and | 3057 | * inode should contain the same exact dirents as before and |
@@ -1551,6 +3104,27 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1551 | goto out_commit; | 3104 | goto out_commit; |
1552 | } | 3105 | } |
1553 | 3106 | ||
3107 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
3108 | ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, | ||
3109 | dirdata_bh, meta_ac, dx_inline, | ||
3110 | num_dx_entries, &dx_root_bh); | ||
3111 | if (ret) { | ||
3112 | mlog_errno(ret); | ||
3113 | goto out_commit; | ||
3114 | } | ||
3115 | |||
3116 | if (dx_inline) { | ||
3117 | ocfs2_dx_dir_index_root_block(dir, dx_root_bh, | ||
3118 | dirdata_bh); | ||
3119 | } else { | ||
3120 | ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); | ||
3121 | ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, | ||
3122 | dx_insert_blkno, 1, 0, NULL); | ||
3123 | if (ret) | ||
3124 | mlog_errno(ret); | ||
3125 | } | ||
3126 | } | ||
3127 | |||
1554 | /* | 3128 | /* |
1555 | * We asked for two clusters, but only got one in the 1st | 3129 | * We asked for two clusters, but only got one in the 1st |
1556 | * pass. Claim the 2nd cluster as a separate extent. | 3130 | * pass. Claim the 2nd cluster as a separate extent. |
@@ -1570,15 +3144,32 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1570 | mlog_errno(ret); | 3144 | mlog_errno(ret); |
1571 | goto out_commit; | 3145 | goto out_commit; |
1572 | } | 3146 | } |
3147 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
1573 | } | 3148 | } |
1574 | 3149 | ||
1575 | *first_block_bh = dirdata_bh; | 3150 | *first_block_bh = dirdata_bh; |
1576 | dirdata_bh = NULL; | 3151 | dirdata_bh = NULL; |
3152 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
3153 | unsigned int off; | ||
3154 | |||
3155 | if (!dx_inline) { | ||
3156 | /* | ||
3157 | * We need to return the correct block within the | ||
3158 | * cluster which should hold our entry. | ||
3159 | */ | ||
3160 | off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), | ||
3161 | &lookup->dl_hinfo); | ||
3162 | get_bh(dx_leaves[off]); | ||
3163 | lookup->dl_dx_leaf_bh = dx_leaves[off]; | ||
3164 | } | ||
3165 | lookup->dl_dx_root_bh = dx_root_bh; | ||
3166 | dx_root_bh = NULL; | ||
3167 | } | ||
1577 | 3168 | ||
1578 | out_commit: | 3169 | out_commit: |
1579 | if (ret < 0 && did_quota) | 3170 | if (ret < 0 && did_quota) |
1580 | vfs_dq_free_space_nodirty(dir, | 3171 | vfs_dq_free_space_nodirty(dir, bytes_allocated); |
1581 | ocfs2_clusters_to_bytes(osb->sb, 2)); | 3172 | |
1582 | ocfs2_commit_trans(osb, handle); | 3173 | ocfs2_commit_trans(osb, handle); |
1583 | 3174 | ||
1584 | out_sem: | 3175 | out_sem: |
@@ -1587,8 +3178,17 @@ out_sem: | |||
1587 | out: | 3178 | out: |
1588 | if (data_ac) | 3179 | if (data_ac) |
1589 | ocfs2_free_alloc_context(data_ac); | 3180 | ocfs2_free_alloc_context(data_ac); |
3181 | if (meta_ac) | ||
3182 | ocfs2_free_alloc_context(meta_ac); | ||
3183 | |||
3184 | if (dx_leaves) { | ||
3185 | for (i = 0; i < num_dx_leaves; i++) | ||
3186 | brelse(dx_leaves[i]); | ||
3187 | kfree(dx_leaves); | ||
3188 | } | ||
1590 | 3189 | ||
1591 | brelse(dirdata_bh); | 3190 | brelse(dirdata_bh); |
3191 | brelse(dx_root_bh); | ||
1592 | 3192 | ||
1593 | return ret; | 3193 | return ret; |
1594 | } | 3194 | } |
@@ -1658,11 +3258,14 @@ bail: | |||
1658 | * is to be turned into an extent based one. The size of the dirent to | 3258 | * is to be turned into an extent based one. The size of the dirent to |
1659 | * insert might be larger than the space gained by growing to just one | 3259 | * insert might be larger than the space gained by growing to just one |
1660 | * block, so we may have to grow the inode by two blocks in that case. | 3260 | * block, so we may have to grow the inode by two blocks in that case. |
3261 | * | ||
3262 | * If the directory is already indexed, dx_root_bh must be provided. | ||
1661 | */ | 3263 | */ |
1662 | static int ocfs2_extend_dir(struct ocfs2_super *osb, | 3264 | static int ocfs2_extend_dir(struct ocfs2_super *osb, |
1663 | struct inode *dir, | 3265 | struct inode *dir, |
1664 | struct buffer_head *parent_fe_bh, | 3266 | struct buffer_head *parent_fe_bh, |
1665 | unsigned int blocks_wanted, | 3267 | unsigned int blocks_wanted, |
3268 | struct ocfs2_dir_lookup_result *lookup, | ||
1666 | struct buffer_head **new_de_bh) | 3269 | struct buffer_head **new_de_bh) |
1667 | { | 3270 | { |
1668 | int status = 0; | 3271 | int status = 0; |
@@ -1677,17 +3280,29 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1677 | struct ocfs2_dir_entry * de; | 3280 | struct ocfs2_dir_entry * de; |
1678 | struct super_block *sb = osb->sb; | 3281 | struct super_block *sb = osb->sb; |
1679 | struct ocfs2_extent_tree et; | 3282 | struct ocfs2_extent_tree et; |
3283 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
1680 | 3284 | ||
1681 | mlog_entry_void(); | 3285 | mlog_entry_void(); |
1682 | 3286 | ||
1683 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 3287 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
3288 | /* | ||
3289 | * This would be a code error as an inline directory should | ||
3290 | * never have an index root. | ||
3291 | */ | ||
3292 | BUG_ON(dx_root_bh); | ||
3293 | |||
1684 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, | 3294 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, |
1685 | blocks_wanted, &new_bh); | 3295 | blocks_wanted, lookup, |
3296 | &new_bh); | ||
1686 | if (status) { | 3297 | if (status) { |
1687 | mlog_errno(status); | 3298 | mlog_errno(status); |
1688 | goto bail; | 3299 | goto bail; |
1689 | } | 3300 | } |
1690 | 3301 | ||
3302 | /* Expansion from inline to an indexed directory will | ||
3303 | * have given us this. */ | ||
3304 | dx_root_bh = lookup->dl_dx_root_bh; | ||
3305 | |||
1691 | if (blocks_wanted == 1) { | 3306 | if (blocks_wanted == 1) { |
1692 | /* | 3307 | /* |
1693 | * If the new dirent will fit inside the space | 3308 | * If the new dirent will fit inside the space |
@@ -1751,6 +3366,10 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1751 | } | 3366 | } |
1752 | 3367 | ||
1753 | do_extend: | 3368 | do_extend: |
3369 | if (ocfs2_dir_indexed(dir)) | ||
3370 | credits++; /* For attaching the new dirent block to the | ||
3371 | * dx_root */ | ||
3372 | |||
1754 | down_write(&OCFS2_I(dir)->ip_alloc_sem); | 3373 | down_write(&OCFS2_I(dir)->ip_alloc_sem); |
1755 | drop_alloc_sem = 1; | 3374 | drop_alloc_sem = 1; |
1756 | 3375 | ||
@@ -1781,9 +3400,19 @@ do_extend: | |||
1781 | 3400 | ||
1782 | de = (struct ocfs2_dir_entry *) new_bh->b_data; | 3401 | de = (struct ocfs2_dir_entry *) new_bh->b_data; |
1783 | de->inode = 0; | 3402 | de->inode = 0; |
1784 | if (ocfs2_dir_has_trailer(dir)) { | 3403 | if (ocfs2_supports_dir_trailer(dir)) { |
1785 | de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); | 3404 | de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); |
1786 | ocfs2_init_dir_trailer(dir, new_bh); | 3405 | |
3406 | ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len)); | ||
3407 | |||
3408 | if (ocfs2_dir_indexed(dir)) { | ||
3409 | status = ocfs2_dx_dir_link_trailer(dir, handle, | ||
3410 | dx_root_bh, new_bh); | ||
3411 | if (status) { | ||
3412 | mlog_errno(status); | ||
3413 | goto bail; | ||
3414 | } | ||
3415 | } | ||
1787 | } else { | 3416 | } else { |
1788 | de->rec_len = cpu_to_le16(sb->s_blocksize); | 3417 | de->rec_len = cpu_to_le16(sb->s_blocksize); |
1789 | } | 3418 | } |
@@ -1839,7 +3468,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, | |||
1839 | * This calculates how many free bytes we'd have in block zero, should | 3468 | * This calculates how many free bytes we'd have in block zero, should |
1840 | * this function force expansion to an extent tree. | 3469 | * this function force expansion to an extent tree. |
1841 | */ | 3470 | */ |
1842 | if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) | 3471 | if (ocfs2_new_dir_wants_trailer(dir)) |
1843 | free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); | 3472 | free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); |
1844 | else | 3473 | else |
1845 | free_space = dir->i_sb->s_blocksize - i_size_read(dir); | 3474 | free_space = dir->i_sb->s_blocksize - i_size_read(dir); |
@@ -1970,12 +3599,766 @@ bail: | |||
1970 | return status; | 3599 | return status; |
1971 | } | 3600 | } |
1972 | 3601 | ||
3602 | static int dx_leaf_sort_cmp(const void *a, const void *b) | ||
3603 | { | ||
3604 | const struct ocfs2_dx_entry *entry1 = a; | ||
3605 | const struct ocfs2_dx_entry *entry2 = b; | ||
3606 | u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash); | ||
3607 | u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash); | ||
3608 | u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash); | ||
3609 | u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash); | ||
3610 | |||
3611 | if (major_hash1 > major_hash2) | ||
3612 | return 1; | ||
3613 | if (major_hash1 < major_hash2) | ||
3614 | return -1; | ||
3615 | |||
3616 | /* | ||
3617 | * It is not strictly necessary to sort by minor | ||
3618 | */ | ||
3619 | if (minor_hash1 > minor_hash2) | ||
3620 | return 1; | ||
3621 | if (minor_hash1 < minor_hash2) | ||
3622 | return -1; | ||
3623 | return 0; | ||
3624 | } | ||
3625 | |||
3626 | static void dx_leaf_sort_swap(void *a, void *b, int size) | ||
3627 | { | ||
3628 | struct ocfs2_dx_entry *entry1 = a; | ||
3629 | struct ocfs2_dx_entry *entry2 = b; | ||
3630 | struct ocfs2_dx_entry tmp; | ||
3631 | |||
3632 | BUG_ON(size != sizeof(*entry1)); | ||
3633 | |||
3634 | tmp = *entry1; | ||
3635 | *entry1 = *entry2; | ||
3636 | *entry2 = tmp; | ||
3637 | } | ||
3638 | |||
3639 | static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) | ||
3640 | { | ||
3641 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
3642 | int i, num = le16_to_cpu(dl_list->de_num_used); | ||
3643 | |||
3644 | for (i = 0; i < (num - 1); i++) { | ||
3645 | if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) != | ||
3646 | le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash)) | ||
3647 | return 0; | ||
3648 | } | ||
3649 | |||
3650 | return 1; | ||
3651 | } | ||
3652 | |||
3653 | /* | ||
3654 | * Find the optimal value to split this leaf on. This expects the leaf | ||
3655 | * entries to be in sorted order. | ||
3656 | * | ||
3657 | * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is | ||
3658 | * the hash we want to insert. | ||
3659 | * | ||
3660 | * This function is only concerned with the major hash - that which | ||
3661 | * determines which cluster an item belongs to. | ||
3662 | */ | ||
3663 | static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf, | ||
3664 | u32 leaf_cpos, u32 insert_hash, | ||
3665 | u32 *split_hash) | ||
3666 | { | ||
3667 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
3668 | int i, num_used = le16_to_cpu(dl_list->de_num_used); | ||
3669 | int allsame; | ||
3670 | |||
3671 | /* | ||
3672 | * There's a couple rare, but nasty corner cases we have to | ||
3673 | * check for here. All of them involve a leaf where all value | ||
3674 | * have the same hash, which is what we look for first. | ||
3675 | * | ||
3676 | * Most of the time, all of the above is false, and we simply | ||
3677 | * pick the median value for a split. | ||
3678 | */ | ||
3679 | allsame = ocfs2_dx_leaf_same_major(dx_leaf); | ||
3680 | if (allsame) { | ||
3681 | u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash); | ||
3682 | |||
3683 | if (val == insert_hash) { | ||
3684 | /* | ||
3685 | * No matter where we would choose to split, | ||
3686 | * the new entry would want to occupy the same | ||
3687 | * block as these. Since there's no space left | ||
3688 | * in their existing block, we know there | ||
3689 | * won't be space after the split. | ||
3690 | */ | ||
3691 | return -ENOSPC; | ||
3692 | } | ||
3693 | |||
3694 | if (val == leaf_cpos) { | ||
3695 | /* | ||
3696 | * Because val is the same as leaf_cpos (which | ||
3697 | * is the smallest value this leaf can have), | ||
3698 | * yet is not equal to insert_hash, then we | ||
3699 | * know that insert_hash *must* be larger than | ||
3700 | * val (and leaf_cpos). At least cpos+1 in value. | ||
3701 | * | ||
3702 | * We also know then, that there cannot be an | ||
3703 | * adjacent extent (otherwise we'd be looking | ||
3704 | * at it). Choosing this value gives us a | ||
3705 | * chance to get some contiguousness. | ||
3706 | */ | ||
3707 | *split_hash = leaf_cpos + 1; | ||
3708 | return 0; | ||
3709 | } | ||
3710 | |||
3711 | if (val > insert_hash) { | ||
3712 | /* | ||
3713 | * val can not be the same as insert hash, and | ||
3714 | * also must be larger than leaf_cpos. Also, | ||
3715 | * we know that there can't be a leaf between | ||
3716 | * cpos and val, otherwise the entries with | ||
3717 | * hash 'val' would be there. | ||
3718 | */ | ||
3719 | *split_hash = val; | ||
3720 | return 0; | ||
3721 | } | ||
3722 | |||
3723 | *split_hash = insert_hash; | ||
3724 | return 0; | ||
3725 | } | ||
3726 | |||
3727 | /* | ||
3728 | * Since the records are sorted and the checks above | ||
3729 | * guaranteed that not all records in this block are the same, | ||
3730 | * we simple travel forward, from the median, and pick the 1st | ||
3731 | * record whose value is larger than leaf_cpos. | ||
3732 | */ | ||
3733 | for (i = (num_used / 2); i < num_used; i++) | ||
3734 | if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) > | ||
3735 | leaf_cpos) | ||
3736 | break; | ||
3737 | |||
3738 | BUG_ON(i == num_used); /* Should be impossible */ | ||
3739 | *split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash); | ||
3740 | return 0; | ||
3741 | } | ||
3742 | |||
3743 | /* | ||
3744 | * Transfer all entries in orig_dx_leaves whose major hash is equal to or | ||
3745 | * larger than split_hash into new_dx_leaves. We use a temporary | ||
3746 | * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks. | ||
3747 | * | ||
3748 | * Since the block offset inside a leaf (cluster) is a constant mask | ||
3749 | * of minor_hash, we can optimize - an item at block offset X within | ||
3750 | * the original cluster, will be at offset X within the new cluster. | ||
3751 | */ | ||
3752 | static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash, | ||
3753 | handle_t *handle, | ||
3754 | struct ocfs2_dx_leaf *tmp_dx_leaf, | ||
3755 | struct buffer_head **orig_dx_leaves, | ||
3756 | struct buffer_head **new_dx_leaves, | ||
3757 | int num_dx_leaves) | ||
3758 | { | ||
3759 | int i, j, num_used; | ||
3760 | u32 major_hash; | ||
3761 | struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf; | ||
3762 | struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list; | ||
3763 | struct ocfs2_dx_entry *dx_entry; | ||
3764 | |||
3765 | tmp_list = &tmp_dx_leaf->dl_list; | ||
3766 | |||
3767 | for (i = 0; i < num_dx_leaves; i++) { | ||
3768 | orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data; | ||
3769 | orig_list = &orig_dx_leaf->dl_list; | ||
3770 | new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data; | ||
3771 | new_list = &new_dx_leaf->dl_list; | ||
3772 | |||
3773 | num_used = le16_to_cpu(orig_list->de_num_used); | ||
3774 | |||
3775 | memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize); | ||
3776 | tmp_list->de_num_used = cpu_to_le16(0); | ||
3777 | memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used); | ||
3778 | |||
3779 | for (j = 0; j < num_used; j++) { | ||
3780 | dx_entry = &orig_list->de_entries[j]; | ||
3781 | major_hash = le32_to_cpu(dx_entry->dx_major_hash); | ||
3782 | if (major_hash >= split_hash) | ||
3783 | ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf, | ||
3784 | dx_entry); | ||
3785 | else | ||
3786 | ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf, | ||
3787 | dx_entry); | ||
3788 | } | ||
3789 | memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize); | ||
3790 | |||
3791 | ocfs2_journal_dirty(handle, orig_dx_leaves[i]); | ||
3792 | ocfs2_journal_dirty(handle, new_dx_leaves[i]); | ||
3793 | } | ||
3794 | } | ||
3795 | |||
3796 | static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb, | ||
3797 | struct ocfs2_dx_root_block *dx_root) | ||
3798 | { | ||
3799 | int credits = ocfs2_clusters_to_blocks(osb->sb, 2); | ||
3800 | |||
3801 | credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1); | ||
3802 | credits += ocfs2_quota_trans_credits(osb->sb); | ||
3803 | return credits; | ||
3804 | } | ||
3805 | |||
3806 | /* | ||
3807 | * Find the median value in dx_leaf_bh and allocate a new leaf to move | ||
3808 | * half our entries into. | ||
3809 | */ | ||
3810 | static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | ||
3811 | struct buffer_head *dx_root_bh, | ||
3812 | struct buffer_head *dx_leaf_bh, | ||
3813 | struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos, | ||
3814 | u64 leaf_blkno) | ||
3815 | { | ||
3816 | struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
3817 | int credits, ret, i, num_used, did_quota = 0; | ||
3818 | u32 cpos, split_hash, insert_hash = hinfo->major_hash; | ||
3819 | u64 orig_leaves_start; | ||
3820 | int num_dx_leaves; | ||
3821 | struct buffer_head **orig_dx_leaves = NULL; | ||
3822 | struct buffer_head **new_dx_leaves = NULL; | ||
3823 | struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL; | ||
3824 | struct ocfs2_extent_tree et; | ||
3825 | handle_t *handle = NULL; | ||
3826 | struct ocfs2_dx_root_block *dx_root; | ||
3827 | struct ocfs2_dx_leaf *tmp_dx_leaf = NULL; | ||
3828 | |||
3829 | mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n", | ||
3830 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
3831 | (unsigned long long)leaf_blkno, insert_hash); | ||
3832 | |||
3833 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | ||
3834 | |||
3835 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
3836 | /* | ||
3837 | * XXX: This is a rather large limit. We should use a more | ||
3838 | * realistic value. | ||
3839 | */ | ||
3840 | if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX) | ||
3841 | return -ENOSPC; | ||
3842 | |||
3843 | num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used); | ||
3844 | if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) { | ||
3845 | mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: " | ||
3846 | "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
3847 | (unsigned long long)leaf_blkno, num_used); | ||
3848 | ret = -EIO; | ||
3849 | goto out; | ||
3850 | } | ||
3851 | |||
3852 | orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves); | ||
3853 | if (!orig_dx_leaves) { | ||
3854 | ret = -ENOMEM; | ||
3855 | mlog_errno(ret); | ||
3856 | goto out; | ||
3857 | } | ||
3858 | |||
3859 | new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL); | ||
3860 | if (!new_dx_leaves) { | ||
3861 | ret = -ENOMEM; | ||
3862 | mlog_errno(ret); | ||
3863 | goto out; | ||
3864 | } | ||
3865 | |||
3866 | ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac); | ||
3867 | if (ret) { | ||
3868 | if (ret != -ENOSPC) | ||
3869 | mlog_errno(ret); | ||
3870 | goto out; | ||
3871 | } | ||
3872 | |||
3873 | credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root); | ||
3874 | handle = ocfs2_start_trans(osb, credits); | ||
3875 | if (IS_ERR(handle)) { | ||
3876 | ret = PTR_ERR(handle); | ||
3877 | handle = NULL; | ||
3878 | mlog_errno(ret); | ||
3879 | goto out; | ||
3880 | } | ||
3881 | |||
3882 | if (vfs_dq_alloc_space_nodirty(dir, | ||
3883 | ocfs2_clusters_to_bytes(dir->i_sb, 1))) { | ||
3884 | ret = -EDQUOT; | ||
3885 | goto out_commit; | ||
3886 | } | ||
3887 | did_quota = 1; | ||
3888 | |||
3889 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | ||
3890 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3891 | if (ret) { | ||
3892 | mlog_errno(ret); | ||
3893 | goto out_commit; | ||
3894 | } | ||
3895 | |||
3896 | /* | ||
3897 | * This block is changing anyway, so we can sort it in place. | ||
3898 | */ | ||
3899 | sort(dx_leaf->dl_list.de_entries, num_used, | ||
3900 | sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, | ||
3901 | dx_leaf_sort_swap); | ||
3902 | |||
3903 | ret = ocfs2_journal_dirty(handle, dx_leaf_bh); | ||
3904 | if (ret) { | ||
3905 | mlog_errno(ret); | ||
3906 | goto out_commit; | ||
3907 | } | ||
3908 | |||
3909 | ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash, | ||
3910 | &split_hash); | ||
3911 | if (ret) { | ||
3912 | mlog_errno(ret); | ||
3913 | goto out_commit; | ||
3914 | } | ||
3915 | |||
3916 | mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n", | ||
3917 | leaf_cpos, split_hash, insert_hash); | ||
3918 | |||
3919 | /* | ||
3920 | * We have to carefully order operations here. There are items | ||
3921 | * which want to be in the new cluster before insert, but in | ||
3922 | * order to put those items in the new cluster, we alter the | ||
3923 | * old cluster. A failure to insert gets nasty. | ||
3924 | * | ||
3925 | * So, start by reserving writes to the old | ||
3926 | * cluster. ocfs2_dx_dir_new_cluster will reserve writes on | ||
3927 | * the new cluster for us, before inserting it. The insert | ||
3928 | * won't happen if there's an error before that. Once the | ||
3929 | * insert is done then, we can transfer from one leaf into the | ||
3930 | * other without fear of hitting any error. | ||
3931 | */ | ||
3932 | |||
3933 | /* | ||
3934 | * The leaf transfer wants some scratch space so that we don't | ||
3935 | * wind up doing a bunch of expensive memmove(). | ||
3936 | */ | ||
3937 | tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS); | ||
3938 | if (!tmp_dx_leaf) { | ||
3939 | ret = -ENOMEM; | ||
3940 | mlog_errno(ret); | ||
3941 | goto out_commit; | ||
3942 | } | ||
3943 | |||
3944 | orig_leaves_start = ocfs2_block_to_cluster_start(dir->i_sb, leaf_blkno); | ||
3945 | ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves, | ||
3946 | orig_dx_leaves); | ||
3947 | if (ret) { | ||
3948 | mlog_errno(ret); | ||
3949 | goto out_commit; | ||
3950 | } | ||
3951 | |||
3952 | for (i = 0; i < num_dx_leaves; i++) { | ||
3953 | ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i], | ||
3954 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3955 | if (ret) { | ||
3956 | mlog_errno(ret); | ||
3957 | goto out_commit; | ||
3958 | } | ||
3959 | } | ||
3960 | |||
3961 | cpos = split_hash; | ||
3962 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | ||
3963 | data_ac, meta_ac, new_dx_leaves, | ||
3964 | num_dx_leaves); | ||
3965 | if (ret) { | ||
3966 | mlog_errno(ret); | ||
3967 | goto out_commit; | ||
3968 | } | ||
3969 | |||
3970 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, | ||
3971 | orig_dx_leaves, new_dx_leaves, num_dx_leaves); | ||
3972 | |||
3973 | out_commit: | ||
3974 | if (ret < 0 && did_quota) | ||
3975 | vfs_dq_free_space_nodirty(dir, | ||
3976 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | ||
3977 | |||
3978 | ocfs2_commit_trans(osb, handle); | ||
3979 | |||
3980 | out: | ||
3981 | if (orig_dx_leaves || new_dx_leaves) { | ||
3982 | for (i = 0; i < num_dx_leaves; i++) { | ||
3983 | if (orig_dx_leaves) | ||
3984 | brelse(orig_dx_leaves[i]); | ||
3985 | if (new_dx_leaves) | ||
3986 | brelse(new_dx_leaves[i]); | ||
3987 | } | ||
3988 | kfree(orig_dx_leaves); | ||
3989 | kfree(new_dx_leaves); | ||
3990 | } | ||
3991 | |||
3992 | if (meta_ac) | ||
3993 | ocfs2_free_alloc_context(meta_ac); | ||
3994 | if (data_ac) | ||
3995 | ocfs2_free_alloc_context(data_ac); | ||
3996 | |||
3997 | kfree(tmp_dx_leaf); | ||
3998 | return ret; | ||
3999 | } | ||
4000 | |||
4001 | static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, | ||
4002 | struct buffer_head *di_bh, | ||
4003 | struct buffer_head *dx_root_bh, | ||
4004 | const char *name, int namelen, | ||
4005 | struct ocfs2_dir_lookup_result *lookup) | ||
4006 | { | ||
4007 | int ret, rebalanced = 0; | ||
4008 | struct ocfs2_dx_root_block *dx_root; | ||
4009 | struct buffer_head *dx_leaf_bh = NULL; | ||
4010 | struct ocfs2_dx_leaf *dx_leaf; | ||
4011 | u64 blkno; | ||
4012 | u32 leaf_cpos; | ||
4013 | |||
4014 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4015 | |||
4016 | restart_search: | ||
4017 | ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, | ||
4018 | &leaf_cpos, &blkno); | ||
4019 | if (ret) { | ||
4020 | mlog_errno(ret); | ||
4021 | goto out; | ||
4022 | } | ||
4023 | |||
4024 | ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh); | ||
4025 | if (ret) { | ||
4026 | mlog_errno(ret); | ||
4027 | goto out; | ||
4028 | } | ||
4029 | |||
4030 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
4031 | |||
4032 | if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >= | ||
4033 | le16_to_cpu(dx_leaf->dl_list.de_count)) { | ||
4034 | if (rebalanced) { | ||
4035 | /* | ||
4036 | * Rebalancing should have provided us with | ||
4037 | * space in an appropriate leaf. | ||
4038 | * | ||
4039 | * XXX: Is this an abnormal condition then? | ||
4040 | * Should we print a message here? | ||
4041 | */ | ||
4042 | ret = -ENOSPC; | ||
4043 | goto out; | ||
4044 | } | ||
4045 | |||
4046 | ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh, | ||
4047 | &lookup->dl_hinfo, leaf_cpos, | ||
4048 | blkno); | ||
4049 | if (ret) { | ||
4050 | if (ret != -ENOSPC) | ||
4051 | mlog_errno(ret); | ||
4052 | goto out; | ||
4053 | } | ||
4054 | |||
4055 | /* | ||
4056 | * Restart the lookup. The rebalance might have | ||
4057 | * changed which block our item fits into. Mark our | ||
4058 | * progress, so we only execute this once. | ||
4059 | */ | ||
4060 | brelse(dx_leaf_bh); | ||
4061 | dx_leaf_bh = NULL; | ||
4062 | rebalanced = 1; | ||
4063 | goto restart_search; | ||
4064 | } | ||
4065 | |||
4066 | lookup->dl_dx_leaf_bh = dx_leaf_bh; | ||
4067 | dx_leaf_bh = NULL; | ||
4068 | |||
4069 | out: | ||
4070 | brelse(dx_leaf_bh); | ||
4071 | return ret; | ||
4072 | } | ||
4073 | |||
4074 | static int ocfs2_search_dx_free_list(struct inode *dir, | ||
4075 | struct buffer_head *dx_root_bh, | ||
4076 | int namelen, | ||
4077 | struct ocfs2_dir_lookup_result *lookup) | ||
4078 | { | ||
4079 | int ret = -ENOSPC; | ||
4080 | struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL; | ||
4081 | struct ocfs2_dir_block_trailer *db; | ||
4082 | u64 next_block; | ||
4083 | int rec_len = OCFS2_DIR_REC_LEN(namelen); | ||
4084 | struct ocfs2_dx_root_block *dx_root; | ||
4085 | |||
4086 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4087 | next_block = le64_to_cpu(dx_root->dr_free_blk); | ||
4088 | |||
4089 | while (next_block) { | ||
4090 | brelse(prev_leaf_bh); | ||
4091 | prev_leaf_bh = leaf_bh; | ||
4092 | leaf_bh = NULL; | ||
4093 | |||
4094 | ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh); | ||
4095 | if (ret) { | ||
4096 | mlog_errno(ret); | ||
4097 | goto out; | ||
4098 | } | ||
4099 | |||
4100 | db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb); | ||
4101 | if (rec_len <= le16_to_cpu(db->db_free_rec_len)) { | ||
4102 | lookup->dl_leaf_bh = leaf_bh; | ||
4103 | lookup->dl_prev_leaf_bh = prev_leaf_bh; | ||
4104 | leaf_bh = NULL; | ||
4105 | prev_leaf_bh = NULL; | ||
4106 | break; | ||
4107 | } | ||
4108 | |||
4109 | next_block = le64_to_cpu(db->db_free_next); | ||
4110 | } | ||
4111 | |||
4112 | if (!next_block) | ||
4113 | ret = -ENOSPC; | ||
4114 | |||
4115 | out: | ||
4116 | |||
4117 | brelse(leaf_bh); | ||
4118 | brelse(prev_leaf_bh); | ||
4119 | return ret; | ||
4120 | } | ||
4121 | |||
4122 | static int ocfs2_expand_inline_dx_root(struct inode *dir, | ||
4123 | struct buffer_head *dx_root_bh) | ||
4124 | { | ||
4125 | int ret, num_dx_leaves, i, j, did_quota = 0; | ||
4126 | struct buffer_head **dx_leaves = NULL; | ||
4127 | struct ocfs2_extent_tree et; | ||
4128 | u64 insert_blkno; | ||
4129 | struct ocfs2_alloc_context *data_ac = NULL; | ||
4130 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4131 | handle_t *handle = NULL; | ||
4132 | struct ocfs2_dx_root_block *dx_root; | ||
4133 | struct ocfs2_dx_entry_list *entry_list; | ||
4134 | struct ocfs2_dx_entry *dx_entry; | ||
4135 | struct ocfs2_dx_leaf *target_leaf; | ||
4136 | |||
4137 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | ||
4138 | if (ret) { | ||
4139 | mlog_errno(ret); | ||
4140 | goto out; | ||
4141 | } | ||
4142 | |||
4143 | dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves); | ||
4144 | if (!dx_leaves) { | ||
4145 | ret = -ENOMEM; | ||
4146 | mlog_errno(ret); | ||
4147 | goto out; | ||
4148 | } | ||
4149 | |||
4150 | handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb)); | ||
4151 | if (IS_ERR(handle)) { | ||
4152 | ret = PTR_ERR(handle); | ||
4153 | mlog_errno(ret); | ||
4154 | goto out; | ||
4155 | } | ||
4156 | |||
4157 | if (vfs_dq_alloc_space_nodirty(dir, | ||
4158 | ocfs2_clusters_to_bytes(osb->sb, 1))) { | ||
4159 | ret = -EDQUOT; | ||
4160 | goto out_commit; | ||
4161 | } | ||
4162 | did_quota = 1; | ||
4163 | |||
4164 | /* | ||
4165 | * We do this up front, before the allocation, so that a | ||
4166 | * failure to add the dx_root_bh to the journal won't result | ||
4167 | * us losing clusters. | ||
4168 | */ | ||
4169 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
4170 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4171 | if (ret) { | ||
4172 | mlog_errno(ret); | ||
4173 | goto out_commit; | ||
4174 | } | ||
4175 | |||
4176 | ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves, | ||
4177 | num_dx_leaves, &insert_blkno); | ||
4178 | if (ret) { | ||
4179 | mlog_errno(ret); | ||
4180 | goto out_commit; | ||
4181 | } | ||
4182 | |||
4183 | /* | ||
4184 | * Transfer the entries from our dx_root into the appropriate | ||
4185 | * block | ||
4186 | */ | ||
4187 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
4188 | entry_list = &dx_root->dr_entries; | ||
4189 | |||
4190 | for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) { | ||
4191 | dx_entry = &entry_list->de_entries[i]; | ||
4192 | |||
4193 | j = __ocfs2_dx_dir_hash_idx(osb, | ||
4194 | le32_to_cpu(dx_entry->dx_minor_hash)); | ||
4195 | target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data; | ||
4196 | |||
4197 | ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry); | ||
4198 | |||
4199 | /* Each leaf has been passed to the journal already | ||
4200 | * via __ocfs2_dx_dir_new_cluster() */ | ||
4201 | } | ||
4202 | |||
4203 | dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE; | ||
4204 | memset(&dx_root->dr_list, 0, osb->sb->s_blocksize - | ||
4205 | offsetof(struct ocfs2_dx_root_block, dr_list)); | ||
4206 | dx_root->dr_list.l_count = | ||
4207 | cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); | ||
4208 | |||
4209 | /* This should never fail considering we start with an empty | ||
4210 | * dx_root. */ | ||
4211 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | ||
4212 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, | ||
4213 | insert_blkno, 1, 0, NULL); | ||
4214 | if (ret) | ||
4215 | mlog_errno(ret); | ||
4216 | did_quota = 0; | ||
4217 | |||
4218 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
4219 | |||
4220 | out_commit: | ||
4221 | if (ret < 0 && did_quota) | ||
4222 | vfs_dq_free_space_nodirty(dir, | ||
4223 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | ||
4224 | |||
4225 | ocfs2_commit_trans(osb, handle); | ||
4226 | |||
4227 | out: | ||
4228 | if (data_ac) | ||
4229 | ocfs2_free_alloc_context(data_ac); | ||
4230 | |||
4231 | if (dx_leaves) { | ||
4232 | for (i = 0; i < num_dx_leaves; i++) | ||
4233 | brelse(dx_leaves[i]); | ||
4234 | kfree(dx_leaves); | ||
4235 | } | ||
4236 | return ret; | ||
4237 | } | ||
4238 | |||
4239 | static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh) | ||
4240 | { | ||
4241 | struct ocfs2_dx_root_block *dx_root; | ||
4242 | struct ocfs2_dx_entry_list *entry_list; | ||
4243 | |||
4244 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
4245 | entry_list = &dx_root->dr_entries; | ||
4246 | |||
4247 | if (le16_to_cpu(entry_list->de_num_used) >= | ||
4248 | le16_to_cpu(entry_list->de_count)) | ||
4249 | return -ENOSPC; | ||
4250 | |||
4251 | return 0; | ||
4252 | } | ||
4253 | |||
4254 | static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir, | ||
4255 | struct buffer_head *di_bh, | ||
4256 | const char *name, | ||
4257 | int namelen, | ||
4258 | struct ocfs2_dir_lookup_result *lookup) | ||
4259 | { | ||
4260 | int ret, free_dx_root = 1; | ||
4261 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4262 | struct buffer_head *dx_root_bh = NULL; | ||
4263 | struct buffer_head *leaf_bh = NULL; | ||
4264 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4265 | struct ocfs2_dx_root_block *dx_root; | ||
4266 | |||
4267 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
4268 | if (ret) { | ||
4269 | mlog_errno(ret); | ||
4270 | goto out; | ||
4271 | } | ||
4272 | |||
4273 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4274 | if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) { | ||
4275 | ret = -ENOSPC; | ||
4276 | mlog_errno(ret); | ||
4277 | goto out; | ||
4278 | } | ||
4279 | |||
4280 | if (ocfs2_dx_root_inline(dx_root)) { | ||
4281 | ret = ocfs2_inline_dx_has_space(dx_root_bh); | ||
4282 | |||
4283 | if (ret == 0) | ||
4284 | goto search_el; | ||
4285 | |||
4286 | /* | ||
4287 | * We ran out of room in the root block. Expand it to | ||
4288 | * an extent, then allow ocfs2_find_dir_space_dx to do | ||
4289 | * the rest. | ||
4290 | */ | ||
4291 | ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh); | ||
4292 | if (ret) { | ||
4293 | mlog_errno(ret); | ||
4294 | goto out; | ||
4295 | } | ||
4296 | } | ||
4297 | |||
4298 | /* | ||
4299 | * Insert preparation for an indexed directory is split into two | ||
4300 | * steps. The call to find_dir_space_dx reserves room in the index for | ||
4301 | * an additional item. If we run out of space there, it's a real error | ||
4302 | * we can't continue on. | ||
4303 | */ | ||
4304 | ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name, | ||
4305 | namelen, lookup); | ||
4306 | if (ret) { | ||
4307 | mlog_errno(ret); | ||
4308 | goto out; | ||
4309 | } | ||
4310 | |||
4311 | search_el: | ||
4312 | /* | ||
4313 | * Next, we need to find space in the unindexed tree. This call | ||
4314 | * searches using the free space linked list. If the unindexed tree | ||
4315 | * lacks sufficient space, we'll expand it below. The expansion code | ||
4316 | * is smart enough to add any new blocks to the free space list. | ||
4317 | */ | ||
4318 | ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup); | ||
4319 | if (ret && ret != -ENOSPC) { | ||
4320 | mlog_errno(ret); | ||
4321 | goto out; | ||
4322 | } | ||
4323 | |||
4324 | /* Do this up here - ocfs2_extend_dir might need the dx_root */ | ||
4325 | lookup->dl_dx_root_bh = dx_root_bh; | ||
4326 | free_dx_root = 0; | ||
4327 | |||
4328 | if (ret == -ENOSPC) { | ||
4329 | ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh); | ||
4330 | |||
4331 | if (ret) { | ||
4332 | mlog_errno(ret); | ||
4333 | goto out; | ||
4334 | } | ||
4335 | |||
4336 | /* | ||
4337 | * We make the assumption here that new leaf blocks are added | ||
4338 | * to the front of our free list. | ||
4339 | */ | ||
4340 | lookup->dl_prev_leaf_bh = NULL; | ||
4341 | lookup->dl_leaf_bh = leaf_bh; | ||
4342 | } | ||
4343 | |||
4344 | out: | ||
4345 | if (free_dx_root) | ||
4346 | brelse(dx_root_bh); | ||
4347 | return ret; | ||
4348 | } | ||
4349 | |||
4350 | /* | ||
4351 | * Get a directory ready for insert. Any directory allocation required | ||
4352 | * happens here. Success returns zero, and enough context in the dir | ||
4353 | * lookup result that ocfs2_add_entry() will be able complete the task | ||
4354 | * with minimal performance impact. | ||
4355 | */ | ||
1973 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | 4356 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, |
1974 | struct inode *dir, | 4357 | struct inode *dir, |
1975 | struct buffer_head *parent_fe_bh, | 4358 | struct buffer_head *parent_fe_bh, |
1976 | const char *name, | 4359 | const char *name, |
1977 | int namelen, | 4360 | int namelen, |
1978 | struct buffer_head **ret_de_bh) | 4361 | struct ocfs2_dir_lookup_result *lookup) |
1979 | { | 4362 | { |
1980 | int ret; | 4363 | int ret; |
1981 | unsigned int blocks_wanted = 1; | 4364 | unsigned int blocks_wanted = 1; |
@@ -1984,14 +4367,34 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
1984 | mlog(0, "getting ready to insert namelen %d into dir %llu\n", | 4367 | mlog(0, "getting ready to insert namelen %d into dir %llu\n", |
1985 | namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 4368 | namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); |
1986 | 4369 | ||
1987 | *ret_de_bh = NULL; | ||
1988 | |||
1989 | if (!namelen) { | 4370 | if (!namelen) { |
1990 | ret = -EINVAL; | 4371 | ret = -EINVAL; |
1991 | mlog_errno(ret); | 4372 | mlog_errno(ret); |
1992 | goto out; | 4373 | goto out; |
1993 | } | 4374 | } |
1994 | 4375 | ||
4376 | /* | ||
4377 | * Do this up front to reduce confusion. | ||
4378 | * | ||
4379 | * The directory might start inline, then be turned into an | ||
4380 | * indexed one, in which case we'd need to hash deep inside | ||
4381 | * ocfs2_find_dir_space_id(). Since | ||
4382 | * ocfs2_prepare_dx_dir_for_insert() also needs this hash | ||
4383 | * done, there seems no point in spreading out the calls. We | ||
4384 | * can optimize away the case where the file system doesn't | ||
4385 | * support indexing. | ||
4386 | */ | ||
4387 | if (ocfs2_supports_indexed_dirs(osb)) | ||
4388 | ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo); | ||
4389 | |||
4390 | if (ocfs2_dir_indexed(dir)) { | ||
4391 | ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh, | ||
4392 | name, namelen, lookup); | ||
4393 | if (ret) | ||
4394 | mlog_errno(ret); | ||
4395 | goto out; | ||
4396 | } | ||
4397 | |||
1995 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 4398 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
1996 | ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, | 4399 | ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, |
1997 | namelen, &bh, &blocks_wanted); | 4400 | namelen, &bh, &blocks_wanted); |
@@ -2010,7 +4413,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
2010 | BUG_ON(bh); | 4413 | BUG_ON(bh); |
2011 | 4414 | ||
2012 | ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, | 4415 | ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, |
2013 | &bh); | 4416 | lookup, &bh); |
2014 | if (ret) { | 4417 | if (ret) { |
2015 | if (ret != -ENOSPC) | 4418 | if (ret != -ENOSPC) |
2016 | mlog_errno(ret); | 4419 | mlog_errno(ret); |
@@ -2020,9 +4423,154 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
2020 | BUG_ON(!bh); | 4423 | BUG_ON(!bh); |
2021 | } | 4424 | } |
2022 | 4425 | ||
2023 | *ret_de_bh = bh; | 4426 | lookup->dl_leaf_bh = bh; |
2024 | bh = NULL; | 4427 | bh = NULL; |
2025 | out: | 4428 | out: |
2026 | brelse(bh); | 4429 | brelse(bh); |
2027 | return ret; | 4430 | return ret; |
2028 | } | 4431 | } |
4432 | |||
4433 | static int ocfs2_dx_dir_remove_index(struct inode *dir, | ||
4434 | struct buffer_head *di_bh, | ||
4435 | struct buffer_head *dx_root_bh) | ||
4436 | { | ||
4437 | int ret; | ||
4438 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4439 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4440 | struct ocfs2_dx_root_block *dx_root; | ||
4441 | struct inode *dx_alloc_inode = NULL; | ||
4442 | struct buffer_head *dx_alloc_bh = NULL; | ||
4443 | handle_t *handle; | ||
4444 | u64 blk; | ||
4445 | u16 bit; | ||
4446 | u64 bg_blkno; | ||
4447 | |||
4448 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
4449 | |||
4450 | dx_alloc_inode = ocfs2_get_system_file_inode(osb, | ||
4451 | EXTENT_ALLOC_SYSTEM_INODE, | ||
4452 | le16_to_cpu(dx_root->dr_suballoc_slot)); | ||
4453 | if (!dx_alloc_inode) { | ||
4454 | ret = -ENOMEM; | ||
4455 | mlog_errno(ret); | ||
4456 | goto out; | ||
4457 | } | ||
4458 | mutex_lock(&dx_alloc_inode->i_mutex); | ||
4459 | |||
4460 | ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1); | ||
4461 | if (ret) { | ||
4462 | mlog_errno(ret); | ||
4463 | goto out_mutex; | ||
4464 | } | ||
4465 | |||
4466 | handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS); | ||
4467 | if (IS_ERR(handle)) { | ||
4468 | ret = PTR_ERR(handle); | ||
4469 | mlog_errno(ret); | ||
4470 | goto out_unlock; | ||
4471 | } | ||
4472 | |||
4473 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | ||
4474 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4475 | if (ret) { | ||
4476 | mlog_errno(ret); | ||
4477 | goto out_commit; | ||
4478 | } | ||
4479 | |||
4480 | OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; | ||
4481 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | ||
4482 | di->i_dx_root = cpu_to_le64(0ULL); | ||
4483 | |||
4484 | ocfs2_journal_dirty(handle, di_bh); | ||
4485 | |||
4486 | blk = le64_to_cpu(dx_root->dr_blkno); | ||
4487 | bit = le16_to_cpu(dx_root->dr_suballoc_bit); | ||
4488 | bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
4489 | ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh, | ||
4490 | bit, bg_blkno, 1); | ||
4491 | if (ret) | ||
4492 | mlog_errno(ret); | ||
4493 | |||
4494 | out_commit: | ||
4495 | ocfs2_commit_trans(osb, handle); | ||
4496 | |||
4497 | out_unlock: | ||
4498 | ocfs2_inode_unlock(dx_alloc_inode, 1); | ||
4499 | |||
4500 | out_mutex: | ||
4501 | mutex_unlock(&dx_alloc_inode->i_mutex); | ||
4502 | brelse(dx_alloc_bh); | ||
4503 | out: | ||
4504 | iput(dx_alloc_inode); | ||
4505 | return ret; | ||
4506 | } | ||
4507 | |||
4508 | int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) | ||
4509 | { | ||
4510 | int ret; | ||
4511 | unsigned int uninitialized_var(clen); | ||
4512 | u32 major_hash = UINT_MAX, p_cpos, uninitialized_var(cpos); | ||
4513 | u64 uninitialized_var(blkno); | ||
4514 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4515 | struct buffer_head *dx_root_bh = NULL; | ||
4516 | struct ocfs2_dx_root_block *dx_root; | ||
4517 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4518 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
4519 | struct ocfs2_extent_tree et; | ||
4520 | |||
4521 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
4522 | |||
4523 | if (!ocfs2_dir_indexed(dir)) | ||
4524 | return 0; | ||
4525 | |||
4526 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
4527 | if (ret) { | ||
4528 | mlog_errno(ret); | ||
4529 | goto out; | ||
4530 | } | ||
4531 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4532 | |||
4533 | if (ocfs2_dx_root_inline(dx_root)) | ||
4534 | goto remove_index; | ||
4535 | |||
4536 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | ||
4537 | |||
4538 | /* XXX: What if dr_clusters is too large? */ | ||
4539 | while (le32_to_cpu(dx_root->dr_clusters)) { | ||
4540 | ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list, | ||
4541 | major_hash, &cpos, &blkno, &clen); | ||
4542 | if (ret) { | ||
4543 | mlog_errno(ret); | ||
4544 | goto out; | ||
4545 | } | ||
4546 | |||
4547 | p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); | ||
4548 | |||
4549 | ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, | ||
4550 | &dealloc); | ||
4551 | if (ret) { | ||
4552 | mlog_errno(ret); | ||
4553 | goto out; | ||
4554 | } | ||
4555 | |||
4556 | if (cpos == 0) | ||
4557 | break; | ||
4558 | |||
4559 | major_hash = cpos - 1; | ||
4560 | } | ||
4561 | |||
4562 | remove_index: | ||
4563 | ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh); | ||
4564 | if (ret) { | ||
4565 | mlog_errno(ret); | ||
4566 | goto out; | ||
4567 | } | ||
4568 | |||
4569 | ocfs2_remove_from_cache(dir, dx_root_bh); | ||
4570 | out: | ||
4571 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
4572 | ocfs2_run_deallocs(osb, &dealloc); | ||
4573 | |||
4574 | brelse(dx_root_bh); | ||
4575 | return ret; | ||
4576 | } | ||
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h index c511e2e18e9f..e683f3deb645 100644 --- a/fs/ocfs2/dir.h +++ b/fs/ocfs2/dir.h | |||
@@ -26,44 +26,70 @@ | |||
26 | #ifndef OCFS2_DIR_H | 26 | #ifndef OCFS2_DIR_H |
27 | #define OCFS2_DIR_H | 27 | #define OCFS2_DIR_H |
28 | 28 | ||
29 | struct buffer_head *ocfs2_find_entry(const char *name, | 29 | struct ocfs2_dx_hinfo { |
30 | int namelen, | 30 | u32 major_hash; |
31 | struct inode *dir, | 31 | u32 minor_hash; |
32 | struct ocfs2_dir_entry **res_dir); | 32 | }; |
33 | |||
34 | struct ocfs2_dir_lookup_result { | ||
35 | struct buffer_head *dl_leaf_bh; /* Unindexed leaf | ||
36 | * block */ | ||
37 | struct ocfs2_dir_entry *dl_entry; /* Target dirent in | ||
38 | * unindexed leaf */ | ||
39 | |||
40 | struct buffer_head *dl_dx_root_bh; /* Root of indexed | ||
41 | * tree */ | ||
42 | |||
43 | struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */ | ||
44 | struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in | ||
45 | * indexed leaf */ | ||
46 | struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */ | ||
47 | |||
48 | struct buffer_head *dl_prev_leaf_bh;/* Previous entry in | ||
49 | * dir free space | ||
50 | * list. NULL if | ||
51 | * previous entry is | ||
52 | * dx root block. */ | ||
53 | }; | ||
54 | |||
55 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res); | ||
56 | |||
57 | int ocfs2_find_entry(const char *name, int namelen, | ||
58 | struct inode *dir, | ||
59 | struct ocfs2_dir_lookup_result *lookup); | ||
33 | int ocfs2_delete_entry(handle_t *handle, | 60 | int ocfs2_delete_entry(handle_t *handle, |
34 | struct inode *dir, | 61 | struct inode *dir, |
35 | struct ocfs2_dir_entry *de_del, | 62 | struct ocfs2_dir_lookup_result *res); |
36 | struct buffer_head *bh); | ||
37 | int __ocfs2_add_entry(handle_t *handle, | 63 | int __ocfs2_add_entry(handle_t *handle, |
38 | struct inode *dir, | 64 | struct inode *dir, |
39 | const char *name, int namelen, | 65 | const char *name, int namelen, |
40 | struct inode *inode, u64 blkno, | 66 | struct inode *inode, u64 blkno, |
41 | struct buffer_head *parent_fe_bh, | 67 | struct buffer_head *parent_fe_bh, |
42 | struct buffer_head *insert_bh); | 68 | struct ocfs2_dir_lookup_result *lookup); |
43 | static inline int ocfs2_add_entry(handle_t *handle, | 69 | static inline int ocfs2_add_entry(handle_t *handle, |
44 | struct dentry *dentry, | 70 | struct dentry *dentry, |
45 | struct inode *inode, u64 blkno, | 71 | struct inode *inode, u64 blkno, |
46 | struct buffer_head *parent_fe_bh, | 72 | struct buffer_head *parent_fe_bh, |
47 | struct buffer_head *insert_bh) | 73 | struct ocfs2_dir_lookup_result *lookup) |
48 | { | 74 | { |
49 | return __ocfs2_add_entry(handle, dentry->d_parent->d_inode, | 75 | return __ocfs2_add_entry(handle, dentry->d_parent->d_inode, |
50 | dentry->d_name.name, dentry->d_name.len, | 76 | dentry->d_name.name, dentry->d_name.len, |
51 | inode, blkno, parent_fe_bh, insert_bh); | 77 | inode, blkno, parent_fe_bh, lookup); |
52 | } | 78 | } |
53 | int ocfs2_update_entry(struct inode *dir, handle_t *handle, | 79 | int ocfs2_update_entry(struct inode *dir, handle_t *handle, |
54 | struct buffer_head *de_bh, struct ocfs2_dir_entry *de, | 80 | struct ocfs2_dir_lookup_result *res, |
55 | struct inode *new_entry_inode); | 81 | struct inode *new_entry_inode); |
56 | 82 | ||
57 | int ocfs2_check_dir_for_entry(struct inode *dir, | 83 | int ocfs2_check_dir_for_entry(struct inode *dir, |
58 | const char *name, | 84 | const char *name, |
59 | int namelen); | 85 | int namelen); |
60 | int ocfs2_empty_dir(struct inode *inode); | 86 | int ocfs2_empty_dir(struct inode *inode); |
87 | |||
61 | int ocfs2_find_files_on_disk(const char *name, | 88 | int ocfs2_find_files_on_disk(const char *name, |
62 | int namelen, | 89 | int namelen, |
63 | u64 *blkno, | 90 | u64 *blkno, |
64 | struct inode *inode, | 91 | struct inode *inode, |
65 | struct buffer_head **dirent_bh, | 92 | struct ocfs2_dir_lookup_result *res); |
66 | struct ocfs2_dir_entry **dirent); | ||
67 | int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, | 93 | int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, |
68 | int namelen, u64 *blkno); | 94 | int namelen, u64 *blkno); |
69 | int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); | 95 | int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); |
@@ -74,14 +100,17 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
74 | struct buffer_head *parent_fe_bh, | 100 | struct buffer_head *parent_fe_bh, |
75 | const char *name, | 101 | const char *name, |
76 | int namelen, | 102 | int namelen, |
77 | struct buffer_head **ret_de_bh); | 103 | struct ocfs2_dir_lookup_result *lookup); |
78 | struct ocfs2_alloc_context; | 104 | struct ocfs2_alloc_context; |
79 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, | 105 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, |
80 | handle_t *handle, | 106 | handle_t *handle, |
81 | struct inode *parent, | 107 | struct inode *parent, |
82 | struct inode *inode, | 108 | struct inode *inode, |
83 | struct buffer_head *fe_bh, | 109 | struct buffer_head *fe_bh, |
84 | struct ocfs2_alloc_context *data_ac); | 110 | struct ocfs2_alloc_context *data_ac, |
111 | struct ocfs2_alloc_context *meta_ac); | ||
112 | |||
113 | int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh); | ||
85 | 114 | ||
86 | struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize, | 115 | struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize, |
87 | void *data); | 116 | void *data); |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index bb53714813ab..0102be35980c 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -52,16 +52,12 @@ | |||
52 | enum dlm_mle_type { | 52 | enum dlm_mle_type { |
53 | DLM_MLE_BLOCK, | 53 | DLM_MLE_BLOCK, |
54 | DLM_MLE_MASTER, | 54 | DLM_MLE_MASTER, |
55 | DLM_MLE_MIGRATION | 55 | DLM_MLE_MIGRATION, |
56 | }; | 56 | DLM_MLE_NUM_TYPES |
57 | |||
58 | struct dlm_lock_name { | ||
59 | u8 len; | ||
60 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
61 | }; | 57 | }; |
62 | 58 | ||
63 | struct dlm_master_list_entry { | 59 | struct dlm_master_list_entry { |
64 | struct list_head list; | 60 | struct hlist_node master_hash_node; |
65 | struct list_head hb_events; | 61 | struct list_head hb_events; |
66 | struct dlm_ctxt *dlm; | 62 | struct dlm_ctxt *dlm; |
67 | spinlock_t spinlock; | 63 | spinlock_t spinlock; |
@@ -78,10 +74,10 @@ struct dlm_master_list_entry { | |||
78 | enum dlm_mle_type type; | 74 | enum dlm_mle_type type; |
79 | struct o2hb_callback_func mle_hb_up; | 75 | struct o2hb_callback_func mle_hb_up; |
80 | struct o2hb_callback_func mle_hb_down; | 76 | struct o2hb_callback_func mle_hb_down; |
81 | union { | 77 | struct dlm_lock_resource *mleres; |
82 | struct dlm_lock_resource *res; | 78 | unsigned char mname[DLM_LOCKID_NAME_MAX]; |
83 | struct dlm_lock_name name; | 79 | unsigned int mnamelen; |
84 | } u; | 80 | unsigned int mnamehash; |
85 | }; | 81 | }; |
86 | 82 | ||
87 | enum dlm_ast_type { | 83 | enum dlm_ast_type { |
@@ -151,13 +147,14 @@ struct dlm_ctxt | |||
151 | unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 147 | unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
152 | struct dlm_recovery_ctxt reco; | 148 | struct dlm_recovery_ctxt reco; |
153 | spinlock_t master_lock; | 149 | spinlock_t master_lock; |
154 | struct list_head master_list; | 150 | struct hlist_head **master_hash; |
155 | struct list_head mle_hb_events; | 151 | struct list_head mle_hb_events; |
156 | 152 | ||
157 | /* these give a really vague idea of the system load */ | 153 | /* these give a really vague idea of the system load */ |
158 | atomic_t local_resources; | 154 | atomic_t mle_tot_count[DLM_MLE_NUM_TYPES]; |
159 | atomic_t remote_resources; | 155 | atomic_t mle_cur_count[DLM_MLE_NUM_TYPES]; |
160 | atomic_t unknown_resources; | 156 | atomic_t res_tot_count; |
157 | atomic_t res_cur_count; | ||
161 | 158 | ||
162 | struct dlm_debug_ctxt *dlm_debug_ctxt; | 159 | struct dlm_debug_ctxt *dlm_debug_ctxt; |
163 | struct dentry *dlm_debugfs_subroot; | 160 | struct dentry *dlm_debugfs_subroot; |
@@ -195,6 +192,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned | |||
195 | return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); | 192 | return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); |
196 | } | 193 | } |
197 | 194 | ||
195 | static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm, | ||
196 | unsigned i) | ||
197 | { | ||
198 | return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + | ||
199 | (i % DLM_BUCKETS_PER_PAGE); | ||
200 | } | ||
201 | |||
198 | /* these keventd work queue items are for less-frequently | 202 | /* these keventd work queue items are for less-frequently |
199 | * called functions that cannot be directly called from the | 203 | * called functions that cannot be directly called from the |
200 | * net message handlers for some reason, usually because | 204 | * net message handlers for some reason, usually because |
@@ -848,9 +852,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, | |||
848 | unsigned int len); | 852 | unsigned int len); |
849 | 853 | ||
850 | int dlm_is_host_down(int errno); | 854 | int dlm_is_host_down(int errno); |
851 | void dlm_change_lockres_owner(struct dlm_ctxt *dlm, | 855 | |
852 | struct dlm_lock_resource *res, | ||
853 | u8 owner); | ||
854 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | 856 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, |
855 | const char *lockid, | 857 | const char *lockid, |
856 | int namelen, | 858 | int namelen, |
@@ -1008,6 +1010,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
1008 | DLM_LOCK_RES_MIGRATING)); | 1010 | DLM_LOCK_RES_MIGRATING)); |
1009 | } | 1011 | } |
1010 | 1012 | ||
1013 | void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle); | ||
1014 | void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle); | ||
1015 | |||
1011 | /* create/destroy slab caches */ | 1016 | /* create/destroy slab caches */ |
1012 | int dlm_init_master_caches(void); | 1017 | int dlm_init_master_caches(void); |
1013 | void dlm_destroy_master_caches(void); | 1018 | void dlm_destroy_master_caches(void); |
@@ -1110,6 +1115,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter) | |||
1110 | return bit; | 1115 | return bit; |
1111 | } | 1116 | } |
1112 | 1117 | ||
1118 | static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm, | ||
1119 | struct dlm_lock_resource *res, | ||
1120 | u8 owner) | ||
1121 | { | ||
1122 | assert_spin_locked(&res->spinlock); | ||
1123 | |||
1124 | res->owner = owner; | ||
1125 | } | ||
1113 | 1126 | ||
1127 | static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm, | ||
1128 | struct dlm_lock_resource *res, | ||
1129 | u8 owner) | ||
1130 | { | ||
1131 | assert_spin_locked(&res->spinlock); | ||
1132 | |||
1133 | if (owner != res->owner) | ||
1134 | dlm_set_lockres_owner(dlm, res, owner); | ||
1135 | } | ||
1114 | 1136 | ||
1115 | #endif /* DLMCOMMON_H */ | 1137 | #endif /* DLMCOMMON_H */ |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index b32f60a5acfb..df52f706f669 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes, | |||
287 | static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) | 287 | static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) |
288 | { | 288 | { |
289 | int out = 0; | 289 | int out = 0; |
290 | unsigned int namelen; | ||
291 | const char *name; | ||
292 | char *mle_type; | 290 | char *mle_type; |
293 | 291 | ||
294 | if (mle->type != DLM_MLE_MASTER) { | ||
295 | namelen = mle->u.name.len; | ||
296 | name = mle->u.name.name; | ||
297 | } else { | ||
298 | namelen = mle->u.res->lockname.len; | ||
299 | name = mle->u.res->lockname.name; | ||
300 | } | ||
301 | |||
302 | if (mle->type == DLM_MLE_BLOCK) | 292 | if (mle->type == DLM_MLE_BLOCK) |
303 | mle_type = "BLK"; | 293 | mle_type = "BLK"; |
304 | else if (mle->type == DLM_MLE_MASTER) | 294 | else if (mle->type == DLM_MLE_MASTER) |
@@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) | |||
306 | else | 296 | else |
307 | mle_type = "MIG"; | 297 | mle_type = "MIG"; |
308 | 298 | ||
309 | out += stringify_lockname(name, namelen, buf + out, len - out); | 299 | out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out); |
310 | out += snprintf(buf + out, len - out, | 300 | out += snprintf(buf + out, len - out, |
311 | "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", | 301 | "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", |
312 | mle_type, mle->master, mle->new_master, | 302 | mle_type, mle->master, mle->new_master, |
@@ -501,23 +491,33 @@ static struct file_operations debug_purgelist_fops = { | |||
501 | static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | 491 | static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) |
502 | { | 492 | { |
503 | struct dlm_master_list_entry *mle; | 493 | struct dlm_master_list_entry *mle; |
504 | int out = 0; | 494 | struct hlist_head *bucket; |
505 | unsigned long total = 0; | 495 | struct hlist_node *list; |
496 | int i, out = 0; | ||
497 | unsigned long total = 0, longest = 0, bktcnt; | ||
506 | 498 | ||
507 | out += snprintf(db->buf + out, db->len - out, | 499 | out += snprintf(db->buf + out, db->len - out, |
508 | "Dumping MLEs for Domain: %s\n", dlm->name); | 500 | "Dumping MLEs for Domain: %s\n", dlm->name); |
509 | 501 | ||
510 | spin_lock(&dlm->master_lock); | 502 | spin_lock(&dlm->master_lock); |
511 | list_for_each_entry(mle, &dlm->master_list, list) { | 503 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
512 | ++total; | 504 | bucket = dlm_master_hash(dlm, i); |
513 | if (db->len - out < 200) | 505 | hlist_for_each(list, bucket) { |
514 | continue; | 506 | mle = hlist_entry(list, struct dlm_master_list_entry, |
515 | out += dump_mle(mle, db->buf + out, db->len - out); | 507 | master_hash_node); |
508 | ++total; | ||
509 | ++bktcnt; | ||
510 | if (db->len - out < 200) | ||
511 | continue; | ||
512 | out += dump_mle(mle, db->buf + out, db->len - out); | ||
513 | } | ||
514 | longest = max(longest, bktcnt); | ||
515 | bktcnt = 0; | ||
516 | } | 516 | } |
517 | spin_unlock(&dlm->master_lock); | 517 | spin_unlock(&dlm->master_lock); |
518 | 518 | ||
519 | out += snprintf(db->buf + out, db->len - out, | 519 | out += snprintf(db->buf + out, db->len - out, |
520 | "Total on list: %ld\n", total); | 520 | "Total: %ld, Longest: %ld\n", total, longest); |
521 | return out; | 521 | return out; |
522 | } | 522 | } |
523 | 523 | ||
@@ -756,12 +756,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
756 | int out = 0; | 756 | int out = 0; |
757 | struct dlm_reco_node_data *node; | 757 | struct dlm_reco_node_data *node; |
758 | char *state; | 758 | char *state; |
759 | int lres, rres, ures, tres; | 759 | int cur_mles = 0, tot_mles = 0; |
760 | 760 | int i; | |
761 | lres = atomic_read(&dlm->local_resources); | ||
762 | rres = atomic_read(&dlm->remote_resources); | ||
763 | ures = atomic_read(&dlm->unknown_resources); | ||
764 | tres = lres + rres + ures; | ||
765 | 761 | ||
766 | spin_lock(&dlm->spinlock); | 762 | spin_lock(&dlm->spinlock); |
767 | 763 | ||
@@ -804,21 +800,48 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
804 | db->buf + out, db->len - out); | 800 | db->buf + out, db->len - out); |
805 | out += snprintf(db->buf + out, db->len - out, "\n"); | 801 | out += snprintf(db->buf + out, db->len - out, "\n"); |
806 | 802 | ||
807 | /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ | 803 | /* Lock Resources: xxx (xxx) */ |
804 | out += snprintf(db->buf + out, db->len - out, | ||
805 | "Lock Resources: %d (%d)\n", | ||
806 | atomic_read(&dlm->res_cur_count), | ||
807 | atomic_read(&dlm->res_tot_count)); | ||
808 | |||
809 | for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) | ||
810 | tot_mles += atomic_read(&dlm->mle_tot_count[i]); | ||
811 | |||
812 | for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) | ||
813 | cur_mles += atomic_read(&dlm->mle_cur_count[i]); | ||
814 | |||
815 | /* MLEs: xxx (xxx) */ | ||
816 | out += snprintf(db->buf + out, db->len - out, | ||
817 | "MLEs: %d (%d)\n", cur_mles, tot_mles); | ||
818 | |||
819 | /* Blocking: xxx (xxx) */ | ||
820 | out += snprintf(db->buf + out, db->len - out, | ||
821 | " Blocking: %d (%d)\n", | ||
822 | atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), | ||
823 | atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); | ||
824 | |||
825 | /* Mastery: xxx (xxx) */ | ||
826 | out += snprintf(db->buf + out, db->len - out, | ||
827 | " Mastery: %d (%d)\n", | ||
828 | atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), | ||
829 | atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); | ||
830 | |||
831 | /* Migration: xxx (xxx) */ | ||
808 | out += snprintf(db->buf + out, db->len - out, | 832 | out += snprintf(db->buf + out, db->len - out, |
809 | "Mastered Resources Total: %d Locally: %d " | 833 | " Migration: %d (%d)\n", |
810 | "Remotely: %d Unknown: %d\n", | 834 | atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), |
811 | tres, lres, rres, ures); | 835 | atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); |
812 | 836 | ||
813 | /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ | 837 | /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ |
814 | out += snprintf(db->buf + out, db->len - out, | 838 | out += snprintf(db->buf + out, db->len - out, |
815 | "Lists: Dirty=%s Purge=%s PendingASTs=%s " | 839 | "Lists: Dirty=%s Purge=%s PendingASTs=%s " |
816 | "PendingBASTs=%s Master=%s\n", | 840 | "PendingBASTs=%s\n", |
817 | (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), | 841 | (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), |
818 | (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), | 842 | (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), |
819 | (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), | 843 | (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), |
820 | (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), | 844 | (list_empty(&dlm->pending_basts) ? "Empty" : "InUse")); |
821 | (list_empty(&dlm->master_list) ? "Empty" : "InUse")); | ||
822 | 845 | ||
823 | /* Purge Count: xxx Refs: xxx */ | 846 | /* Purge Count: xxx Refs: xxx */ |
824 | out += snprintf(db->buf + out, db->len - out, | 847 | out += snprintf(db->buf + out, db->len - out, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index d8d578f45613..4d9e6b288dd8 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) | |||
304 | if (dlm->lockres_hash) | 304 | if (dlm->lockres_hash) |
305 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 305 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); |
306 | 306 | ||
307 | if (dlm->master_hash) | ||
308 | dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); | ||
309 | |||
307 | if (dlm->name) | 310 | if (dlm->name) |
308 | kfree(dlm->name); | 311 | kfree(dlm->name); |
309 | 312 | ||
@@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1534 | for (i = 0; i < DLM_HASH_BUCKETS; i++) | 1537 | for (i = 0; i < DLM_HASH_BUCKETS; i++) |
1535 | INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); | 1538 | INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); |
1536 | 1539 | ||
1540 | dlm->master_hash = (struct hlist_head **) | ||
1541 | dlm_alloc_pagevec(DLM_HASH_PAGES); | ||
1542 | if (!dlm->master_hash) { | ||
1543 | mlog_errno(-ENOMEM); | ||
1544 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | ||
1545 | kfree(dlm->name); | ||
1546 | kfree(dlm); | ||
1547 | dlm = NULL; | ||
1548 | goto leave; | ||
1549 | } | ||
1550 | |||
1551 | for (i = 0; i < DLM_HASH_BUCKETS; i++) | ||
1552 | INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); | ||
1553 | |||
1537 | strcpy(dlm->name, domain); | 1554 | strcpy(dlm->name, domain); |
1538 | dlm->key = key; | 1555 | dlm->key = key; |
1539 | dlm->node_num = o2nm_this_node(); | 1556 | dlm->node_num = o2nm_this_node(); |
1540 | 1557 | ||
1541 | ret = dlm_create_debugfs_subroot(dlm); | 1558 | ret = dlm_create_debugfs_subroot(dlm); |
1542 | if (ret < 0) { | 1559 | if (ret < 0) { |
1560 | dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); | ||
1543 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 1561 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); |
1544 | kfree(dlm->name); | 1562 | kfree(dlm->name); |
1545 | kfree(dlm); | 1563 | kfree(dlm); |
@@ -1579,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1579 | init_waitqueue_head(&dlm->reco.event); | 1597 | init_waitqueue_head(&dlm->reco.event); |
1580 | init_waitqueue_head(&dlm->ast_wq); | 1598 | init_waitqueue_head(&dlm->ast_wq); |
1581 | init_waitqueue_head(&dlm->migration_wq); | 1599 | init_waitqueue_head(&dlm->migration_wq); |
1582 | INIT_LIST_HEAD(&dlm->master_list); | ||
1583 | INIT_LIST_HEAD(&dlm->mle_hb_events); | 1600 | INIT_LIST_HEAD(&dlm->mle_hb_events); |
1584 | 1601 | ||
1585 | dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; | 1602 | dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; |
@@ -1587,9 +1604,13 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1587 | 1604 | ||
1588 | dlm->reco.new_master = O2NM_INVALID_NODE_NUM; | 1605 | dlm->reco.new_master = O2NM_INVALID_NODE_NUM; |
1589 | dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; | 1606 | dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; |
1590 | atomic_set(&dlm->local_resources, 0); | 1607 | |
1591 | atomic_set(&dlm->remote_resources, 0); | 1608 | atomic_set(&dlm->res_tot_count, 0); |
1592 | atomic_set(&dlm->unknown_resources, 0); | 1609 | atomic_set(&dlm->res_cur_count, 0); |
1610 | for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) { | ||
1611 | atomic_set(&dlm->mle_tot_count[i], 0); | ||
1612 | atomic_set(&dlm->mle_cur_count[i], 0); | ||
1613 | } | ||
1593 | 1614 | ||
1594 | spin_lock_init(&dlm->work_lock); | 1615 | spin_lock_init(&dlm->work_lock); |
1595 | INIT_LIST_HEAD(&dlm->work_list); | 1616 | INIT_LIST_HEAD(&dlm->work_list); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 0a2813947853..f8b653fcd4dd 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -73,22 +73,13 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, | |||
73 | const char *name, | 73 | const char *name, |
74 | unsigned int namelen) | 74 | unsigned int namelen) |
75 | { | 75 | { |
76 | struct dlm_lock_resource *res; | ||
77 | |||
78 | if (dlm != mle->dlm) | 76 | if (dlm != mle->dlm) |
79 | return 0; | 77 | return 0; |
80 | 78 | ||
81 | if (mle->type == DLM_MLE_BLOCK || | 79 | if (namelen != mle->mnamelen || |
82 | mle->type == DLM_MLE_MIGRATION) { | 80 | memcmp(name, mle->mname, namelen) != 0) |
83 | if (namelen != mle->u.name.len || | 81 | return 0; |
84 | memcmp(name, mle->u.name.name, namelen)!=0) | 82 | |
85 | return 0; | ||
86 | } else { | ||
87 | res = mle->u.res; | ||
88 | if (namelen != res->lockname.len || | ||
89 | memcmp(res->lockname.name, name, namelen) != 0) | ||
90 | return 0; | ||
91 | } | ||
92 | return 1; | 83 | return 1; |
93 | } | 84 | } |
94 | 85 | ||
@@ -283,7 +274,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, | |||
283 | 274 | ||
284 | mle->dlm = dlm; | 275 | mle->dlm = dlm; |
285 | mle->type = type; | 276 | mle->type = type; |
286 | INIT_LIST_HEAD(&mle->list); | 277 | INIT_HLIST_NODE(&mle->master_hash_node); |
287 | INIT_LIST_HEAD(&mle->hb_events); | 278 | INIT_LIST_HEAD(&mle->hb_events); |
288 | memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); | 279 | memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); |
289 | spin_lock_init(&mle->spinlock); | 280 | spin_lock_init(&mle->spinlock); |
@@ -295,19 +286,27 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, | |||
295 | mle->new_master = O2NM_MAX_NODES; | 286 | mle->new_master = O2NM_MAX_NODES; |
296 | mle->inuse = 0; | 287 | mle->inuse = 0; |
297 | 288 | ||
289 | BUG_ON(mle->type != DLM_MLE_BLOCK && | ||
290 | mle->type != DLM_MLE_MASTER && | ||
291 | mle->type != DLM_MLE_MIGRATION); | ||
292 | |||
298 | if (mle->type == DLM_MLE_MASTER) { | 293 | if (mle->type == DLM_MLE_MASTER) { |
299 | BUG_ON(!res); | 294 | BUG_ON(!res); |
300 | mle->u.res = res; | 295 | mle->mleres = res; |
301 | } else if (mle->type == DLM_MLE_BLOCK) { | 296 | memcpy(mle->mname, res->lockname.name, res->lockname.len); |
302 | BUG_ON(!name); | 297 | mle->mnamelen = res->lockname.len; |
303 | memcpy(mle->u.name.name, name, namelen); | 298 | mle->mnamehash = res->lockname.hash; |
304 | mle->u.name.len = namelen; | 299 | } else { |
305 | } else /* DLM_MLE_MIGRATION */ { | ||
306 | BUG_ON(!name); | 300 | BUG_ON(!name); |
307 | memcpy(mle->u.name.name, name, namelen); | 301 | mle->mleres = NULL; |
308 | mle->u.name.len = namelen; | 302 | memcpy(mle->mname, name, namelen); |
303 | mle->mnamelen = namelen; | ||
304 | mle->mnamehash = dlm_lockid_hash(name, namelen); | ||
309 | } | 305 | } |
310 | 306 | ||
307 | atomic_inc(&dlm->mle_tot_count[mle->type]); | ||
308 | atomic_inc(&dlm->mle_cur_count[mle->type]); | ||
309 | |||
311 | /* copy off the node_map and register hb callbacks on our copy */ | 310 | /* copy off the node_map and register hb callbacks on our copy */ |
312 | memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map)); | 311 | memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map)); |
313 | memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map)); | 312 | memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map)); |
@@ -318,6 +317,24 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, | |||
318 | __dlm_mle_attach_hb_events(dlm, mle); | 317 | __dlm_mle_attach_hb_events(dlm, mle); |
319 | } | 318 | } |
320 | 319 | ||
320 | void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) | ||
321 | { | ||
322 | assert_spin_locked(&dlm->spinlock); | ||
323 | assert_spin_locked(&dlm->master_lock); | ||
324 | |||
325 | if (!hlist_unhashed(&mle->master_hash_node)) | ||
326 | hlist_del_init(&mle->master_hash_node); | ||
327 | } | ||
328 | |||
329 | void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) | ||
330 | { | ||
331 | struct hlist_head *bucket; | ||
332 | |||
333 | assert_spin_locked(&dlm->master_lock); | ||
334 | |||
335 | bucket = dlm_master_hash(dlm, mle->mnamehash); | ||
336 | hlist_add_head(&mle->master_hash_node, bucket); | ||
337 | } | ||
321 | 338 | ||
322 | /* returns 1 if found, 0 if not */ | 339 | /* returns 1 if found, 0 if not */ |
323 | static int dlm_find_mle(struct dlm_ctxt *dlm, | 340 | static int dlm_find_mle(struct dlm_ctxt *dlm, |
@@ -325,10 +342,17 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
325 | char *name, unsigned int namelen) | 342 | char *name, unsigned int namelen) |
326 | { | 343 | { |
327 | struct dlm_master_list_entry *tmpmle; | 344 | struct dlm_master_list_entry *tmpmle; |
345 | struct hlist_head *bucket; | ||
346 | struct hlist_node *list; | ||
347 | unsigned int hash; | ||
328 | 348 | ||
329 | assert_spin_locked(&dlm->master_lock); | 349 | assert_spin_locked(&dlm->master_lock); |
330 | 350 | ||
331 | list_for_each_entry(tmpmle, &dlm->master_list, list) { | 351 | hash = dlm_lockid_hash(name, namelen); |
352 | bucket = dlm_master_hash(dlm, hash); | ||
353 | hlist_for_each(list, bucket) { | ||
354 | tmpmle = hlist_entry(list, struct dlm_master_list_entry, | ||
355 | master_hash_node); | ||
332 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) | 356 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) |
333 | continue; | 357 | continue; |
334 | dlm_get_mle(tmpmle); | 358 | dlm_get_mle(tmpmle); |
@@ -408,24 +432,20 @@ static void dlm_mle_release(struct kref *kref) | |||
408 | mle = container_of(kref, struct dlm_master_list_entry, mle_refs); | 432 | mle = container_of(kref, struct dlm_master_list_entry, mle_refs); |
409 | dlm = mle->dlm; | 433 | dlm = mle->dlm; |
410 | 434 | ||
411 | if (mle->type != DLM_MLE_MASTER) { | ||
412 | mlog(0, "calling mle_release for %.*s, type %d\n", | ||
413 | mle->u.name.len, mle->u.name.name, mle->type); | ||
414 | } else { | ||
415 | mlog(0, "calling mle_release for %.*s, type %d\n", | ||
416 | mle->u.res->lockname.len, | ||
417 | mle->u.res->lockname.name, mle->type); | ||
418 | } | ||
419 | assert_spin_locked(&dlm->spinlock); | 435 | assert_spin_locked(&dlm->spinlock); |
420 | assert_spin_locked(&dlm->master_lock); | 436 | assert_spin_locked(&dlm->master_lock); |
421 | 437 | ||
438 | mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname, | ||
439 | mle->type); | ||
440 | |||
422 | /* remove from list if not already */ | 441 | /* remove from list if not already */ |
423 | if (!list_empty(&mle->list)) | 442 | __dlm_unlink_mle(dlm, mle); |
424 | list_del_init(&mle->list); | ||
425 | 443 | ||
426 | /* detach the mle from the domain node up/down events */ | 444 | /* detach the mle from the domain node up/down events */ |
427 | __dlm_mle_detach_hb_events(dlm, mle); | 445 | __dlm_mle_detach_hb_events(dlm, mle); |
428 | 446 | ||
447 | atomic_dec(&dlm->mle_cur_count[mle->type]); | ||
448 | |||
429 | /* NOTE: kfree under spinlock here. | 449 | /* NOTE: kfree under spinlock here. |
430 | * if this is bad, we can move this to a freelist. */ | 450 | * if this is bad, we can move this to a freelist. */ |
431 | kmem_cache_free(dlm_mle_cache, mle); | 451 | kmem_cache_free(dlm_mle_cache, mle); |
@@ -465,43 +485,6 @@ void dlm_destroy_master_caches(void) | |||
465 | kmem_cache_destroy(dlm_lockres_cache); | 485 | kmem_cache_destroy(dlm_lockres_cache); |
466 | } | 486 | } |
467 | 487 | ||
468 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, | ||
469 | struct dlm_lock_resource *res, | ||
470 | u8 owner) | ||
471 | { | ||
472 | assert_spin_locked(&res->spinlock); | ||
473 | |||
474 | mlog_entry("%.*s, %u\n", res->lockname.len, res->lockname.name, owner); | ||
475 | |||
476 | if (owner == dlm->node_num) | ||
477 | atomic_inc(&dlm->local_resources); | ||
478 | else if (owner == DLM_LOCK_RES_OWNER_UNKNOWN) | ||
479 | atomic_inc(&dlm->unknown_resources); | ||
480 | else | ||
481 | atomic_inc(&dlm->remote_resources); | ||
482 | |||
483 | res->owner = owner; | ||
484 | } | ||
485 | |||
486 | void dlm_change_lockres_owner(struct dlm_ctxt *dlm, | ||
487 | struct dlm_lock_resource *res, u8 owner) | ||
488 | { | ||
489 | assert_spin_locked(&res->spinlock); | ||
490 | |||
491 | if (owner == res->owner) | ||
492 | return; | ||
493 | |||
494 | if (res->owner == dlm->node_num) | ||
495 | atomic_dec(&dlm->local_resources); | ||
496 | else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) | ||
497 | atomic_dec(&dlm->unknown_resources); | ||
498 | else | ||
499 | atomic_dec(&dlm->remote_resources); | ||
500 | |||
501 | dlm_set_lockres_owner(dlm, res, owner); | ||
502 | } | ||
503 | |||
504 | |||
505 | static void dlm_lockres_release(struct kref *kref) | 488 | static void dlm_lockres_release(struct kref *kref) |
506 | { | 489 | { |
507 | struct dlm_lock_resource *res; | 490 | struct dlm_lock_resource *res; |
@@ -527,6 +510,8 @@ static void dlm_lockres_release(struct kref *kref) | |||
527 | } | 510 | } |
528 | spin_unlock(&dlm->track_lock); | 511 | spin_unlock(&dlm->track_lock); |
529 | 512 | ||
513 | atomic_dec(&dlm->res_cur_count); | ||
514 | |||
530 | dlm_put(dlm); | 515 | dlm_put(dlm); |
531 | 516 | ||
532 | if (!hlist_unhashed(&res->hash_node) || | 517 | if (!hlist_unhashed(&res->hash_node) || |
@@ -607,6 +592,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
607 | 592 | ||
608 | kref_init(&res->refs); | 593 | kref_init(&res->refs); |
609 | 594 | ||
595 | atomic_inc(&dlm->res_tot_count); | ||
596 | atomic_inc(&dlm->res_cur_count); | ||
597 | |||
610 | /* just for consistency */ | 598 | /* just for consistency */ |
611 | spin_lock(&res->spinlock); | 599 | spin_lock(&res->spinlock); |
612 | dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); | 600 | dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); |
@@ -843,7 +831,7 @@ lookup: | |||
843 | alloc_mle = NULL; | 831 | alloc_mle = NULL; |
844 | dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); | 832 | dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); |
845 | set_bit(dlm->node_num, mle->maybe_map); | 833 | set_bit(dlm->node_num, mle->maybe_map); |
846 | list_add(&mle->list, &dlm->master_list); | 834 | __dlm_insert_mle(dlm, mle); |
847 | 835 | ||
848 | /* still holding the dlm spinlock, check the recovery map | 836 | /* still holding the dlm spinlock, check the recovery map |
849 | * to see if there are any nodes that still need to be | 837 | * to see if there are any nodes that still need to be |
@@ -1270,7 +1258,7 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
1270 | res->lockname.len, | 1258 | res->lockname.len, |
1271 | res->lockname.name); | 1259 | res->lockname.name); |
1272 | mle->type = DLM_MLE_MASTER; | 1260 | mle->type = DLM_MLE_MASTER; |
1273 | mle->u.res = res; | 1261 | mle->mleres = res; |
1274 | } | 1262 | } |
1275 | } | 1263 | } |
1276 | } | 1264 | } |
@@ -1315,14 +1303,8 @@ static int dlm_do_master_request(struct dlm_lock_resource *res, | |||
1315 | 1303 | ||
1316 | BUG_ON(mle->type == DLM_MLE_MIGRATION); | 1304 | BUG_ON(mle->type == DLM_MLE_MIGRATION); |
1317 | 1305 | ||
1318 | if (mle->type != DLM_MLE_MASTER) { | 1306 | request.namelen = (u8)mle->mnamelen; |
1319 | request.namelen = mle->u.name.len; | 1307 | memcpy(request.name, mle->mname, request.namelen); |
1320 | memcpy(request.name, mle->u.name.name, request.namelen); | ||
1321 | } else { | ||
1322 | request.namelen = mle->u.res->lockname.len; | ||
1323 | memcpy(request.name, mle->u.res->lockname.name, | ||
1324 | request.namelen); | ||
1325 | } | ||
1326 | 1308 | ||
1327 | again: | 1309 | again: |
1328 | ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request, | 1310 | ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request, |
@@ -1575,7 +1557,7 @@ way_up_top: | |||
1575 | // "add the block.\n"); | 1557 | // "add the block.\n"); |
1576 | dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen); | 1558 | dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen); |
1577 | set_bit(request->node_idx, mle->maybe_map); | 1559 | set_bit(request->node_idx, mle->maybe_map); |
1578 | list_add(&mle->list, &dlm->master_list); | 1560 | __dlm_insert_mle(dlm, mle); |
1579 | response = DLM_MASTER_RESP_NO; | 1561 | response = DLM_MASTER_RESP_NO; |
1580 | } else { | 1562 | } else { |
1581 | // mlog(0, "mle was found\n"); | 1563 | // mlog(0, "mle was found\n"); |
@@ -1967,7 +1949,7 @@ ok: | |||
1967 | assert->node_idx, rr, extra_ref, mle->inuse); | 1949 | assert->node_idx, rr, extra_ref, mle->inuse); |
1968 | dlm_print_one_mle(mle); | 1950 | dlm_print_one_mle(mle); |
1969 | } | 1951 | } |
1970 | list_del_init(&mle->list); | 1952 | __dlm_unlink_mle(dlm, mle); |
1971 | __dlm_mle_detach_hb_events(dlm, mle); | 1953 | __dlm_mle_detach_hb_events(dlm, mle); |
1972 | __dlm_put_mle(mle); | 1954 | __dlm_put_mle(mle); |
1973 | if (extra_ref) { | 1955 | if (extra_ref) { |
@@ -3159,10 +3141,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
3159 | tmp->master = master; | 3141 | tmp->master = master; |
3160 | atomic_set(&tmp->woken, 1); | 3142 | atomic_set(&tmp->woken, 1); |
3161 | wake_up(&tmp->wq); | 3143 | wake_up(&tmp->wq); |
3162 | /* remove it from the list so that only one | 3144 | /* remove it so that only one mle will be found */ |
3163 | * mle will be found */ | 3145 | __dlm_unlink_mle(dlm, tmp); |
3164 | list_del_init(&tmp->list); | ||
3165 | /* this was obviously WRONG. mle is uninited here. should be tmp. */ | ||
3166 | __dlm_mle_detach_hb_events(dlm, tmp); | 3146 | __dlm_mle_detach_hb_events(dlm, tmp); |
3167 | ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; | 3147 | ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; |
3168 | mlog(0, "%s:%.*s: master=%u, newmaster=%u, " | 3148 | mlog(0, "%s:%.*s: master=%u, newmaster=%u, " |
@@ -3181,137 +3161,164 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
3181 | mle->master = master; | 3161 | mle->master = master; |
3182 | /* do this for consistency with other mle types */ | 3162 | /* do this for consistency with other mle types */ |
3183 | set_bit(new_master, mle->maybe_map); | 3163 | set_bit(new_master, mle->maybe_map); |
3184 | list_add(&mle->list, &dlm->master_list); | 3164 | __dlm_insert_mle(dlm, mle); |
3185 | 3165 | ||
3186 | return ret; | 3166 | return ret; |
3187 | } | 3167 | } |
3188 | 3168 | ||
3189 | 3169 | /* | |
3190 | void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) | 3170 | * Sets the owner of the lockres, associated to the mle, to UNKNOWN |
3171 | */ | ||
3172 | static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm, | ||
3173 | struct dlm_master_list_entry *mle) | ||
3191 | { | 3174 | { |
3192 | struct dlm_master_list_entry *mle, *next; | ||
3193 | struct dlm_lock_resource *res; | 3175 | struct dlm_lock_resource *res; |
3194 | unsigned int hash; | ||
3195 | 3176 | ||
3196 | mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); | 3177 | /* Find the lockres associated to the mle and set its owner to UNK */ |
3197 | top: | 3178 | res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen, |
3198 | assert_spin_locked(&dlm->spinlock); | 3179 | mle->mnamehash); |
3180 | if (res) { | ||
3181 | spin_unlock(&dlm->master_lock); | ||
3199 | 3182 | ||
3200 | /* clean the master list */ | 3183 | /* move lockres onto recovery list */ |
3201 | spin_lock(&dlm->master_lock); | 3184 | spin_lock(&res->spinlock); |
3202 | list_for_each_entry_safe(mle, next, &dlm->master_list, list) { | 3185 | dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); |
3203 | BUG_ON(mle->type != DLM_MLE_BLOCK && | 3186 | dlm_move_lockres_to_recovery_list(dlm, res); |
3204 | mle->type != DLM_MLE_MASTER && | 3187 | spin_unlock(&res->spinlock); |
3205 | mle->type != DLM_MLE_MIGRATION); | 3188 | dlm_lockres_put(res); |
3206 | |||
3207 | /* MASTER mles are initiated locally. the waiting | ||
3208 | * process will notice the node map change | ||
3209 | * shortly. let that happen as normal. */ | ||
3210 | if (mle->type == DLM_MLE_MASTER) | ||
3211 | continue; | ||
3212 | 3189 | ||
3190 | /* about to get rid of mle, detach from heartbeat */ | ||
3191 | __dlm_mle_detach_hb_events(dlm, mle); | ||
3213 | 3192 | ||
3214 | /* BLOCK mles are initiated by other nodes. | 3193 | /* dump the mle */ |
3215 | * need to clean up if the dead node would have | 3194 | spin_lock(&dlm->master_lock); |
3216 | * been the master. */ | 3195 | __dlm_put_mle(mle); |
3217 | if (mle->type == DLM_MLE_BLOCK) { | 3196 | spin_unlock(&dlm->master_lock); |
3218 | int bit; | 3197 | } |
3219 | 3198 | ||
3220 | spin_lock(&mle->spinlock); | 3199 | return res; |
3221 | bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); | 3200 | } |
3222 | if (bit != dead_node) { | ||
3223 | mlog(0, "mle found, but dead node %u would " | ||
3224 | "not have been master\n", dead_node); | ||
3225 | spin_unlock(&mle->spinlock); | ||
3226 | } else { | ||
3227 | /* must drop the refcount by one since the | ||
3228 | * assert_master will never arrive. this | ||
3229 | * may result in the mle being unlinked and | ||
3230 | * freed, but there may still be a process | ||
3231 | * waiting in the dlmlock path which is fine. */ | ||
3232 | mlog(0, "node %u was expected master\n", | ||
3233 | dead_node); | ||
3234 | atomic_set(&mle->woken, 1); | ||
3235 | spin_unlock(&mle->spinlock); | ||
3236 | wake_up(&mle->wq); | ||
3237 | /* do not need events any longer, so detach | ||
3238 | * from heartbeat */ | ||
3239 | __dlm_mle_detach_hb_events(dlm, mle); | ||
3240 | __dlm_put_mle(mle); | ||
3241 | } | ||
3242 | continue; | ||
3243 | } | ||
3244 | 3201 | ||
3245 | /* everything else is a MIGRATION mle */ | 3202 | static void dlm_clean_migration_mle(struct dlm_ctxt *dlm, |
3246 | 3203 | struct dlm_master_list_entry *mle) | |
3247 | /* the rule for MIGRATION mles is that the master | 3204 | { |
3248 | * becomes UNKNOWN if *either* the original or | 3205 | __dlm_mle_detach_hb_events(dlm, mle); |
3249 | * the new master dies. all UNKNOWN lockreses | ||
3250 | * are sent to whichever node becomes the recovery | ||
3251 | * master. the new master is responsible for | ||
3252 | * determining if there is still a master for | ||
3253 | * this lockres, or if he needs to take over | ||
3254 | * mastery. either way, this node should expect | ||
3255 | * another message to resolve this. */ | ||
3256 | if (mle->master != dead_node && | ||
3257 | mle->new_master != dead_node) | ||
3258 | continue; | ||
3259 | 3206 | ||
3260 | /* if we have reached this point, this mle needs to | 3207 | spin_lock(&mle->spinlock); |
3261 | * be removed from the list and freed. */ | 3208 | __dlm_unlink_mle(dlm, mle); |
3209 | atomic_set(&mle->woken, 1); | ||
3210 | spin_unlock(&mle->spinlock); | ||
3262 | 3211 | ||
3263 | /* remove from the list early. NOTE: unlinking | 3212 | wake_up(&mle->wq); |
3264 | * list_head while in list_for_each_safe */ | 3213 | } |
3265 | __dlm_mle_detach_hb_events(dlm, mle); | 3214 | |
3266 | spin_lock(&mle->spinlock); | 3215 | static void dlm_clean_block_mle(struct dlm_ctxt *dlm, |
3267 | list_del_init(&mle->list); | 3216 | struct dlm_master_list_entry *mle, u8 dead_node) |
3217 | { | ||
3218 | int bit; | ||
3219 | |||
3220 | BUG_ON(mle->type != DLM_MLE_BLOCK); | ||
3221 | |||
3222 | spin_lock(&mle->spinlock); | ||
3223 | bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); | ||
3224 | if (bit != dead_node) { | ||
3225 | mlog(0, "mle found, but dead node %u would not have been " | ||
3226 | "master\n", dead_node); | ||
3227 | spin_unlock(&mle->spinlock); | ||
3228 | } else { | ||
3229 | /* Must drop the refcount by one since the assert_master will | ||
3230 | * never arrive. This may result in the mle being unlinked and | ||
3231 | * freed, but there may still be a process waiting in the | ||
3232 | * dlmlock path which is fine. */ | ||
3233 | mlog(0, "node %u was expected master\n", dead_node); | ||
3268 | atomic_set(&mle->woken, 1); | 3234 | atomic_set(&mle->woken, 1); |
3269 | spin_unlock(&mle->spinlock); | 3235 | spin_unlock(&mle->spinlock); |
3270 | wake_up(&mle->wq); | 3236 | wake_up(&mle->wq); |
3271 | 3237 | ||
3272 | mlog(0, "%s: node %u died during migration from " | 3238 | /* Do not need events any longer, so detach from heartbeat */ |
3273 | "%u to %u!\n", dlm->name, dead_node, | 3239 | __dlm_mle_detach_hb_events(dlm, mle); |
3274 | mle->master, mle->new_master); | 3240 | __dlm_put_mle(mle); |
3275 | /* if there is a lockres associated with this | 3241 | } |
3276 | * mle, find it and set its owner to UNKNOWN */ | 3242 | } |
3277 | hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len); | ||
3278 | res = __dlm_lookup_lockres(dlm, mle->u.name.name, | ||
3279 | mle->u.name.len, hash); | ||
3280 | if (res) { | ||
3281 | /* unfortunately if we hit this rare case, our | ||
3282 | * lock ordering is messed. we need to drop | ||
3283 | * the master lock so that we can take the | ||
3284 | * lockres lock, meaning that we will have to | ||
3285 | * restart from the head of list. */ | ||
3286 | spin_unlock(&dlm->master_lock); | ||
3287 | 3243 | ||
3288 | /* move lockres onto recovery list */ | 3244 | void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) |
3289 | spin_lock(&res->spinlock); | 3245 | { |
3290 | dlm_set_lockres_owner(dlm, res, | 3246 | struct dlm_master_list_entry *mle; |
3291 | DLM_LOCK_RES_OWNER_UNKNOWN); | 3247 | struct dlm_lock_resource *res; |
3292 | dlm_move_lockres_to_recovery_list(dlm, res); | 3248 | struct hlist_head *bucket; |
3293 | spin_unlock(&res->spinlock); | 3249 | struct hlist_node *list; |
3294 | dlm_lockres_put(res); | 3250 | unsigned int i; |
3295 | 3251 | ||
3296 | /* about to get rid of mle, detach from heartbeat */ | 3252 | mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); |
3297 | __dlm_mle_detach_hb_events(dlm, mle); | 3253 | top: |
3254 | assert_spin_locked(&dlm->spinlock); | ||
3298 | 3255 | ||
3299 | /* dump the mle */ | 3256 | /* clean the master list */ |
3300 | spin_lock(&dlm->master_lock); | 3257 | spin_lock(&dlm->master_lock); |
3301 | __dlm_put_mle(mle); | 3258 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
3302 | spin_unlock(&dlm->master_lock); | 3259 | bucket = dlm_master_hash(dlm, i); |
3260 | hlist_for_each(list, bucket) { | ||
3261 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
3262 | master_hash_node); | ||
3263 | |||
3264 | BUG_ON(mle->type != DLM_MLE_BLOCK && | ||
3265 | mle->type != DLM_MLE_MASTER && | ||
3266 | mle->type != DLM_MLE_MIGRATION); | ||
3267 | |||
3268 | /* MASTER mles are initiated locally. The waiting | ||
3269 | * process will notice the node map change shortly. | ||
3270 | * Let that happen as normal. */ | ||
3271 | if (mle->type == DLM_MLE_MASTER) | ||
3272 | continue; | ||
3273 | |||
3274 | /* BLOCK mles are initiated by other nodes. Need to | ||
3275 | * clean up if the dead node would have been the | ||
3276 | * master. */ | ||
3277 | if (mle->type == DLM_MLE_BLOCK) { | ||
3278 | dlm_clean_block_mle(dlm, mle, dead_node); | ||
3279 | continue; | ||
3280 | } | ||
3303 | 3281 | ||
3304 | /* restart */ | 3282 | /* Everything else is a MIGRATION mle */ |
3305 | goto top; | 3283 | |
3306 | } | 3284 | /* The rule for MIGRATION mles is that the master |
3285 | * becomes UNKNOWN if *either* the original or the new | ||
3286 | * master dies. All UNKNOWN lockres' are sent to | ||
3287 | * whichever node becomes the recovery master. The new | ||
3288 | * master is responsible for determining if there is | ||
3289 | * still a master for this lockres, or if he needs to | ||
3290 | * take over mastery. Either way, this node should | ||
3291 | * expect another message to resolve this. */ | ||
3292 | |||
3293 | if (mle->master != dead_node && | ||
3294 | mle->new_master != dead_node) | ||
3295 | continue; | ||
3296 | |||
3297 | /* If we have reached this point, this mle needs to be | ||
3298 | * removed from the list and freed. */ | ||
3299 | dlm_clean_migration_mle(dlm, mle); | ||
3300 | |||
3301 | mlog(0, "%s: node %u died during migration from " | ||
3302 | "%u to %u!\n", dlm->name, dead_node, mle->master, | ||
3303 | mle->new_master); | ||
3304 | |||
3305 | /* If we find a lockres associated with the mle, we've | ||
3306 | * hit this rare case that messes up our lock ordering. | ||
3307 | * If so, we need to drop the master lock so that we can | ||
3308 | * take the lockres lock, meaning that we will have to | ||
3309 | * restart from the head of list. */ | ||
3310 | res = dlm_reset_mleres_owner(dlm, mle); | ||
3311 | if (res) | ||
3312 | /* restart */ | ||
3313 | goto top; | ||
3307 | 3314 | ||
3308 | /* this may be the last reference */ | 3315 | /* This may be the last reference */ |
3309 | __dlm_put_mle(mle); | 3316 | __dlm_put_mle(mle); |
3317 | } | ||
3310 | } | 3318 | } |
3311 | spin_unlock(&dlm->master_lock); | 3319 | spin_unlock(&dlm->master_lock); |
3312 | } | 3320 | } |
3313 | 3321 | ||
3314 | |||
3315 | int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 3322 | int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, |
3316 | u8 old_master) | 3323 | u8 old_master) |
3317 | { | 3324 | { |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 4060bb328bc8..d490b66ad9d7 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
162 | 162 | ||
163 | spin_lock(&res->spinlock); | 163 | spin_lock(&res->spinlock); |
164 | if (!__dlm_lockres_unused(res)) { | 164 | if (!__dlm_lockres_unused(res)) { |
165 | spin_unlock(&res->spinlock); | ||
166 | mlog(0, "%s:%.*s: tried to purge but not unused\n", | 165 | mlog(0, "%s:%.*s: tried to purge but not unused\n", |
167 | dlm->name, res->lockname.len, res->lockname.name); | 166 | dlm->name, res->lockname.len, res->lockname.name); |
168 | return -ENOTEMPTY; | 167 | __dlm_print_one_lock_resource(res); |
168 | spin_unlock(&res->spinlock); | ||
169 | BUG(); | ||
169 | } | 170 | } |
171 | |||
172 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
173 | mlog(0, "%s:%.*s: Delay dropref as this lockres is " | ||
174 | "being remastered\n", dlm->name, res->lockname.len, | ||
175 | res->lockname.name); | ||
176 | /* Re-add the lockres to the end of the purge list */ | ||
177 | if (!list_empty(&res->purge)) { | ||
178 | list_del_init(&res->purge); | ||
179 | list_add_tail(&res->purge, &dlm->purge_list); | ||
180 | } | ||
181 | spin_unlock(&res->spinlock); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
170 | master = (res->owner == dlm->node_num); | 185 | master = (res->owner == dlm->node_num); |
186 | |||
171 | if (!master) | 187 | if (!master) |
172 | res->state |= DLM_LOCK_RES_DROPPING_REF; | 188 | res->state |= DLM_LOCK_RES_DROPPING_REF; |
173 | spin_unlock(&res->spinlock); | 189 | spin_unlock(&res->spinlock); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 7219a86d34cc..e15fc7d50827 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -244,6 +244,10 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | |||
244 | .flags = 0, | 244 | .flags = 0, |
245 | }; | 245 | }; |
246 | 246 | ||
247 | static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { | ||
248 | .flags = 0, | ||
249 | }; | ||
250 | |||
247 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { | 251 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { |
248 | .get_osb = ocfs2_get_dentry_osb, | 252 | .get_osb = ocfs2_get_dentry_osb, |
249 | .post_unlock = ocfs2_dentry_post_unlock, | 253 | .post_unlock = ocfs2_dentry_post_unlock, |
@@ -622,6 +626,17 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
622 | &ocfs2_rename_lops, osb); | 626 | &ocfs2_rename_lops, osb); |
623 | } | 627 | } |
624 | 628 | ||
629 | static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, | ||
630 | struct ocfs2_super *osb) | ||
631 | { | ||
632 | /* nfs_sync lockres doesn't come from a slab so we call init | ||
633 | * once on it manually. */ | ||
634 | ocfs2_lock_res_init_once(res); | ||
635 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); | ||
636 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, | ||
637 | &ocfs2_nfs_sync_lops, osb); | ||
638 | } | ||
639 | |||
625 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | 640 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, |
626 | struct ocfs2_file_private *fp) | 641 | struct ocfs2_file_private *fp) |
627 | { | 642 | { |
@@ -2417,6 +2432,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) | |||
2417 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 2432 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
2418 | } | 2433 | } |
2419 | 2434 | ||
2435 | int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) | ||
2436 | { | ||
2437 | int status; | ||
2438 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | ||
2439 | |||
2440 | if (ocfs2_is_hard_readonly(osb)) | ||
2441 | return -EROFS; | ||
2442 | |||
2443 | if (ocfs2_mount_local(osb)) | ||
2444 | return 0; | ||
2445 | |||
2446 | status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, | ||
2447 | 0, 0); | ||
2448 | if (status < 0) | ||
2449 | mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); | ||
2450 | |||
2451 | return status; | ||
2452 | } | ||
2453 | |||
2454 | void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) | ||
2455 | { | ||
2456 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | ||
2457 | |||
2458 | if (!ocfs2_mount_local(osb)) | ||
2459 | ocfs2_cluster_unlock(osb, lockres, | ||
2460 | ex ? LKM_EXMODE : LKM_PRMODE); | ||
2461 | } | ||
2462 | |||
2420 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 2463 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) |
2421 | { | 2464 | { |
2422 | int ret; | 2465 | int ret; |
@@ -2798,6 +2841,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2798 | local: | 2841 | local: |
2799 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 2842 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); |
2800 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 2843 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); |
2844 | ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); | ||
2801 | 2845 | ||
2802 | osb->cconn = conn; | 2846 | osb->cconn = conn; |
2803 | 2847 | ||
@@ -2833,6 +2877,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, | |||
2833 | 2877 | ||
2834 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2878 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
2835 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 2879 | ocfs2_lock_res_free(&osb->osb_rename_lockres); |
2880 | ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); | ||
2836 | 2881 | ||
2837 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); | 2882 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); |
2838 | osb->cconn = NULL; | 2883 | osb->cconn = NULL; |
@@ -3015,6 +3060,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) | |||
3015 | { | 3060 | { |
3016 | ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); | 3061 | ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); |
3017 | ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); | 3062 | ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); |
3063 | ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); | ||
3018 | } | 3064 | } |
3019 | 3065 | ||
3020 | int ocfs2_drop_inode_locks(struct inode *inode) | 3066 | int ocfs2_drop_inode_locks(struct inode *inode) |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 3f8d9986b8e0..e1fd5721cd7f 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -115,6 +115,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, | |||
115 | int ex); | 115 | int ex); |
116 | int ocfs2_rename_lock(struct ocfs2_super *osb); | 116 | int ocfs2_rename_lock(struct ocfs2_super *osb); |
117 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 117 | void ocfs2_rename_unlock(struct ocfs2_super *osb); |
118 | int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); | ||
119 | void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex); | ||
118 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | 120 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); |
119 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | 121 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); |
120 | int ocfs2_file_lock(struct file *file, int ex, int trylock); | 122 | int ocfs2_file_lock(struct file *file, int ex, int trylock); |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 2f27b332d8b3..de3da8eb558c 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -31,6 +31,7 @@ | |||
31 | 31 | ||
32 | #include "ocfs2.h" | 32 | #include "ocfs2.h" |
33 | 33 | ||
34 | #include "alloc.h" | ||
34 | #include "dir.h" | 35 | #include "dir.h" |
35 | #include "dlmglue.h" | 36 | #include "dlmglue.h" |
36 | #include "dcache.h" | 37 | #include "dcache.h" |
@@ -38,6 +39,7 @@ | |||
38 | #include "inode.h" | 39 | #include "inode.h" |
39 | 40 | ||
40 | #include "buffer_head_io.h" | 41 | #include "buffer_head_io.h" |
42 | #include "suballoc.h" | ||
41 | 43 | ||
42 | struct ocfs2_inode_handle | 44 | struct ocfs2_inode_handle |
43 | { | 45 | { |
@@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
49 | struct ocfs2_inode_handle *handle) | 51 | struct ocfs2_inode_handle *handle) |
50 | { | 52 | { |
51 | struct inode *inode; | 53 | struct inode *inode; |
54 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
55 | u64 blkno = handle->ih_blkno; | ||
56 | int status, set; | ||
52 | struct dentry *result; | 57 | struct dentry *result; |
53 | 58 | ||
54 | mlog_entry("(0x%p, 0x%p)\n", sb, handle); | 59 | mlog_entry("(0x%p, 0x%p)\n", sb, handle); |
55 | 60 | ||
56 | if (handle->ih_blkno == 0) { | 61 | if (blkno == 0) { |
57 | mlog_errno(-ESTALE); | 62 | mlog(0, "nfs wants inode with blkno: 0\n"); |
58 | return ERR_PTR(-ESTALE); | 63 | result = ERR_PTR(-ESTALE); |
64 | goto bail; | ||
65 | } | ||
66 | |||
67 | inode = ocfs2_ilookup(sb, blkno); | ||
68 | /* | ||
69 | * If the inode exists in memory, we only need to check it's | ||
70 | * generation number | ||
71 | */ | ||
72 | if (inode) | ||
73 | goto check_gen; | ||
74 | |||
75 | /* | ||
76 | * This will synchronize us against ocfs2_delete_inode() on | ||
77 | * all nodes | ||
78 | */ | ||
79 | status = ocfs2_nfs_sync_lock(osb, 1); | ||
80 | if (status < 0) { | ||
81 | mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); | ||
82 | goto check_err; | ||
83 | } | ||
84 | |||
85 | status = ocfs2_test_inode_bit(osb, blkno, &set); | ||
86 | if (status < 0) { | ||
87 | if (status == -EINVAL) { | ||
88 | /* | ||
89 | * The blkno NFS gave us doesn't even show up | ||
90 | * as an inode, we return -ESTALE to be | ||
91 | * nice | ||
92 | */ | ||
93 | mlog(0, "test inode bit failed %d\n", status); | ||
94 | status = -ESTALE; | ||
95 | } else { | ||
96 | mlog(ML_ERROR, "test inode bit failed %d\n", status); | ||
97 | } | ||
98 | goto unlock_nfs_sync; | ||
99 | } | ||
100 | |||
101 | /* If the inode allocator bit is clear, this inode must be stale */ | ||
102 | if (!set) { | ||
103 | mlog(0, "inode %llu suballoc bit is clear\n", blkno); | ||
104 | status = -ESTALE; | ||
105 | goto unlock_nfs_sync; | ||
59 | } | 106 | } |
60 | 107 | ||
61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); | 108 | inode = ocfs2_iget(osb, blkno, 0, 0); |
62 | 109 | ||
63 | if (IS_ERR(inode)) | 110 | unlock_nfs_sync: |
64 | return (void *)inode; | 111 | ocfs2_nfs_sync_unlock(osb, 1); |
65 | 112 | ||
113 | check_err: | ||
114 | if (status < 0) { | ||
115 | if (status == -ESTALE) { | ||
116 | mlog(0, "stale inode ino: %llu generation: %u\n", | ||
117 | blkno, handle->ih_generation); | ||
118 | } | ||
119 | result = ERR_PTR(status); | ||
120 | goto bail; | ||
121 | } | ||
122 | |||
123 | if (IS_ERR(inode)) { | ||
124 | mlog_errno(PTR_ERR(inode)); | ||
125 | result = (void *)inode; | ||
126 | goto bail; | ||
127 | } | ||
128 | |||
129 | check_gen: | ||
66 | if (handle->ih_generation != inode->i_generation) { | 130 | if (handle->ih_generation != inode->i_generation) { |
67 | iput(inode); | 131 | iput(inode); |
68 | return ERR_PTR(-ESTALE); | 132 | mlog(0, "stale inode ino: %llu generation: %u\n", blkno, |
133 | handle->ih_generation); | ||
134 | result = ERR_PTR(-ESTALE); | ||
135 | goto bail; | ||
69 | } | 136 | } |
70 | 137 | ||
71 | result = d_obtain_alias(inode); | 138 | result = d_obtain_alias(inode); |
72 | if (!IS_ERR(result)) | 139 | if (!IS_ERR(result)) |
73 | result->d_op = &ocfs2_dentry_ops; | 140 | result->d_op = &ocfs2_dentry_ops; |
141 | else | ||
142 | mlog_errno(PTR_ERR(result)); | ||
74 | 143 | ||
144 | bail: | ||
75 | mlog_exit_ptr(result); | 145 | mlog_exit_ptr(result); |
76 | return result; | 146 | return result; |
77 | } | 147 | } |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 229e707bc050..10e1fa87396a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "ocfs2.h" | 38 | #include "ocfs2.h" |
39 | 39 | ||
40 | #include "alloc.h" | 40 | #include "alloc.h" |
41 | #include "dir.h" | ||
41 | #include "blockcheck.h" | 42 | #include "blockcheck.h" |
42 | #include "dlmglue.h" | 43 | #include "dlmglue.h" |
43 | #include "extent_map.h" | 44 | #include "extent_map.h" |
@@ -112,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) | |||
112 | oi->ip_attr |= OCFS2_DIRSYNC_FL; | 113 | oi->ip_attr |= OCFS2_DIRSYNC_FL; |
113 | } | 114 | } |
114 | 115 | ||
116 | struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) | ||
117 | { | ||
118 | struct ocfs2_find_inode_args args; | ||
119 | |||
120 | args.fi_blkno = blkno; | ||
121 | args.fi_flags = 0; | ||
122 | args.fi_ino = ino_from_blkno(sb, blkno); | ||
123 | args.fi_sysfile_type = 0; | ||
124 | |||
125 | return ilookup5(sb, blkno, ocfs2_find_actor, &args); | ||
126 | } | ||
115 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, | 127 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, |
116 | int sysfile_type) | 128 | int sysfile_type) |
117 | { | 129 | { |
@@ -275,7 +287,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
275 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 287 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
276 | (unsigned long long)le64_to_cpu(fe->i_blkno)); | 288 | (unsigned long long)le64_to_cpu(fe->i_blkno)); |
277 | 289 | ||
278 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 290 | inode->i_nlink = ocfs2_read_links_count(fe); |
279 | 291 | ||
280 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { | 292 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { |
281 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | 293 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; |
@@ -351,6 +363,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
351 | 363 | ||
352 | ocfs2_set_inode_flags(inode); | 364 | ocfs2_set_inode_flags(inode); |
353 | 365 | ||
366 | OCFS2_I(inode)->ip_last_used_slot = 0; | ||
367 | OCFS2_I(inode)->ip_last_used_group = 0; | ||
354 | mlog_exit_void(); | 368 | mlog_exit_void(); |
355 | } | 369 | } |
356 | 370 | ||
@@ -606,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
606 | } | 620 | } |
607 | 621 | ||
608 | handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + | 622 | handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + |
609 | ocfs2_quota_trans_credits(inode->i_sb)); | 623 | ocfs2_quota_trans_credits(inode->i_sb)); |
610 | if (IS_ERR(handle)) { | 624 | if (IS_ERR(handle)) { |
611 | status = PTR_ERR(handle); | 625 | status = PTR_ERR(handle); |
612 | mlog_errno(status); | 626 | mlog_errno(status); |
@@ -740,6 +754,15 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
740 | goto bail_unlock_dir; | 754 | goto bail_unlock_dir; |
741 | } | 755 | } |
742 | 756 | ||
757 | /* Remove any dir index tree */ | ||
758 | if (S_ISDIR(inode->i_mode)) { | ||
759 | status = ocfs2_dx_dir_truncate(inode, di_bh); | ||
760 | if (status) { | ||
761 | mlog_errno(status); | ||
762 | goto bail_unlock_dir; | ||
763 | } | ||
764 | } | ||
765 | |||
743 | /*Free extended attribute resources associated with this inode.*/ | 766 | /*Free extended attribute resources associated with this inode.*/ |
744 | status = ocfs2_xattr_remove(inode, di_bh); | 767 | status = ocfs2_xattr_remove(inode, di_bh); |
745 | if (status < 0) { | 768 | if (status < 0) { |
@@ -949,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode) | |||
949 | goto bail; | 972 | goto bail; |
950 | } | 973 | } |
951 | 974 | ||
975 | /* | ||
976 | * Synchronize us against ocfs2_get_dentry. We take this in | ||
977 | * shared mode so that all nodes can still concurrently | ||
978 | * process deletes. | ||
979 | */ | ||
980 | status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0); | ||
981 | if (status < 0) { | ||
982 | mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status); | ||
983 | ocfs2_cleanup_delete_inode(inode, 0); | ||
984 | goto bail_unblock; | ||
985 | } | ||
952 | /* Lock down the inode. This gives us an up to date view of | 986 | /* Lock down the inode. This gives us an up to date view of |
953 | * it's metadata (for verification), and allows us to | 987 | * it's metadata (for verification), and allows us to |
954 | * serialize delete_inode on multiple nodes. | 988 | * serialize delete_inode on multiple nodes. |
@@ -962,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode) | |||
962 | if (status != -ENOENT) | 996 | if (status != -ENOENT) |
963 | mlog_errno(status); | 997 | mlog_errno(status); |
964 | ocfs2_cleanup_delete_inode(inode, 0); | 998 | ocfs2_cleanup_delete_inode(inode, 0); |
965 | goto bail_unblock; | 999 | goto bail_unlock_nfs_sync; |
966 | } | 1000 | } |
967 | 1001 | ||
968 | /* Query the cluster. This will be the final decision made | 1002 | /* Query the cluster. This will be the final decision made |
@@ -1005,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode) | |||
1005 | bail_unlock_inode: | 1039 | bail_unlock_inode: |
1006 | ocfs2_inode_unlock(inode, 1); | 1040 | ocfs2_inode_unlock(inode, 1); |
1007 | brelse(di_bh); | 1041 | brelse(di_bh); |
1042 | |||
1043 | bail_unlock_nfs_sync: | ||
1044 | ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); | ||
1045 | |||
1008 | bail_unblock: | 1046 | bail_unblock: |
1009 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); | 1047 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); |
1010 | if (status < 0) | 1048 | if (status < 0) |
@@ -1205,7 +1243,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle, | |||
1205 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 1243 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
1206 | 1244 | ||
1207 | fe->i_size = cpu_to_le64(i_size_read(inode)); | 1245 | fe->i_size = cpu_to_le64(i_size_read(inode)); |
1208 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 1246 | ocfs2_set_links_count(fe, inode->i_nlink); |
1209 | fe->i_uid = cpu_to_le32(inode->i_uid); | 1247 | fe->i_uid = cpu_to_le32(inode->i_uid); |
1210 | fe->i_gid = cpu_to_le32(inode->i_gid); | 1248 | fe->i_gid = cpu_to_le32(inode->i_gid); |
1211 | fe->i_mode = cpu_to_le16(inode->i_mode); | 1249 | fe->i_mode = cpu_to_le16(inode->i_mode); |
@@ -1242,7 +1280,7 @@ void ocfs2_refresh_inode(struct inode *inode, | |||
1242 | OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); | 1280 | OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); |
1243 | ocfs2_set_inode_flags(inode); | 1281 | ocfs2_set_inode_flags(inode); |
1244 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 1282 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
1245 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 1283 | inode->i_nlink = ocfs2_read_links_count(fe); |
1246 | inode->i_uid = le32_to_cpu(fe->i_uid); | 1284 | inode->i_uid = le32_to_cpu(fe->i_uid); |
1247 | inode->i_gid = le32_to_cpu(fe->i_gid); | 1285 | inode->i_gid = le32_to_cpu(fe->i_gid); |
1248 | inode->i_mode = le16_to_cpu(fe->i_mode); | 1286 | inode->i_mode = le16_to_cpu(fe->i_mode); |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index eb3c302b38d3..ea71525aad41 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -72,6 +72,10 @@ struct ocfs2_inode_info | |||
72 | 72 | ||
73 | struct inode vfs_inode; | 73 | struct inode vfs_inode; |
74 | struct jbd2_inode ip_jinode; | 74 | struct jbd2_inode ip_jinode; |
75 | |||
76 | /* Only valid if the inode is the dir. */ | ||
77 | u32 ip_last_used_slot; | ||
78 | u64 ip_last_used_group; | ||
75 | }; | 79 | }; |
76 | 80 | ||
77 | /* | 81 | /* |
@@ -124,6 +128,7 @@ void ocfs2_drop_inode(struct inode *inode); | |||
124 | /* Flags for ocfs2_iget() */ | 128 | /* Flags for ocfs2_iget() */ |
125 | #define OCFS2_FI_FLAG_SYSFILE 0x1 | 129 | #define OCFS2_FI_FLAG_SYSFILE 0x1 |
126 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 | 130 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 |
131 | struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff); | ||
127 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, | 132 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, |
128 | int sysfile_type); | 133 | int sysfile_type); |
129 | int ocfs2_inode_init_private(struct inode *inode); | 134 | int ocfs2_inode_init_private(struct inode *inode); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 57d7d25a2b9a..a20a0f1e37fd 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb, | |||
65 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 65 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, |
66 | int slot); | 66 | int slot); |
67 | static int ocfs2_commit_thread(void *arg); | 67 | static int ocfs2_commit_thread(void *arg); |
68 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | ||
69 | int slot_num, | ||
70 | struct ocfs2_dinode *la_dinode, | ||
71 | struct ocfs2_dinode *tl_dinode, | ||
72 | struct ocfs2_quota_recovery *qrec); | ||
68 | 73 | ||
69 | static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 74 | static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) |
70 | { | 75 | { |
@@ -76,18 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb) | |||
76 | return __ocfs2_wait_on_mount(osb, 1); | 81 | return __ocfs2_wait_on_mount(osb, 1); |
77 | } | 82 | } |
78 | 83 | ||
79 | |||
80 | |||
81 | /* | 84 | /* |
82 | * The recovery_list is a simple linked list of node numbers to recover. | 85 | * This replay_map is to track online/offline slots, so we could recover |
83 | * It is protected by the recovery_lock. | 86 | * offline slots during recovery and mount |
84 | */ | 87 | */ |
85 | 88 | ||
86 | struct ocfs2_recovery_map { | 89 | enum ocfs2_replay_state { |
87 | unsigned int rm_used; | 90 | REPLAY_UNNEEDED = 0, /* Replay is not needed, so ignore this map */ |
88 | unsigned int *rm_entries; | 91 | REPLAY_NEEDED, /* Replay slots marked in rm_replay_slots */ |
92 | REPLAY_DONE /* Replay was already queued */ | ||
89 | }; | 93 | }; |
90 | 94 | ||
95 | struct ocfs2_replay_map { | ||
96 | unsigned int rm_slots; | ||
97 | enum ocfs2_replay_state rm_state; | ||
98 | unsigned char rm_replay_slots[0]; | ||
99 | }; | ||
100 | |||
101 | void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) | ||
102 | { | ||
103 | if (!osb->replay_map) | ||
104 | return; | ||
105 | |||
106 | /* If we've already queued the replay, we don't have any more to do */ | ||
107 | if (osb->replay_map->rm_state == REPLAY_DONE) | ||
108 | return; | ||
109 | |||
110 | osb->replay_map->rm_state = state; | ||
111 | } | ||
112 | |||
113 | int ocfs2_compute_replay_slots(struct ocfs2_super *osb) | ||
114 | { | ||
115 | struct ocfs2_replay_map *replay_map; | ||
116 | int i, node_num; | ||
117 | |||
118 | /* If replay map is already set, we don't do it again */ | ||
119 | if (osb->replay_map) | ||
120 | return 0; | ||
121 | |||
122 | replay_map = kzalloc(sizeof(struct ocfs2_replay_map) + | ||
123 | (osb->max_slots * sizeof(char)), GFP_KERNEL); | ||
124 | |||
125 | if (!replay_map) { | ||
126 | mlog_errno(-ENOMEM); | ||
127 | return -ENOMEM; | ||
128 | } | ||
129 | |||
130 | spin_lock(&osb->osb_lock); | ||
131 | |||
132 | replay_map->rm_slots = osb->max_slots; | ||
133 | replay_map->rm_state = REPLAY_UNNEEDED; | ||
134 | |||
135 | /* set rm_replay_slots for offline slot(s) */ | ||
136 | for (i = 0; i < replay_map->rm_slots; i++) { | ||
137 | if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT) | ||
138 | replay_map->rm_replay_slots[i] = 1; | ||
139 | } | ||
140 | |||
141 | osb->replay_map = replay_map; | ||
142 | spin_unlock(&osb->osb_lock); | ||
143 | return 0; | ||
144 | } | ||
145 | |||
146 | void ocfs2_queue_replay_slots(struct ocfs2_super *osb) | ||
147 | { | ||
148 | struct ocfs2_replay_map *replay_map = osb->replay_map; | ||
149 | int i; | ||
150 | |||
151 | if (!replay_map) | ||
152 | return; | ||
153 | |||
154 | if (replay_map->rm_state != REPLAY_NEEDED) | ||
155 | return; | ||
156 | |||
157 | for (i = 0; i < replay_map->rm_slots; i++) | ||
158 | if (replay_map->rm_replay_slots[i]) | ||
159 | ocfs2_queue_recovery_completion(osb->journal, i, NULL, | ||
160 | NULL, NULL); | ||
161 | replay_map->rm_state = REPLAY_DONE; | ||
162 | } | ||
163 | |||
164 | void ocfs2_free_replay_slots(struct ocfs2_super *osb) | ||
165 | { | ||
166 | struct ocfs2_replay_map *replay_map = osb->replay_map; | ||
167 | |||
168 | if (!osb->replay_map) | ||
169 | return; | ||
170 | |||
171 | kfree(replay_map); | ||
172 | osb->replay_map = NULL; | ||
173 | } | ||
174 | |||
91 | int ocfs2_recovery_init(struct ocfs2_super *osb) | 175 | int ocfs2_recovery_init(struct ocfs2_super *osb) |
92 | { | 176 | { |
93 | struct ocfs2_recovery_map *rm; | 177 | struct ocfs2_recovery_map *rm; |
@@ -496,6 +580,22 @@ static struct ocfs2_triggers dq_triggers = { | |||
496 | }, | 580 | }, |
497 | }; | 581 | }; |
498 | 582 | ||
583 | static struct ocfs2_triggers dr_triggers = { | ||
584 | .ot_triggers = { | ||
585 | .t_commit = ocfs2_commit_trigger, | ||
586 | .t_abort = ocfs2_abort_trigger, | ||
587 | }, | ||
588 | .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), | ||
589 | }; | ||
590 | |||
591 | static struct ocfs2_triggers dl_triggers = { | ||
592 | .ot_triggers = { | ||
593 | .t_commit = ocfs2_commit_trigger, | ||
594 | .t_abort = ocfs2_abort_trigger, | ||
595 | }, | ||
596 | .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), | ||
597 | }; | ||
598 | |||
499 | static int __ocfs2_journal_access(handle_t *handle, | 599 | static int __ocfs2_journal_access(handle_t *handle, |
500 | struct inode *inode, | 600 | struct inode *inode, |
501 | struct buffer_head *bh, | 601 | struct buffer_head *bh, |
@@ -600,6 +700,20 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, | |||
600 | type); | 700 | type); |
601 | } | 701 | } |
602 | 702 | ||
703 | int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, | ||
704 | struct buffer_head *bh, int type) | ||
705 | { | ||
706 | return __ocfs2_journal_access(handle, inode, bh, &dr_triggers, | ||
707 | type); | ||
708 | } | ||
709 | |||
710 | int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, | ||
711 | struct buffer_head *bh, int type) | ||
712 | { | ||
713 | return __ocfs2_journal_access(handle, inode, bh, &dl_triggers, | ||
714 | type); | ||
715 | } | ||
716 | |||
603 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, | 717 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, |
604 | struct buffer_head *bh, int type) | 718 | struct buffer_head *bh, int type) |
605 | { | 719 | { |
@@ -1176,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | |||
1176 | } | 1290 | } |
1177 | 1291 | ||
1178 | /* Called by the mount code to queue recovery the last part of | 1292 | /* Called by the mount code to queue recovery the last part of |
1179 | * recovery for it's own slot. */ | 1293 | * recovery for it's own and offline slot(s). */ |
1180 | void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) | 1294 | void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) |
1181 | { | 1295 | { |
1182 | struct ocfs2_journal *journal = osb->journal; | 1296 | struct ocfs2_journal *journal = osb->journal; |
1183 | 1297 | ||
1184 | if (osb->dirty) { | 1298 | /* No need to queue up our truncate_log as regular cleanup will catch |
1185 | /* No need to queue up our truncate_log as regular | 1299 | * that */ |
1186 | * cleanup will catch that. */ | 1300 | ocfs2_queue_recovery_completion(journal, osb->slot_num, |
1187 | ocfs2_queue_recovery_completion(journal, | 1301 | osb->local_alloc_copy, NULL, NULL); |
1188 | osb->slot_num, | 1302 | ocfs2_schedule_truncate_log_flush(osb, 0); |
1189 | osb->local_alloc_copy, | ||
1190 | NULL, | ||
1191 | NULL); | ||
1192 | ocfs2_schedule_truncate_log_flush(osb, 0); | ||
1193 | 1303 | ||
1194 | osb->local_alloc_copy = NULL; | 1304 | osb->local_alloc_copy = NULL; |
1195 | osb->dirty = 0; | 1305 | osb->dirty = 0; |
1196 | } | 1306 | |
1307 | /* queue to recover orphan slots for all offline slots */ | ||
1308 | ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); | ||
1309 | ocfs2_queue_replay_slots(osb); | ||
1310 | ocfs2_free_replay_slots(osb); | ||
1197 | } | 1311 | } |
1198 | 1312 | ||
1199 | void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) | 1313 | void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) |
@@ -1236,6 +1350,14 @@ restart: | |||
1236 | goto bail; | 1350 | goto bail; |
1237 | } | 1351 | } |
1238 | 1352 | ||
1353 | status = ocfs2_compute_replay_slots(osb); | ||
1354 | if (status < 0) | ||
1355 | mlog_errno(status); | ||
1356 | |||
1357 | /* queue recovery for our own slot */ | ||
1358 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | ||
1359 | NULL, NULL); | ||
1360 | |||
1239 | spin_lock(&osb->osb_lock); | 1361 | spin_lock(&osb->osb_lock); |
1240 | while (rm->rm_used) { | 1362 | while (rm->rm_used) { |
1241 | /* It's always safe to remove entry zero, as we won't | 1363 | /* It's always safe to remove entry zero, as we won't |
@@ -1301,11 +1423,8 @@ skip_recovery: | |||
1301 | 1423 | ||
1302 | ocfs2_super_unlock(osb, 1); | 1424 | ocfs2_super_unlock(osb, 1); |
1303 | 1425 | ||
1304 | /* We always run recovery on our own orphan dir - the dead | 1426 | /* queue recovery for offline slots */ |
1305 | * node(s) may have disallowd a previos inode delete. Re-processing | 1427 | ocfs2_queue_replay_slots(osb); |
1306 | * is therefore required. */ | ||
1307 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | ||
1308 | NULL, NULL); | ||
1309 | 1428 | ||
1310 | bail: | 1429 | bail: |
1311 | mutex_lock(&osb->recovery_lock); | 1430 | mutex_lock(&osb->recovery_lock); |
@@ -1314,6 +1433,7 @@ bail: | |||
1314 | goto restart; | 1433 | goto restart; |
1315 | } | 1434 | } |
1316 | 1435 | ||
1436 | ocfs2_free_replay_slots(osb); | ||
1317 | osb->recovery_thread_task = NULL; | 1437 | osb->recovery_thread_task = NULL; |
1318 | mb(); /* sync with ocfs2_recovery_thread_running */ | 1438 | mb(); /* sync with ocfs2_recovery_thread_running */ |
1319 | wake_up(&osb->recovery_event); | 1439 | wake_up(&osb->recovery_event); |
@@ -1465,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1465 | goto done; | 1585 | goto done; |
1466 | } | 1586 | } |
1467 | 1587 | ||
1588 | /* we need to run complete recovery for offline orphan slots */ | ||
1589 | ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); | ||
1590 | |||
1468 | mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", | 1591 | mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", |
1469 | node_num, slot_num, | 1592 | node_num, slot_num, |
1470 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1593 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 172850a9a12a..619dd7f6c053 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -38,6 +38,17 @@ enum ocfs2_journal_state { | |||
38 | struct ocfs2_super; | 38 | struct ocfs2_super; |
39 | struct ocfs2_dinode; | 39 | struct ocfs2_dinode; |
40 | 40 | ||
41 | /* | ||
42 | * The recovery_list is a simple linked list of node numbers to recover. | ||
43 | * It is protected by the recovery_lock. | ||
44 | */ | ||
45 | |||
46 | struct ocfs2_recovery_map { | ||
47 | unsigned int rm_used; | ||
48 | unsigned int *rm_entries; | ||
49 | }; | ||
50 | |||
51 | |||
41 | struct ocfs2_journal { | 52 | struct ocfs2_journal { |
42 | enum ocfs2_journal_state j_state; /* Journals current state */ | 53 | enum ocfs2_journal_state j_state; /* Journals current state */ |
43 | 54 | ||
@@ -139,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); | |||
139 | int ocfs2_recovery_init(struct ocfs2_super *osb); | 150 | int ocfs2_recovery_init(struct ocfs2_super *osb); |
140 | void ocfs2_recovery_exit(struct ocfs2_super *osb); | 151 | void ocfs2_recovery_exit(struct ocfs2_super *osb); |
141 | 152 | ||
153 | int ocfs2_compute_replay_slots(struct ocfs2_super *osb); | ||
142 | /* | 154 | /* |
143 | * Journal Control: | 155 | * Journal Control: |
144 | * Initialize, Load, Shutdown, Wipe a journal. | 156 | * Initialize, Load, Shutdown, Wipe a journal. |
@@ -266,6 +278,12 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, | |||
266 | /* dirblock */ | 278 | /* dirblock */ |
267 | int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, | 279 | int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, |
268 | struct buffer_head *bh, int type); | 280 | struct buffer_head *bh, int type); |
281 | /* ocfs2_dx_root_block */ | ||
282 | int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, | ||
283 | struct buffer_head *bh, int type); | ||
284 | /* ocfs2_dx_leaf */ | ||
285 | int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, | ||
286 | struct buffer_head *bh, int type); | ||
269 | /* Anything that has no ecc */ | 287 | /* Anything that has no ecc */ |
270 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, | 288 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, |
271 | struct buffer_head *bh, int type); | 289 | struct buffer_head *bh, int type); |
@@ -368,14 +386,29 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb) | |||
368 | } | 386 | } |
369 | 387 | ||
370 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + | 388 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + |
371 | * bitmap block for the new bit) */ | 389 | * bitmap block for the new bit) dx_root update for free list */ |
372 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | 390 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) |
391 | |||
392 | static inline int ocfs2_add_dir_index_credits(struct super_block *sb) | ||
393 | { | ||
394 | /* 1 block for index, 2 allocs (data, metadata), 1 clusters | ||
395 | * worth of blocks for initial extent. */ | ||
396 | return 1 + 2 * OCFS2_SUBALLOC_ALLOC + | ||
397 | ocfs2_clusters_to_blocks(sb, 1); | ||
398 | } | ||
373 | 399 | ||
374 | /* parent fe, parent block, new file entry, inode alloc fe, inode alloc | 400 | /* parent fe, parent block, new file entry, index leaf, inode alloc fe, inode |
375 | * group descriptor + mkdir/symlink blocks + quota update */ | 401 | * alloc group descriptor + mkdir/symlink blocks + dir blocks + xattr |
376 | static inline int ocfs2_mknod_credits(struct super_block *sb) | 402 | * blocks + quota update */ |
403 | static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir, | ||
404 | int xattr_credits) | ||
377 | { | 405 | { |
378 | return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS + | 406 | int dir_credits = OCFS2_DIR_LINK_ADDITIONAL_CREDITS; |
407 | |||
408 | if (is_dir) | ||
409 | dir_credits += ocfs2_add_dir_index_credits(sb); | ||
410 | |||
411 | return 4 + OCFS2_SUBALLOC_ALLOC + dir_credits + xattr_credits + | ||
379 | ocfs2_quota_trans_credits(sb); | 412 | ocfs2_quota_trans_credits(sb); |
380 | } | 413 | } |
381 | 414 | ||
@@ -388,31 +421,31 @@ static inline int ocfs2_mknod_credits(struct super_block *sb) | |||
388 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) | 421 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) |
389 | 422 | ||
390 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota | 423 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota |
391 | * update on dir */ | 424 | * update on dir + index leaf + dx root update for free list */ |
392 | static inline int ocfs2_link_credits(struct super_block *sb) | 425 | static inline int ocfs2_link_credits(struct super_block *sb) |
393 | { | 426 | { |
394 | return 2*OCFS2_INODE_UPDATE_CREDITS + 1 + | 427 | return 2*OCFS2_INODE_UPDATE_CREDITS + 3 + |
395 | ocfs2_quota_trans_credits(sb); | 428 | ocfs2_quota_trans_credits(sb); |
396 | } | 429 | } |
397 | 430 | ||
398 | /* inode + dir inode (if we unlink a dir), + dir entry block + orphan | 431 | /* inode + dir inode (if we unlink a dir), + dir entry block + orphan |
399 | * dir inode link */ | 432 | * dir inode link + dir inode index leaf + dir index root */ |
400 | static inline int ocfs2_unlink_credits(struct super_block *sb) | 433 | static inline int ocfs2_unlink_credits(struct super_block *sb) |
401 | { | 434 | { |
402 | /* The quota update from ocfs2_link_credits is unused here... */ | 435 | /* The quota update from ocfs2_link_credits is unused here... */ |
403 | return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb); | 436 | return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb); |
404 | } | 437 | } |
405 | 438 | ||
406 | /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + | 439 | /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + |
407 | * inode alloc group descriptor */ | 440 | * inode alloc group descriptor + orphan dir index leaf */ |
408 | #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 1 + 1) | 441 | #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3) |
409 | 442 | ||
410 | /* dinode update, old dir dinode update, new dir dinode update, old | 443 | /* dinode update, old dir dinode update, new dir dinode update, old |
411 | * dir dir entry, new dir dir entry, dir entry update for renaming | 444 | * dir dir entry, new dir dir entry, dir entry update for renaming |
412 | * directory + target unlink */ | 445 | * directory + target unlink + 3 x dir index leaves */ |
413 | static inline int ocfs2_rename_credits(struct super_block *sb) | 446 | static inline int ocfs2_rename_credits(struct super_block *sb) |
414 | { | 447 | { |
415 | return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb); | 448 | return 3 * OCFS2_INODE_UPDATE_CREDITS + 6 + ocfs2_unlink_credits(sb); |
416 | } | 449 | } |
417 | 450 | ||
418 | /* global bitmap dinode, group desc., relinked group, | 451 | /* global bitmap dinode, group desc., relinked group, |
@@ -422,6 +455,20 @@ static inline int ocfs2_rename_credits(struct super_block *sb) | |||
422 | + OCFS2_INODE_UPDATE_CREDITS \ | 455 | + OCFS2_INODE_UPDATE_CREDITS \ |
423 | + OCFS2_XATTR_BLOCK_UPDATE_CREDITS) | 456 | + OCFS2_XATTR_BLOCK_UPDATE_CREDITS) |
424 | 457 | ||
458 | /* inode update, removal of dx root block from allocator */ | ||
459 | #define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \ | ||
460 | OCFS2_SUBALLOC_FREE) | ||
461 | |||
462 | static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb) | ||
463 | { | ||
464 | int credits = 1 + OCFS2_SUBALLOC_ALLOC; | ||
465 | |||
466 | credits += ocfs2_clusters_to_blocks(sb, 1); | ||
467 | credits += ocfs2_quota_trans_credits(sb); | ||
468 | |||
469 | return credits; | ||
470 | } | ||
471 | |||
425 | /* | 472 | /* |
426 | * Please note that the caller must make sure that root_el is the root | 473 | * Please note that the caller must make sure that root_el is the root |
427 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | 474 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise |
@@ -457,7 +504,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
457 | 504 | ||
458 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 505 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
459 | { | 506 | { |
460 | int blocks = ocfs2_mknod_credits(sb); | 507 | int blocks = ocfs2_mknod_credits(sb, 0, 0); |
461 | 508 | ||
462 | /* links can be longer than one block so we may update many | 509 | /* links can be longer than one block so we may update many |
463 | * within our single allocated extent. */ | 510 | * within our single allocated extent. */ |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ec70cdbe77fc..bac7e6abaf47 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
31 | #include <linux/debugfs.h> | ||
32 | 31 | ||
33 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | 32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC |
34 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
@@ -75,84 +74,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 74 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
76 | struct inode *local_alloc_inode); | 75 | struct inode *local_alloc_inode); |
77 | 76 | ||
78 | #ifdef CONFIG_OCFS2_FS_STATS | ||
79 | |||
80 | static int ocfs2_la_debug_open(struct inode *inode, struct file *file) | ||
81 | { | ||
82 | file->private_data = inode->i_private; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE | ||
87 | #define LA_DEBUG_VER 1 | ||
88 | static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, | ||
89 | size_t count, loff_t *ppos) | ||
90 | { | ||
91 | static DEFINE_MUTEX(la_debug_mutex); | ||
92 | struct ocfs2_super *osb = file->private_data; | ||
93 | int written, ret; | ||
94 | char *buf = osb->local_alloc_debug_buf; | ||
95 | |||
96 | mutex_lock(&la_debug_mutex); | ||
97 | memset(buf, 0, LA_DEBUG_BUF_SZ); | ||
98 | |||
99 | written = snprintf(buf, LA_DEBUG_BUF_SZ, | ||
100 | "0x%x\t0x%llx\t%u\t%u\t0x%x\n", | ||
101 | LA_DEBUG_VER, | ||
102 | (unsigned long long)osb->la_last_gd, | ||
103 | osb->local_alloc_default_bits, | ||
104 | osb->local_alloc_bits, osb->local_alloc_state); | ||
105 | |||
106 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); | ||
107 | |||
108 | mutex_unlock(&la_debug_mutex); | ||
109 | return ret; | ||
110 | } | ||
111 | |||
112 | static const struct file_operations ocfs2_la_debug_fops = { | ||
113 | .open = ocfs2_la_debug_open, | ||
114 | .read = ocfs2_la_debug_read, | ||
115 | }; | ||
116 | |||
117 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
118 | { | ||
119 | osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); | ||
120 | if (!osb->local_alloc_debug_buf) | ||
121 | return; | ||
122 | |||
123 | osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", | ||
124 | S_IFREG|S_IRUSR, | ||
125 | osb->osb_debug_root, | ||
126 | osb, | ||
127 | &ocfs2_la_debug_fops); | ||
128 | if (!osb->local_alloc_debug) { | ||
129 | kfree(osb->local_alloc_debug_buf); | ||
130 | osb->local_alloc_debug_buf = NULL; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
135 | { | ||
136 | if (osb->local_alloc_debug) | ||
137 | debugfs_remove(osb->local_alloc_debug); | ||
138 | |||
139 | if (osb->local_alloc_debug_buf) | ||
140 | kfree(osb->local_alloc_debug_buf); | ||
141 | |||
142 | osb->local_alloc_debug_buf = NULL; | ||
143 | osb->local_alloc_debug = NULL; | ||
144 | } | ||
145 | #else /* CONFIG_OCFS2_FS_STATS */ | ||
146 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
147 | { | ||
148 | return; | ||
149 | } | ||
150 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
151 | { | ||
152 | return; | ||
153 | } | ||
154 | #endif | ||
155 | |||
156 | static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) | 77 | static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) |
157 | { | 78 | { |
158 | return (osb->local_alloc_state == OCFS2_LA_THROTTLED || | 79 | return (osb->local_alloc_state == OCFS2_LA_THROTTLED || |
@@ -226,8 +147,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
226 | 147 | ||
227 | mlog_entry_void(); | 148 | mlog_entry_void(); |
228 | 149 | ||
229 | ocfs2_init_la_debug(osb); | ||
230 | |||
231 | if (osb->local_alloc_bits == 0) | 150 | if (osb->local_alloc_bits == 0) |
232 | goto bail; | 151 | goto bail; |
233 | 152 | ||
@@ -299,9 +218,6 @@ bail: | |||
299 | if (inode) | 218 | if (inode) |
300 | iput(inode); | 219 | iput(inode); |
301 | 220 | ||
302 | if (status < 0) | ||
303 | ocfs2_shutdown_la_debug(osb); | ||
304 | |||
305 | mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); | 221 | mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); |
306 | 222 | ||
307 | mlog_exit(status); | 223 | mlog_exit(status); |
@@ -331,8 +247,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
331 | cancel_delayed_work(&osb->la_enable_wq); | 247 | cancel_delayed_work(&osb->la_enable_wq); |
332 | flush_workqueue(ocfs2_wq); | 248 | flush_workqueue(ocfs2_wq); |
333 | 249 | ||
334 | ocfs2_shutdown_la_debug(osb); | ||
335 | |||
336 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) | 250 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) |
337 | goto out; | 251 | goto out; |
338 | 252 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 4b11762f249e..2220f93f668b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -80,14 +80,14 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
80 | struct inode **ret_orphan_dir, | 80 | struct inode **ret_orphan_dir, |
81 | struct inode *inode, | 81 | struct inode *inode, |
82 | char *name, | 82 | char *name, |
83 | struct buffer_head **de_bh); | 83 | struct ocfs2_dir_lookup_result *lookup); |
84 | 84 | ||
85 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 85 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
86 | handle_t *handle, | 86 | handle_t *handle, |
87 | struct inode *inode, | 87 | struct inode *inode, |
88 | struct ocfs2_dinode *fe, | 88 | struct ocfs2_dinode *fe, |
89 | char *name, | 89 | char *name, |
90 | struct buffer_head *de_bh, | 90 | struct ocfs2_dir_lookup_result *lookup, |
91 | struct inode *orphan_dir_inode); | 91 | struct inode *orphan_dir_inode); |
92 | 92 | ||
93 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, | 93 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, |
@@ -228,17 +228,18 @@ static int ocfs2_mknod(struct inode *dir, | |||
228 | struct ocfs2_super *osb; | 228 | struct ocfs2_super *osb; |
229 | struct ocfs2_dinode *dirfe; | 229 | struct ocfs2_dinode *dirfe; |
230 | struct buffer_head *new_fe_bh = NULL; | 230 | struct buffer_head *new_fe_bh = NULL; |
231 | struct buffer_head *de_bh = NULL; | ||
232 | struct inode *inode = NULL; | 231 | struct inode *inode = NULL; |
233 | struct ocfs2_alloc_context *inode_ac = NULL; | 232 | struct ocfs2_alloc_context *inode_ac = NULL; |
234 | struct ocfs2_alloc_context *data_ac = NULL; | 233 | struct ocfs2_alloc_context *data_ac = NULL; |
235 | struct ocfs2_alloc_context *xattr_ac = NULL; | 234 | struct ocfs2_alloc_context *meta_ac = NULL; |
236 | int want_clusters = 0; | 235 | int want_clusters = 0; |
236 | int want_meta = 0; | ||
237 | int xattr_credits = 0; | 237 | int xattr_credits = 0; |
238 | struct ocfs2_security_xattr_info si = { | 238 | struct ocfs2_security_xattr_info si = { |
239 | .enable = 1, | 239 | .enable = 1, |
240 | }; | 240 | }; |
241 | int did_quota_inode = 0; | 241 | int did_quota_inode = 0; |
242 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | ||
242 | 243 | ||
243 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, | 244 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, |
244 | (unsigned long)dev, dentry->d_name.len, | 245 | (unsigned long)dev, dentry->d_name.len, |
@@ -254,13 +255,13 @@ static int ocfs2_mknod(struct inode *dir, | |||
254 | return status; | 255 | return status; |
255 | } | 256 | } |
256 | 257 | ||
257 | if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { | 258 | if (S_ISDIR(mode) && (dir->i_nlink >= ocfs2_link_max(osb))) { |
258 | status = -EMLINK; | 259 | status = -EMLINK; |
259 | goto leave; | 260 | goto leave; |
260 | } | 261 | } |
261 | 262 | ||
262 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 263 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
263 | if (!dirfe->i_links_count) { | 264 | if (!ocfs2_read_links_count(dirfe)) { |
264 | /* can't make a file in a deleted directory. */ | 265 | /* can't make a file in a deleted directory. */ |
265 | status = -ENOENT; | 266 | status = -ENOENT; |
266 | goto leave; | 267 | goto leave; |
@@ -274,7 +275,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
274 | /* get a spot inside the dir. */ | 275 | /* get a spot inside the dir. */ |
275 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, | 276 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, |
276 | dentry->d_name.name, | 277 | dentry->d_name.name, |
277 | dentry->d_name.len, &de_bh); | 278 | dentry->d_name.len, &lookup); |
278 | if (status < 0) { | 279 | if (status < 0) { |
279 | mlog_errno(status); | 280 | mlog_errno(status); |
280 | goto leave; | 281 | goto leave; |
@@ -308,17 +309,29 @@ static int ocfs2_mknod(struct inode *dir, | |||
308 | 309 | ||
309 | /* calculate meta data/clusters for setting security and acl xattr */ | 310 | /* calculate meta data/clusters for setting security and acl xattr */ |
310 | status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode, | 311 | status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode, |
311 | &si, &want_clusters, | 312 | &si, &want_clusters, |
312 | &xattr_credits, &xattr_ac); | 313 | &xattr_credits, &want_meta); |
313 | if (status < 0) { | 314 | if (status < 0) { |
314 | mlog_errno(status); | 315 | mlog_errno(status); |
315 | goto leave; | 316 | goto leave; |
316 | } | 317 | } |
317 | 318 | ||
318 | /* Reserve a cluster if creating an extent based directory. */ | 319 | /* Reserve a cluster if creating an extent based directory. */ |
319 | if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) | 320 | if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) { |
320 | want_clusters += 1; | 321 | want_clusters += 1; |
321 | 322 | ||
323 | /* Dir indexing requires extra space as well */ | ||
324 | if (ocfs2_supports_indexed_dirs(osb)) | ||
325 | want_meta++; | ||
326 | } | ||
327 | |||
328 | status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac); | ||
329 | if (status < 0) { | ||
330 | if (status != -ENOSPC) | ||
331 | mlog_errno(status); | ||
332 | goto leave; | ||
333 | } | ||
334 | |||
322 | status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac); | 335 | status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac); |
323 | if (status < 0) { | 336 | if (status < 0) { |
324 | if (status != -ENOSPC) | 337 | if (status != -ENOSPC) |
@@ -326,8 +339,9 @@ static int ocfs2_mknod(struct inode *dir, | |||
326 | goto leave; | 339 | goto leave; |
327 | } | 340 | } |
328 | 341 | ||
329 | handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) + | 342 | handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, |
330 | xattr_credits); | 343 | S_ISDIR(mode), |
344 | xattr_credits)); | ||
331 | if (IS_ERR(handle)) { | 345 | if (IS_ERR(handle)) { |
332 | status = PTR_ERR(handle); | 346 | status = PTR_ERR(handle); |
333 | handle = NULL; | 347 | handle = NULL; |
@@ -355,7 +369,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
355 | 369 | ||
356 | if (S_ISDIR(mode)) { | 370 | if (S_ISDIR(mode)) { |
357 | status = ocfs2_fill_new_dir(osb, handle, dir, inode, | 371 | status = ocfs2_fill_new_dir(osb, handle, dir, inode, |
358 | new_fe_bh, data_ac); | 372 | new_fe_bh, data_ac, meta_ac); |
359 | if (status < 0) { | 373 | if (status < 0) { |
360 | mlog_errno(status); | 374 | mlog_errno(status); |
361 | goto leave; | 375 | goto leave; |
@@ -367,7 +381,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
367 | mlog_errno(status); | 381 | mlog_errno(status); |
368 | goto leave; | 382 | goto leave; |
369 | } | 383 | } |
370 | le16_add_cpu(&dirfe->i_links_count, 1); | 384 | ocfs2_add_links_count(dirfe, 1); |
371 | status = ocfs2_journal_dirty(handle, parent_fe_bh); | 385 | status = ocfs2_journal_dirty(handle, parent_fe_bh); |
372 | if (status < 0) { | 386 | if (status < 0) { |
373 | mlog_errno(status); | 387 | mlog_errno(status); |
@@ -377,7 +391,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
377 | } | 391 | } |
378 | 392 | ||
379 | status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh, | 393 | status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh, |
380 | xattr_ac, data_ac); | 394 | meta_ac, data_ac); |
381 | if (status < 0) { | 395 | if (status < 0) { |
382 | mlog_errno(status); | 396 | mlog_errno(status); |
383 | goto leave; | 397 | goto leave; |
@@ -385,7 +399,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
385 | 399 | ||
386 | if (si.enable) { | 400 | if (si.enable) { |
387 | status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, | 401 | status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, |
388 | xattr_ac, data_ac); | 402 | meta_ac, data_ac); |
389 | if (status < 0) { | 403 | if (status < 0) { |
390 | mlog_errno(status); | 404 | mlog_errno(status); |
391 | goto leave; | 405 | goto leave; |
@@ -394,7 +408,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
394 | 408 | ||
395 | status = ocfs2_add_entry(handle, dentry, inode, | 409 | status = ocfs2_add_entry(handle, dentry, inode, |
396 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, | 410 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, |
397 | de_bh); | 411 | &lookup); |
398 | if (status < 0) { | 412 | if (status < 0) { |
399 | mlog_errno(status); | 413 | mlog_errno(status); |
400 | goto leave; | 414 | goto leave; |
@@ -423,11 +437,12 @@ leave: | |||
423 | mlog(0, "Disk is full\n"); | 437 | mlog(0, "Disk is full\n"); |
424 | 438 | ||
425 | brelse(new_fe_bh); | 439 | brelse(new_fe_bh); |
426 | brelse(de_bh); | ||
427 | brelse(parent_fe_bh); | 440 | brelse(parent_fe_bh); |
428 | kfree(si.name); | 441 | kfree(si.name); |
429 | kfree(si.value); | 442 | kfree(si.value); |
430 | 443 | ||
444 | ocfs2_free_dir_lookup_result(&lookup); | ||
445 | |||
431 | if ((status < 0) && inode) { | 446 | if ((status < 0) && inode) { |
432 | clear_nlink(inode); | 447 | clear_nlink(inode); |
433 | iput(inode); | 448 | iput(inode); |
@@ -439,8 +454,8 @@ leave: | |||
439 | if (data_ac) | 454 | if (data_ac) |
440 | ocfs2_free_alloc_context(data_ac); | 455 | ocfs2_free_alloc_context(data_ac); |
441 | 456 | ||
442 | if (xattr_ac) | 457 | if (meta_ac) |
443 | ocfs2_free_alloc_context(xattr_ac); | 458 | ocfs2_free_alloc_context(meta_ac); |
444 | 459 | ||
445 | mlog_exit(status); | 460 | mlog_exit(status); |
446 | 461 | ||
@@ -462,6 +477,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
462 | struct ocfs2_extent_list *fel; | 477 | struct ocfs2_extent_list *fel; |
463 | u64 fe_blkno = 0; | 478 | u64 fe_blkno = 0; |
464 | u16 suballoc_bit; | 479 | u16 suballoc_bit; |
480 | u16 feat; | ||
465 | 481 | ||
466 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, | 482 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, |
467 | inode->i_mode, (unsigned long)dev, dentry->d_name.len, | 483 | inode->i_mode, (unsigned long)dev, dentry->d_name.len, |
@@ -469,8 +485,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
469 | 485 | ||
470 | *new_fe_bh = NULL; | 486 | *new_fe_bh = NULL; |
471 | 487 | ||
472 | status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, | 488 | status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, |
473 | &fe_blkno); | 489 | inode_ac, &suballoc_bit, &fe_blkno); |
474 | if (status < 0) { | 490 | if (status < 0) { |
475 | mlog_errno(status); | 491 | mlog_errno(status); |
476 | goto leave; | 492 | goto leave; |
@@ -513,7 +529,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
513 | fe->i_mode = cpu_to_le16(inode->i_mode); | 529 | fe->i_mode = cpu_to_le16(inode->i_mode); |
514 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | 530 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) |
515 | fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); | 531 | fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); |
516 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 532 | |
533 | ocfs2_set_links_count(fe, inode->i_nlink); | ||
517 | 534 | ||
518 | fe->i_last_eb_blk = 0; | 535 | fe->i_last_eb_blk = 0; |
519 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); | 536 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); |
@@ -525,11 +542,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
525 | fe->i_dtime = 0; | 542 | fe->i_dtime = 0; |
526 | 543 | ||
527 | /* | 544 | /* |
528 | * If supported, directories start with inline data. | 545 | * If supported, directories start with inline data. If inline |
546 | * isn't supported, but indexing is, we start them as indexed. | ||
529 | */ | 547 | */ |
548 | feat = le16_to_cpu(fe->i_dyn_features); | ||
530 | if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) { | 549 | if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) { |
531 | u16 feat = le16_to_cpu(fe->i_dyn_features); | ||
532 | |||
533 | fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL); | 550 | fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL); |
534 | 551 | ||
535 | fe->id2.i_data.id_count = cpu_to_le16( | 552 | fe->id2.i_data.id_count = cpu_to_le16( |
@@ -608,9 +625,9 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
608 | int err; | 625 | int err; |
609 | struct buffer_head *fe_bh = NULL; | 626 | struct buffer_head *fe_bh = NULL; |
610 | struct buffer_head *parent_fe_bh = NULL; | 627 | struct buffer_head *parent_fe_bh = NULL; |
611 | struct buffer_head *de_bh = NULL; | ||
612 | struct ocfs2_dinode *fe = NULL; | 628 | struct ocfs2_dinode *fe = NULL; |
613 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 629 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
630 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | ||
614 | 631 | ||
615 | mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, | 632 | mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, |
616 | old_dentry->d_name.len, old_dentry->d_name.name, | 633 | old_dentry->d_name.len, old_dentry->d_name.name, |
@@ -638,7 +655,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
638 | 655 | ||
639 | err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, | 656 | err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, |
640 | dentry->d_name.name, | 657 | dentry->d_name.name, |
641 | dentry->d_name.len, &de_bh); | 658 | dentry->d_name.len, &lookup); |
642 | if (err < 0) { | 659 | if (err < 0) { |
643 | mlog_errno(err); | 660 | mlog_errno(err); |
644 | goto out; | 661 | goto out; |
@@ -652,7 +669,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
652 | } | 669 | } |
653 | 670 | ||
654 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 671 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
655 | if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) { | 672 | if (ocfs2_read_links_count(fe) >= ocfs2_link_max(osb)) { |
656 | err = -EMLINK; | 673 | err = -EMLINK; |
657 | goto out_unlock_inode; | 674 | goto out_unlock_inode; |
658 | } | 675 | } |
@@ -674,13 +691,13 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
674 | 691 | ||
675 | inc_nlink(inode); | 692 | inc_nlink(inode); |
676 | inode->i_ctime = CURRENT_TIME; | 693 | inode->i_ctime = CURRENT_TIME; |
677 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 694 | ocfs2_set_links_count(fe, inode->i_nlink); |
678 | fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 695 | fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
679 | fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 696 | fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
680 | 697 | ||
681 | err = ocfs2_journal_dirty(handle, fe_bh); | 698 | err = ocfs2_journal_dirty(handle, fe_bh); |
682 | if (err < 0) { | 699 | if (err < 0) { |
683 | le16_add_cpu(&fe->i_links_count, -1); | 700 | ocfs2_add_links_count(fe, -1); |
684 | drop_nlink(inode); | 701 | drop_nlink(inode); |
685 | mlog_errno(err); | 702 | mlog_errno(err); |
686 | goto out_commit; | 703 | goto out_commit; |
@@ -688,9 +705,9 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
688 | 705 | ||
689 | err = ocfs2_add_entry(handle, dentry, inode, | 706 | err = ocfs2_add_entry(handle, dentry, inode, |
690 | OCFS2_I(inode)->ip_blkno, | 707 | OCFS2_I(inode)->ip_blkno, |
691 | parent_fe_bh, de_bh); | 708 | parent_fe_bh, &lookup); |
692 | if (err) { | 709 | if (err) { |
693 | le16_add_cpu(&fe->i_links_count, -1); | 710 | ocfs2_add_links_count(fe, -1); |
694 | drop_nlink(inode); | 711 | drop_nlink(inode); |
695 | mlog_errno(err); | 712 | mlog_errno(err); |
696 | goto out_commit; | 713 | goto out_commit; |
@@ -714,10 +731,11 @@ out_unlock_inode: | |||
714 | out: | 731 | out: |
715 | ocfs2_inode_unlock(dir, 1); | 732 | ocfs2_inode_unlock(dir, 1); |
716 | 733 | ||
717 | brelse(de_bh); | ||
718 | brelse(fe_bh); | 734 | brelse(fe_bh); |
719 | brelse(parent_fe_bh); | 735 | brelse(parent_fe_bh); |
720 | 736 | ||
737 | ocfs2_free_dir_lookup_result(&lookup); | ||
738 | |||
721 | mlog_exit(err); | 739 | mlog_exit(err); |
722 | 740 | ||
723 | return err; | 741 | return err; |
@@ -766,10 +784,9 @@ static int ocfs2_unlink(struct inode *dir, | |||
766 | struct buffer_head *fe_bh = NULL; | 784 | struct buffer_head *fe_bh = NULL; |
767 | struct buffer_head *parent_node_bh = NULL; | 785 | struct buffer_head *parent_node_bh = NULL; |
768 | handle_t *handle = NULL; | 786 | handle_t *handle = NULL; |
769 | struct ocfs2_dir_entry *dirent = NULL; | ||
770 | struct buffer_head *dirent_bh = NULL; | ||
771 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; | 787 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; |
772 | struct buffer_head *orphan_entry_bh = NULL; | 788 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
789 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | ||
773 | 790 | ||
774 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, | 791 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, |
775 | dentry->d_name.len, dentry->d_name.name); | 792 | dentry->d_name.len, dentry->d_name.name); |
@@ -791,8 +808,8 @@ static int ocfs2_unlink(struct inode *dir, | |||
791 | } | 808 | } |
792 | 809 | ||
793 | status = ocfs2_find_files_on_disk(dentry->d_name.name, | 810 | status = ocfs2_find_files_on_disk(dentry->d_name.name, |
794 | dentry->d_name.len, &blkno, | 811 | dentry->d_name.len, &blkno, dir, |
795 | dir, &dirent_bh, &dirent); | 812 | &lookup); |
796 | if (status < 0) { | 813 | if (status < 0) { |
797 | if (status != -ENOENT) | 814 | if (status != -ENOENT) |
798 | mlog_errno(status); | 815 | mlog_errno(status); |
@@ -817,10 +834,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
817 | child_locked = 1; | 834 | child_locked = 1; |
818 | 835 | ||
819 | if (S_ISDIR(inode->i_mode)) { | 836 | if (S_ISDIR(inode->i_mode)) { |
820 | if (!ocfs2_empty_dir(inode)) { | 837 | if (inode->i_nlink != 2 || !ocfs2_empty_dir(inode)) { |
821 | status = -ENOTEMPTY; | ||
822 | goto leave; | ||
823 | } else if (inode->i_nlink != 2) { | ||
824 | status = -ENOTEMPTY; | 838 | status = -ENOTEMPTY; |
825 | goto leave; | 839 | goto leave; |
826 | } | 840 | } |
@@ -836,8 +850,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
836 | 850 | ||
837 | if (inode_is_unlinkable(inode)) { | 851 | if (inode_is_unlinkable(inode)) { |
838 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, | 852 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, |
839 | orphan_name, | 853 | orphan_name, &orphan_insert); |
840 | &orphan_entry_bh); | ||
841 | if (status < 0) { | 854 | if (status < 0) { |
842 | mlog_errno(status); | 855 | mlog_errno(status); |
843 | goto leave; | 856 | goto leave; |
@@ -863,7 +876,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
863 | 876 | ||
864 | if (inode_is_unlinkable(inode)) { | 877 | if (inode_is_unlinkable(inode)) { |
865 | status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, | 878 | status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, |
866 | orphan_entry_bh, orphan_dir); | 879 | &orphan_insert, orphan_dir); |
867 | if (status < 0) { | 880 | if (status < 0) { |
868 | mlog_errno(status); | 881 | mlog_errno(status); |
869 | goto leave; | 882 | goto leave; |
@@ -871,7 +884,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
871 | } | 884 | } |
872 | 885 | ||
873 | /* delete the name from the parent dir */ | 886 | /* delete the name from the parent dir */ |
874 | status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh); | 887 | status = ocfs2_delete_entry(handle, dir, &lookup); |
875 | if (status < 0) { | 888 | if (status < 0) { |
876 | mlog_errno(status); | 889 | mlog_errno(status); |
877 | goto leave; | 890 | goto leave; |
@@ -880,7 +893,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
880 | if (S_ISDIR(inode->i_mode)) | 893 | if (S_ISDIR(inode->i_mode)) |
881 | drop_nlink(inode); | 894 | drop_nlink(inode); |
882 | drop_nlink(inode); | 895 | drop_nlink(inode); |
883 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 896 | ocfs2_set_links_count(fe, inode->i_nlink); |
884 | 897 | ||
885 | status = ocfs2_journal_dirty(handle, fe_bh); | 898 | status = ocfs2_journal_dirty(handle, fe_bh); |
886 | if (status < 0) { | 899 | if (status < 0) { |
@@ -916,9 +929,10 @@ leave: | |||
916 | } | 929 | } |
917 | 930 | ||
918 | brelse(fe_bh); | 931 | brelse(fe_bh); |
919 | brelse(dirent_bh); | ||
920 | brelse(parent_node_bh); | 932 | brelse(parent_node_bh); |
921 | brelse(orphan_entry_bh); | 933 | |
934 | ocfs2_free_dir_lookup_result(&orphan_insert); | ||
935 | ocfs2_free_dir_lookup_result(&lookup); | ||
922 | 936 | ||
923 | mlog_exit(status); | 937 | mlog_exit(status); |
924 | 938 | ||
@@ -1004,8 +1018,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1004 | struct inode *new_dir, | 1018 | struct inode *new_dir, |
1005 | struct dentry *new_dentry) | 1019 | struct dentry *new_dentry) |
1006 | { | 1020 | { |
1007 | int status = 0, rename_lock = 0, parents_locked = 0; | 1021 | int status = 0, rename_lock = 0, parents_locked = 0, target_exists = 0; |
1008 | int old_child_locked = 0, new_child_locked = 0; | 1022 | int old_child_locked = 0, new_child_locked = 0, update_dot_dot = 0; |
1009 | struct inode *old_inode = old_dentry->d_inode; | 1023 | struct inode *old_inode = old_dentry->d_inode; |
1010 | struct inode *new_inode = new_dentry->d_inode; | 1024 | struct inode *new_inode = new_dentry->d_inode; |
1011 | struct inode *orphan_dir = NULL; | 1025 | struct inode *orphan_dir = NULL; |
@@ -1020,13 +1034,13 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1020 | handle_t *handle = NULL; | 1034 | handle_t *handle = NULL; |
1021 | struct buffer_head *old_dir_bh = NULL; | 1035 | struct buffer_head *old_dir_bh = NULL; |
1022 | struct buffer_head *new_dir_bh = NULL; | 1036 | struct buffer_head *new_dir_bh = NULL; |
1023 | struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL, | ||
1024 | *new_de = NULL; | ||
1025 | struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above | ||
1026 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, | ||
1027 | // this is the 1st dirent bh | ||
1028 | nlink_t old_dir_nlink = old_dir->i_nlink; | 1037 | nlink_t old_dir_nlink = old_dir->i_nlink; |
1029 | struct ocfs2_dinode *old_di; | 1038 | struct ocfs2_dinode *old_di; |
1039 | struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, }; | ||
1040 | struct ocfs2_dir_lookup_result target_lookup_res = { NULL, }; | ||
1041 | struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, }; | ||
1042 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | ||
1043 | struct ocfs2_dir_lookup_result target_insert = { NULL, }; | ||
1030 | 1044 | ||
1031 | /* At some point it might be nice to break this function up a | 1045 | /* At some point it might be nice to break this function up a |
1032 | * bit. */ | 1046 | * bit. */ |
@@ -1108,9 +1122,10 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1108 | if (S_ISDIR(old_inode->i_mode)) { | 1122 | if (S_ISDIR(old_inode->i_mode)) { |
1109 | u64 old_inode_parent; | 1123 | u64 old_inode_parent; |
1110 | 1124 | ||
1125 | update_dot_dot = 1; | ||
1111 | status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent, | 1126 | status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent, |
1112 | old_inode, &old_inode_de_bh, | 1127 | old_inode, |
1113 | &old_inode_dot_dot_de); | 1128 | &old_inode_dot_dot_res); |
1114 | if (status) { | 1129 | if (status) { |
1115 | status = -EIO; | 1130 | status = -EIO; |
1116 | goto bail; | 1131 | goto bail; |
@@ -1122,7 +1137,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1122 | } | 1137 | } |
1123 | 1138 | ||
1124 | if (!new_inode && new_dir != old_dir && | 1139 | if (!new_inode && new_dir != old_dir && |
1125 | new_dir->i_nlink >= OCFS2_LINK_MAX) { | 1140 | new_dir->i_nlink >= ocfs2_link_max(osb)) { |
1126 | status = -EMLINK; | 1141 | status = -EMLINK; |
1127 | goto bail; | 1142 | goto bail; |
1128 | } | 1143 | } |
@@ -1151,8 +1166,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1151 | * to delete it */ | 1166 | * to delete it */ |
1152 | status = ocfs2_find_files_on_disk(new_dentry->d_name.name, | 1167 | status = ocfs2_find_files_on_disk(new_dentry->d_name.name, |
1153 | new_dentry->d_name.len, | 1168 | new_dentry->d_name.len, |
1154 | &newfe_blkno, new_dir, &new_de_bh, | 1169 | &newfe_blkno, new_dir, |
1155 | &new_de); | 1170 | &target_lookup_res); |
1156 | /* The only error we allow here is -ENOENT because the new | 1171 | /* The only error we allow here is -ENOENT because the new |
1157 | * file not existing is perfectly valid. */ | 1172 | * file not existing is perfectly valid. */ |
1158 | if ((status < 0) && (status != -ENOENT)) { | 1173 | if ((status < 0) && (status != -ENOENT)) { |
@@ -1161,8 +1176,10 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1161 | mlog_errno(status); | 1176 | mlog_errno(status); |
1162 | goto bail; | 1177 | goto bail; |
1163 | } | 1178 | } |
1179 | if (status == 0) | ||
1180 | target_exists = 1; | ||
1164 | 1181 | ||
1165 | if (!new_de && new_inode) { | 1182 | if (!target_exists && new_inode) { |
1166 | /* | 1183 | /* |
1167 | * Target was unlinked by another node while we were | 1184 | * Target was unlinked by another node while we were |
1168 | * waiting to get to ocfs2_rename(). There isn't | 1185 | * waiting to get to ocfs2_rename(). There isn't |
@@ -1175,7 +1192,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1175 | 1192 | ||
1176 | /* In case we need to overwrite an existing file, we blow it | 1193 | /* In case we need to overwrite an existing file, we blow it |
1177 | * away first */ | 1194 | * away first */ |
1178 | if (new_de) { | 1195 | if (target_exists) { |
1179 | /* VFS didn't think there existed an inode here, but | 1196 | /* VFS didn't think there existed an inode here, but |
1180 | * someone else in the cluster must have raced our | 1197 | * someone else in the cluster must have raced our |
1181 | * rename to create one. Today we error cleanly, in | 1198 | * rename to create one. Today we error cleanly, in |
@@ -1216,8 +1233,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1216 | 1233 | ||
1217 | newfe = (struct ocfs2_dinode *) newfe_bh->b_data; | 1234 | newfe = (struct ocfs2_dinode *) newfe_bh->b_data; |
1218 | 1235 | ||
1219 | mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu " | 1236 | mlog(0, "aha rename over existing... new_blkno=%llu " |
1220 | "newfebh=%p bhblocknr=%llu\n", new_de, | 1237 | "newfebh=%p bhblocknr=%llu\n", |
1221 | (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? | 1238 | (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? |
1222 | (unsigned long long)newfe_bh->b_blocknr : 0ULL); | 1239 | (unsigned long long)newfe_bh->b_blocknr : 0ULL); |
1223 | 1240 | ||
@@ -1225,7 +1242,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1225 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 1242 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
1226 | new_inode, | 1243 | new_inode, |
1227 | orphan_name, | 1244 | orphan_name, |
1228 | &orphan_entry_bh); | 1245 | &orphan_insert); |
1229 | if (status < 0) { | 1246 | if (status < 0) { |
1230 | mlog_errno(status); | 1247 | mlog_errno(status); |
1231 | goto bail; | 1248 | goto bail; |
@@ -1243,7 +1260,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1243 | status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh, | 1260 | status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh, |
1244 | new_dentry->d_name.name, | 1261 | new_dentry->d_name.name, |
1245 | new_dentry->d_name.len, | 1262 | new_dentry->d_name.len, |
1246 | &insert_entry_bh); | 1263 | &target_insert); |
1247 | if (status < 0) { | 1264 | if (status < 0) { |
1248 | mlog_errno(status); | 1265 | mlog_errno(status); |
1249 | goto bail; | 1266 | goto bail; |
@@ -1258,10 +1275,10 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1258 | goto bail; | 1275 | goto bail; |
1259 | } | 1276 | } |
1260 | 1277 | ||
1261 | if (new_de) { | 1278 | if (target_exists) { |
1262 | if (S_ISDIR(new_inode->i_mode)) { | 1279 | if (S_ISDIR(new_inode->i_mode)) { |
1263 | if (!ocfs2_empty_dir(new_inode) || | 1280 | if (new_inode->i_nlink != 2 || |
1264 | new_inode->i_nlink != 2) { | 1281 | !ocfs2_empty_dir(new_inode)) { |
1265 | status = -ENOTEMPTY; | 1282 | status = -ENOTEMPTY; |
1266 | goto bail; | 1283 | goto bail; |
1267 | } | 1284 | } |
@@ -1274,10 +1291,10 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1274 | } | 1291 | } |
1275 | 1292 | ||
1276 | if (S_ISDIR(new_inode->i_mode) || | 1293 | if (S_ISDIR(new_inode->i_mode) || |
1277 | (newfe->i_links_count == cpu_to_le16(1))){ | 1294 | (ocfs2_read_links_count(newfe) == 1)) { |
1278 | status = ocfs2_orphan_add(osb, handle, new_inode, | 1295 | status = ocfs2_orphan_add(osb, handle, new_inode, |
1279 | newfe, orphan_name, | 1296 | newfe, orphan_name, |
1280 | orphan_entry_bh, orphan_dir); | 1297 | &orphan_insert, orphan_dir); |
1281 | if (status < 0) { | 1298 | if (status < 0) { |
1282 | mlog_errno(status); | 1299 | mlog_errno(status); |
1283 | goto bail; | 1300 | goto bail; |
@@ -1285,8 +1302,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1285 | } | 1302 | } |
1286 | 1303 | ||
1287 | /* change the dirent to point to the correct inode */ | 1304 | /* change the dirent to point to the correct inode */ |
1288 | status = ocfs2_update_entry(new_dir, handle, new_de_bh, | 1305 | status = ocfs2_update_entry(new_dir, handle, &target_lookup_res, |
1289 | new_de, old_inode); | 1306 | old_inode); |
1290 | if (status < 0) { | 1307 | if (status < 0) { |
1291 | mlog_errno(status); | 1308 | mlog_errno(status); |
1292 | goto bail; | 1309 | goto bail; |
@@ -1294,9 +1311,9 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1294 | new_dir->i_version++; | 1311 | new_dir->i_version++; |
1295 | 1312 | ||
1296 | if (S_ISDIR(new_inode->i_mode)) | 1313 | if (S_ISDIR(new_inode->i_mode)) |
1297 | newfe->i_links_count = 0; | 1314 | ocfs2_set_links_count(newfe, 0); |
1298 | else | 1315 | else |
1299 | le16_add_cpu(&newfe->i_links_count, -1); | 1316 | ocfs2_add_links_count(newfe, -1); |
1300 | 1317 | ||
1301 | status = ocfs2_journal_dirty(handle, newfe_bh); | 1318 | status = ocfs2_journal_dirty(handle, newfe_bh); |
1302 | if (status < 0) { | 1319 | if (status < 0) { |
@@ -1307,7 +1324,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1307 | /* if the name was not found in new_dir, add it now */ | 1324 | /* if the name was not found in new_dir, add it now */ |
1308 | status = ocfs2_add_entry(handle, new_dentry, old_inode, | 1325 | status = ocfs2_add_entry(handle, new_dentry, old_inode, |
1309 | OCFS2_I(old_inode)->ip_blkno, | 1326 | OCFS2_I(old_inode)->ip_blkno, |
1310 | new_dir_bh, insert_entry_bh); | 1327 | new_dir_bh, &target_insert); |
1311 | } | 1328 | } |
1312 | 1329 | ||
1313 | old_inode->i_ctime = CURRENT_TIME; | 1330 | old_inode->i_ctime = CURRENT_TIME; |
@@ -1334,15 +1351,13 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1334 | * because the insert might have changed the type of directory | 1351 | * because the insert might have changed the type of directory |
1335 | * we're dealing with. | 1352 | * we're dealing with. |
1336 | */ | 1353 | */ |
1337 | old_de_bh = ocfs2_find_entry(old_dentry->d_name.name, | 1354 | status = ocfs2_find_entry(old_dentry->d_name.name, |
1338 | old_dentry->d_name.len, | 1355 | old_dentry->d_name.len, old_dir, |
1339 | old_dir, &old_de); | 1356 | &old_entry_lookup); |
1340 | if (!old_de_bh) { | 1357 | if (status) |
1341 | status = -EIO; | ||
1342 | goto bail; | 1358 | goto bail; |
1343 | } | ||
1344 | 1359 | ||
1345 | status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); | 1360 | status = ocfs2_delete_entry(handle, old_dir, &old_entry_lookup); |
1346 | if (status < 0) { | 1361 | if (status < 0) { |
1347 | mlog_errno(status); | 1362 | mlog_errno(status); |
1348 | goto bail; | 1363 | goto bail; |
@@ -1353,9 +1368,10 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1353 | new_inode->i_ctime = CURRENT_TIME; | 1368 | new_inode->i_ctime = CURRENT_TIME; |
1354 | } | 1369 | } |
1355 | old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; | 1370 | old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; |
1356 | if (old_inode_de_bh) { | 1371 | |
1357 | status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh, | 1372 | if (update_dot_dot) { |
1358 | old_inode_dot_dot_de, new_dir); | 1373 | status = ocfs2_update_entry(old_inode, handle, |
1374 | &old_inode_dot_dot_res, new_dir); | ||
1359 | old_dir->i_nlink--; | 1375 | old_dir->i_nlink--; |
1360 | if (new_inode) { | 1376 | if (new_inode) { |
1361 | new_inode->i_nlink--; | 1377 | new_inode->i_nlink--; |
@@ -1391,14 +1407,13 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1391 | } else { | 1407 | } else { |
1392 | struct ocfs2_dinode *fe; | 1408 | struct ocfs2_dinode *fe; |
1393 | status = ocfs2_journal_access_di(handle, old_dir, | 1409 | status = ocfs2_journal_access_di(handle, old_dir, |
1394 | old_dir_bh, | 1410 | old_dir_bh, |
1395 | OCFS2_JOURNAL_ACCESS_WRITE); | 1411 | OCFS2_JOURNAL_ACCESS_WRITE); |
1396 | fe = (struct ocfs2_dinode *) old_dir_bh->b_data; | 1412 | fe = (struct ocfs2_dinode *) old_dir_bh->b_data; |
1397 | fe->i_links_count = cpu_to_le16(old_dir->i_nlink); | 1413 | ocfs2_set_links_count(fe, old_dir->i_nlink); |
1398 | status = ocfs2_journal_dirty(handle, old_dir_bh); | 1414 | status = ocfs2_journal_dirty(handle, old_dir_bh); |
1399 | } | 1415 | } |
1400 | } | 1416 | } |
1401 | |||
1402 | ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); | 1417 | ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); |
1403 | status = 0; | 1418 | status = 0; |
1404 | bail: | 1419 | bail: |
@@ -1429,13 +1444,17 @@ bail: | |||
1429 | 1444 | ||
1430 | if (new_inode) | 1445 | if (new_inode) |
1431 | iput(new_inode); | 1446 | iput(new_inode); |
1447 | |||
1448 | ocfs2_free_dir_lookup_result(&target_lookup_res); | ||
1449 | ocfs2_free_dir_lookup_result(&old_entry_lookup); | ||
1450 | ocfs2_free_dir_lookup_result(&old_inode_dot_dot_res); | ||
1451 | ocfs2_free_dir_lookup_result(&orphan_insert); | ||
1452 | ocfs2_free_dir_lookup_result(&target_insert); | ||
1453 | |||
1432 | brelse(newfe_bh); | 1454 | brelse(newfe_bh); |
1433 | brelse(old_inode_bh); | 1455 | brelse(old_inode_bh); |
1434 | brelse(old_dir_bh); | 1456 | brelse(old_dir_bh); |
1435 | brelse(new_dir_bh); | 1457 | brelse(new_dir_bh); |
1436 | brelse(new_de_bh); | ||
1437 | brelse(old_de_bh); | ||
1438 | brelse(old_inode_de_bh); | ||
1439 | brelse(orphan_entry_bh); | 1458 | brelse(orphan_entry_bh); |
1440 | brelse(insert_entry_bh); | 1459 | brelse(insert_entry_bh); |
1441 | 1460 | ||
@@ -1558,7 +1577,6 @@ static int ocfs2_symlink(struct inode *dir, | |||
1558 | struct inode *inode = NULL; | 1577 | struct inode *inode = NULL; |
1559 | struct super_block *sb; | 1578 | struct super_block *sb; |
1560 | struct buffer_head *new_fe_bh = NULL; | 1579 | struct buffer_head *new_fe_bh = NULL; |
1561 | struct buffer_head *de_bh = NULL; | ||
1562 | struct buffer_head *parent_fe_bh = NULL; | 1580 | struct buffer_head *parent_fe_bh = NULL; |
1563 | struct ocfs2_dinode *fe = NULL; | 1581 | struct ocfs2_dinode *fe = NULL; |
1564 | struct ocfs2_dinode *dirfe; | 1582 | struct ocfs2_dinode *dirfe; |
@@ -1572,6 +1590,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1572 | .enable = 1, | 1590 | .enable = 1, |
1573 | }; | 1591 | }; |
1574 | int did_quota = 0, did_quota_inode = 0; | 1592 | int did_quota = 0, did_quota_inode = 0; |
1593 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | ||
1575 | 1594 | ||
1576 | mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, | 1595 | mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, |
1577 | dentry, symname, dentry->d_name.len, dentry->d_name.name); | 1596 | dentry, symname, dentry->d_name.len, dentry->d_name.name); |
@@ -1592,7 +1611,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1592 | } | 1611 | } |
1593 | 1612 | ||
1594 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 1613 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
1595 | if (!dirfe->i_links_count) { | 1614 | if (!ocfs2_read_links_count(dirfe)) { |
1596 | /* can't make a file in a deleted directory. */ | 1615 | /* can't make a file in a deleted directory. */ |
1597 | status = -ENOENT; | 1616 | status = -ENOENT; |
1598 | goto bail; | 1617 | goto bail; |
@@ -1605,7 +1624,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1605 | 1624 | ||
1606 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, | 1625 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, |
1607 | dentry->d_name.name, | 1626 | dentry->d_name.name, |
1608 | dentry->d_name.len, &de_bh); | 1627 | dentry->d_name.len, &lookup); |
1609 | if (status < 0) { | 1628 | if (status < 0) { |
1610 | mlog_errno(status); | 1629 | mlog_errno(status); |
1611 | goto bail; | 1630 | goto bail; |
@@ -1744,7 +1763,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1744 | 1763 | ||
1745 | status = ocfs2_add_entry(handle, dentry, inode, | 1764 | status = ocfs2_add_entry(handle, dentry, inode, |
1746 | le64_to_cpu(fe->i_blkno), parent_fe_bh, | 1765 | le64_to_cpu(fe->i_blkno), parent_fe_bh, |
1747 | de_bh); | 1766 | &lookup); |
1748 | if (status < 0) { | 1767 | if (status < 0) { |
1749 | mlog_errno(status); | 1768 | mlog_errno(status); |
1750 | goto bail; | 1769 | goto bail; |
@@ -1772,9 +1791,9 @@ bail: | |||
1772 | 1791 | ||
1773 | brelse(new_fe_bh); | 1792 | brelse(new_fe_bh); |
1774 | brelse(parent_fe_bh); | 1793 | brelse(parent_fe_bh); |
1775 | brelse(de_bh); | ||
1776 | kfree(si.name); | 1794 | kfree(si.name); |
1777 | kfree(si.value); | 1795 | kfree(si.value); |
1796 | ocfs2_free_dir_lookup_result(&lookup); | ||
1778 | if (inode_ac) | 1797 | if (inode_ac) |
1779 | ocfs2_free_alloc_context(inode_ac); | 1798 | ocfs2_free_alloc_context(inode_ac); |
1780 | if (data_ac) | 1799 | if (data_ac) |
@@ -1826,7 +1845,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1826 | struct inode **ret_orphan_dir, | 1845 | struct inode **ret_orphan_dir, |
1827 | struct inode *inode, | 1846 | struct inode *inode, |
1828 | char *name, | 1847 | char *name, |
1829 | struct buffer_head **de_bh) | 1848 | struct ocfs2_dir_lookup_result *lookup) |
1830 | { | 1849 | { |
1831 | struct inode *orphan_dir_inode; | 1850 | struct inode *orphan_dir_inode; |
1832 | struct buffer_head *orphan_dir_bh = NULL; | 1851 | struct buffer_head *orphan_dir_bh = NULL; |
@@ -1857,7 +1876,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1857 | 1876 | ||
1858 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | 1877 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, |
1859 | orphan_dir_bh, name, | 1878 | orphan_dir_bh, name, |
1860 | OCFS2_ORPHAN_NAMELEN, de_bh); | 1879 | OCFS2_ORPHAN_NAMELEN, lookup); |
1861 | if (status < 0) { | 1880 | if (status < 0) { |
1862 | ocfs2_inode_unlock(orphan_dir_inode, 1); | 1881 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
1863 | 1882 | ||
@@ -1884,7 +1903,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1884 | struct inode *inode, | 1903 | struct inode *inode, |
1885 | struct ocfs2_dinode *fe, | 1904 | struct ocfs2_dinode *fe, |
1886 | char *name, | 1905 | char *name, |
1887 | struct buffer_head *de_bh, | 1906 | struct ocfs2_dir_lookup_result *lookup, |
1888 | struct inode *orphan_dir_inode) | 1907 | struct inode *orphan_dir_inode) |
1889 | { | 1908 | { |
1890 | struct buffer_head *orphan_dir_bh = NULL; | 1909 | struct buffer_head *orphan_dir_bh = NULL; |
@@ -1910,8 +1929,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1910 | * underneath us... */ | 1929 | * underneath us... */ |
1911 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 1930 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
1912 | if (S_ISDIR(inode->i_mode)) | 1931 | if (S_ISDIR(inode->i_mode)) |
1913 | le16_add_cpu(&orphan_fe->i_links_count, 1); | 1932 | ocfs2_add_links_count(orphan_fe, 1); |
1914 | orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count); | 1933 | orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); |
1915 | 1934 | ||
1916 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); | 1935 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); |
1917 | if (status < 0) { | 1936 | if (status < 0) { |
@@ -1922,7 +1941,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1922 | status = __ocfs2_add_entry(handle, orphan_dir_inode, name, | 1941 | status = __ocfs2_add_entry(handle, orphan_dir_inode, name, |
1923 | OCFS2_ORPHAN_NAMELEN, inode, | 1942 | OCFS2_ORPHAN_NAMELEN, inode, |
1924 | OCFS2_I(inode)->ip_blkno, | 1943 | OCFS2_I(inode)->ip_blkno, |
1925 | orphan_dir_bh, de_bh); | 1944 | orphan_dir_bh, lookup); |
1926 | if (status < 0) { | 1945 | if (status < 0) { |
1927 | mlog_errno(status); | 1946 | mlog_errno(status); |
1928 | goto leave; | 1947 | goto leave; |
@@ -1955,8 +1974,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
1955 | char name[OCFS2_ORPHAN_NAMELEN + 1]; | 1974 | char name[OCFS2_ORPHAN_NAMELEN + 1]; |
1956 | struct ocfs2_dinode *orphan_fe; | 1975 | struct ocfs2_dinode *orphan_fe; |
1957 | int status = 0; | 1976 | int status = 0; |
1958 | struct buffer_head *target_de_bh = NULL; | 1977 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
1959 | struct ocfs2_dir_entry *target_de = NULL; | ||
1960 | 1978 | ||
1961 | mlog_entry_void(); | 1979 | mlog_entry_void(); |
1962 | 1980 | ||
@@ -1971,17 +1989,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
1971 | OCFS2_ORPHAN_NAMELEN); | 1989 | OCFS2_ORPHAN_NAMELEN); |
1972 | 1990 | ||
1973 | /* find it's spot in the orphan directory */ | 1991 | /* find it's spot in the orphan directory */ |
1974 | target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, | 1992 | status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode, |
1975 | orphan_dir_inode, &target_de); | 1993 | &lookup); |
1976 | if (!target_de_bh) { | 1994 | if (status) { |
1977 | status = -ENOENT; | ||
1978 | mlog_errno(status); | 1995 | mlog_errno(status); |
1979 | goto leave; | 1996 | goto leave; |
1980 | } | 1997 | } |
1981 | 1998 | ||
1982 | /* remove it from the orphan directory */ | 1999 | /* remove it from the orphan directory */ |
1983 | status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de, | 2000 | status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup); |
1984 | target_de_bh); | ||
1985 | if (status < 0) { | 2001 | if (status < 0) { |
1986 | mlog_errno(status); | 2002 | mlog_errno(status); |
1987 | goto leave; | 2003 | goto leave; |
@@ -1997,8 +2013,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
1997 | /* do the i_nlink dance! :) */ | 2013 | /* do the i_nlink dance! :) */ |
1998 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2014 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
1999 | if (S_ISDIR(inode->i_mode)) | 2015 | if (S_ISDIR(inode->i_mode)) |
2000 | le16_add_cpu(&orphan_fe->i_links_count, -1); | 2016 | ocfs2_add_links_count(orphan_fe, -1); |
2001 | orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count); | 2017 | orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); |
2002 | 2018 | ||
2003 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); | 2019 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); |
2004 | if (status < 0) { | 2020 | if (status < 0) { |
@@ -2007,7 +2023,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2007 | } | 2023 | } |
2008 | 2024 | ||
2009 | leave: | 2025 | leave: |
2010 | brelse(target_de_bh); | 2026 | ocfs2_free_dir_lookup_result(&lookup); |
2011 | 2027 | ||
2012 | mlog_exit(status); | 2028 | mlog_exit(status); |
2013 | return status; | 2029 | return status; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 946d3c34b90b..1386281950db 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -209,6 +209,7 @@ enum ocfs2_mount_options | |||
209 | struct ocfs2_journal; | 209 | struct ocfs2_journal; |
210 | struct ocfs2_slot_info; | 210 | struct ocfs2_slot_info; |
211 | struct ocfs2_recovery_map; | 211 | struct ocfs2_recovery_map; |
212 | struct ocfs2_replay_map; | ||
212 | struct ocfs2_quota_recovery; | 213 | struct ocfs2_quota_recovery; |
213 | struct ocfs2_dentry_lock; | 214 | struct ocfs2_dentry_lock; |
214 | struct ocfs2_super | 215 | struct ocfs2_super |
@@ -264,6 +265,7 @@ struct ocfs2_super | |||
264 | atomic_t vol_state; | 265 | atomic_t vol_state; |
265 | struct mutex recovery_lock; | 266 | struct mutex recovery_lock; |
266 | struct ocfs2_recovery_map *recovery_map; | 267 | struct ocfs2_recovery_map *recovery_map; |
268 | struct ocfs2_replay_map *replay_map; | ||
267 | struct task_struct *recovery_thread_task; | 269 | struct task_struct *recovery_thread_task; |
268 | int disable_recovery; | 270 | int disable_recovery; |
269 | wait_queue_head_t checkpoint_event; | 271 | wait_queue_head_t checkpoint_event; |
@@ -287,11 +289,6 @@ struct ocfs2_super | |||
287 | 289 | ||
288 | u64 la_last_gd; | 290 | u64 la_last_gd; |
289 | 291 | ||
290 | #ifdef CONFIG_OCFS2_FS_STATS | ||
291 | struct dentry *local_alloc_debug; | ||
292 | char *local_alloc_debug_buf; | ||
293 | #endif | ||
294 | |||
295 | /* Next three fields are for local node slot recovery during | 292 | /* Next three fields are for local node slot recovery during |
296 | * mount. */ | 293 | * mount. */ |
297 | int dirty; | 294 | int dirty; |
@@ -305,9 +302,11 @@ struct ocfs2_super | |||
305 | struct ocfs2_cluster_connection *cconn; | 302 | struct ocfs2_cluster_connection *cconn; |
306 | struct ocfs2_lock_res osb_super_lockres; | 303 | struct ocfs2_lock_res osb_super_lockres; |
307 | struct ocfs2_lock_res osb_rename_lockres; | 304 | struct ocfs2_lock_res osb_rename_lockres; |
305 | struct ocfs2_lock_res osb_nfs_sync_lockres; | ||
308 | struct ocfs2_dlm_debug *osb_dlm_debug; | 306 | struct ocfs2_dlm_debug *osb_dlm_debug; |
309 | 307 | ||
310 | struct dentry *osb_debug_root; | 308 | struct dentry *osb_debug_root; |
309 | struct dentry *osb_ctxt; | ||
311 | 310 | ||
312 | wait_queue_head_t recovery_event; | 311 | wait_queue_head_t recovery_event; |
313 | 312 | ||
@@ -344,6 +343,12 @@ struct ocfs2_super | |||
344 | 343 | ||
345 | /* used to protect metaecc calculation check of xattr. */ | 344 | /* used to protect metaecc calculation check of xattr. */ |
346 | spinlock_t osb_xattr_lock; | 345 | spinlock_t osb_xattr_lock; |
346 | |||
347 | unsigned int osb_dx_mask; | ||
348 | u32 osb_dx_seed[4]; | ||
349 | |||
350 | /* the group we used to allocate inodes. */ | ||
351 | u64 osb_inode_alloc_group; | ||
347 | }; | 352 | }; |
348 | 353 | ||
349 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 354 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
@@ -402,6 +407,51 @@ static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) | |||
402 | return 0; | 407 | return 0; |
403 | } | 408 | } |
404 | 409 | ||
410 | static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb) | ||
411 | { | ||
412 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) | ||
413 | return 1; | ||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) | ||
418 | { | ||
419 | if (ocfs2_supports_indexed_dirs(osb)) | ||
420 | return OCFS2_DX_LINK_MAX; | ||
421 | return OCFS2_LINK_MAX; | ||
422 | } | ||
423 | |||
424 | static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) | ||
425 | { | ||
426 | u32 nlink = le16_to_cpu(di->i_links_count); | ||
427 | u32 hi = le16_to_cpu(di->i_links_count_hi); | ||
428 | |||
429 | if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)) | ||
430 | nlink |= (hi << OCFS2_LINKS_HI_SHIFT); | ||
431 | |||
432 | return nlink; | ||
433 | } | ||
434 | |||
435 | static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink) | ||
436 | { | ||
437 | u16 lo, hi; | ||
438 | |||
439 | lo = nlink; | ||
440 | hi = nlink >> OCFS2_LINKS_HI_SHIFT; | ||
441 | |||
442 | di->i_links_count = cpu_to_le16(lo); | ||
443 | di->i_links_count_hi = cpu_to_le16(hi); | ||
444 | } | ||
445 | |||
446 | static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n) | ||
447 | { | ||
448 | u32 links = ocfs2_read_links_count(di); | ||
449 | |||
450 | links += n; | ||
451 | |||
452 | ocfs2_set_links_count(di, links); | ||
453 | } | ||
454 | |||
405 | /* set / clear functions because cluster events can make these happen | 455 | /* set / clear functions because cluster events can make these happen |
406 | * in parallel so we want the transitions to be atomic. this also | 456 | * in parallel so we want the transitions to be atomic. this also |
407 | * means that any future flags osb_flags must be protected by spinlock | 457 | * means that any future flags osb_flags must be protected by spinlock |
@@ -482,6 +532,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) | |||
482 | #define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ | 532 | #define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ |
483 | (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) | 533 | (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) |
484 | 534 | ||
535 | #define OCFS2_IS_VALID_DX_ROOT(ptr) \ | ||
536 | (!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE)) | ||
537 | |||
538 | #define OCFS2_IS_VALID_DX_LEAF(ptr) \ | ||
539 | (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) | ||
540 | |||
485 | static inline unsigned long ino_from_blkno(struct super_block *sb, | 541 | static inline unsigned long ino_from_blkno(struct super_block *sb, |
486 | u64 blkno) | 542 | u64 blkno) |
487 | { | 543 | { |
@@ -532,6 +588,16 @@ static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, | |||
532 | return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; | 588 | return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; |
533 | } | 589 | } |
534 | 590 | ||
591 | static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb, | ||
592 | u64 blocks) | ||
593 | { | ||
594 | int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; | ||
595 | unsigned int clusters; | ||
596 | |||
597 | clusters = ocfs2_blocks_to_clusters(sb, blocks); | ||
598 | return (u64)clusters << bits; | ||
599 | } | ||
600 | |||
535 | static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, | 601 | static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, |
536 | u64 bytes) | 602 | u64 bytes) |
537 | { | 603 | { |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 2332ef740f4f..7ab6e9e5e77c 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -66,6 +66,8 @@ | |||
66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" | 66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" |
67 | #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" | 67 | #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" |
68 | #define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" | 68 | #define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" |
69 | #define OCFS2_DX_ROOT_SIGNATURE "DXDIR01" | ||
70 | #define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1" | ||
69 | 71 | ||
70 | /* Compatibility flags */ | 72 | /* Compatibility flags */ |
71 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ | 73 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ |
@@ -95,7 +97,8 @@ | |||
95 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | 97 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ |
96 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ | 98 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ |
97 | | OCFS2_FEATURE_INCOMPAT_XATTR \ | 99 | | OCFS2_FEATURE_INCOMPAT_XATTR \ |
98 | | OCFS2_FEATURE_INCOMPAT_META_ECC) | 100 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ |
101 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) | ||
99 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 102 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
100 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 103 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
101 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 104 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) |
@@ -151,6 +154,9 @@ | |||
151 | /* Support for extended attributes */ | 154 | /* Support for extended attributes */ |
152 | #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 | 155 | #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 |
153 | 156 | ||
157 | /* Support for indexed directores */ | ||
158 | #define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400 | ||
159 | |||
154 | /* Metadata checksum and error correction */ | 160 | /* Metadata checksum and error correction */ |
155 | #define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 | 161 | #define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 |
156 | 162 | ||
@@ -411,8 +417,12 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | |||
411 | #define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \ | 417 | #define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \ |
412 | OCFS2_DIR_ROUND) & \ | 418 | OCFS2_DIR_ROUND) & \ |
413 | ~OCFS2_DIR_ROUND) | 419 | ~OCFS2_DIR_ROUND) |
420 | #define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1) | ||
414 | 421 | ||
415 | #define OCFS2_LINK_MAX 32000 | 422 | #define OCFS2_LINK_MAX 32000 |
423 | #define OCFS2_DX_LINK_MAX ((1U << 31) - 1U) | ||
424 | #define OCFS2_LINKS_HI_SHIFT 16 | ||
425 | #define OCFS2_DX_ENTRIES_MAX (0xffffffffU) | ||
416 | 426 | ||
417 | #define S_SHIFT 12 | 427 | #define S_SHIFT 12 |
418 | static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { | 428 | static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { |
@@ -628,8 +638,9 @@ struct ocfs2_super_block { | |||
628 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size | 638 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
629 | for this fs*/ | 639 | for this fs*/ |
630 | __le16 s_reserved0; | 640 | __le16 s_reserved0; |
631 | __le32 s_reserved1; | 641 | __le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash. |
632 | /*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ | 642 | * s_uuid_hash serves as seed[3]. */ |
643 | /*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */ | ||
633 | /*140*/ | 644 | /*140*/ |
634 | 645 | ||
635 | /* | 646 | /* |
@@ -679,7 +690,7 @@ struct ocfs2_dinode { | |||
679 | belongs to */ | 690 | belongs to */ |
680 | __le16 i_suballoc_bit; /* Bit offset in suballocator | 691 | __le16 i_suballoc_bit; /* Bit offset in suballocator |
681 | block group */ | 692 | block group */ |
682 | /*10*/ __le16 i_reserved0; | 693 | /*10*/ __le16 i_links_count_hi; /* High 16 bits of links count */ |
683 | __le16 i_xattr_inline_size; | 694 | __le16 i_xattr_inline_size; |
684 | __le32 i_clusters; /* Cluster count */ | 695 | __le32 i_clusters; /* Cluster count */ |
685 | __le32 i_uid; /* Owner UID */ | 696 | __le32 i_uid; /* Owner UID */ |
@@ -705,7 +716,8 @@ struct ocfs2_dinode { | |||
705 | __le16 i_dyn_features; | 716 | __le16 i_dyn_features; |
706 | __le64 i_xattr_loc; | 717 | __le64 i_xattr_loc; |
707 | /*80*/ struct ocfs2_block_check i_check; /* Error checking */ | 718 | /*80*/ struct ocfs2_block_check i_check; /* Error checking */ |
708 | /*88*/ __le64 i_reserved2[6]; | 719 | /*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ |
720 | __le64 i_reserved2[5]; | ||
709 | /*B8*/ union { | 721 | /*B8*/ union { |
710 | __le64 i_pad1; /* Generic way to refer to this | 722 | __le64 i_pad1; /* Generic way to refer to this |
711 | 64bit union */ | 723 | 64bit union */ |
@@ -781,6 +793,90 @@ struct ocfs2_dir_block_trailer { | |||
781 | /*40*/ | 793 | /*40*/ |
782 | }; | 794 | }; |
783 | 795 | ||
796 | /* | ||
797 | * A directory entry in the indexed tree. We don't store the full name here, | ||
798 | * but instead provide a pointer to the full dirent in the unindexed tree. | ||
799 | * | ||
800 | * We also store name_len here so as to reduce the number of leaf blocks we | ||
801 | * need to search in case of collisions. | ||
802 | */ | ||
803 | struct ocfs2_dx_entry { | ||
804 | __le32 dx_major_hash; /* Used to find logical | ||
805 | * cluster in index */ | ||
806 | __le32 dx_minor_hash; /* Lower bits used to find | ||
807 | * block in cluster */ | ||
808 | __le64 dx_dirent_blk; /* Physical block in unindexed | ||
809 | * tree holding this dirent. */ | ||
810 | }; | ||
811 | |||
812 | struct ocfs2_dx_entry_list { | ||
813 | __le32 de_reserved; | ||
814 | __le16 de_count; /* Maximum number of entries | ||
815 | * possible in de_entries */ | ||
816 | __le16 de_num_used; /* Current number of | ||
817 | * de_entries entries */ | ||
818 | struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries | ||
819 | * in a packed array of | ||
820 | * length de_num_used */ | ||
821 | }; | ||
822 | |||
823 | #define OCFS2_DX_FLAG_INLINE 0x01 | ||
824 | |||
825 | /* | ||
826 | * A directory indexing block. Each indexed directory has one of these, | ||
827 | * pointed to by ocfs2_dinode. | ||
828 | * | ||
829 | * This block stores an indexed btree root, and a set of free space | ||
830 | * start-of-list pointers. | ||
831 | */ | ||
832 | struct ocfs2_dx_root_block { | ||
833 | __u8 dr_signature[8]; /* Signature for verification */ | ||
834 | struct ocfs2_block_check dr_check; /* Error checking */ | ||
835 | __le16 dr_suballoc_slot; /* Slot suballocator this | ||
836 | * block belongs to. */ | ||
837 | __le16 dr_suballoc_bit; /* Bit offset in suballocator | ||
838 | * block group */ | ||
839 | __le32 dr_fs_generation; /* Must match super block */ | ||
840 | __le64 dr_blkno; /* Offset on disk, in blocks */ | ||
841 | __le64 dr_last_eb_blk; /* Pointer to last | ||
842 | * extent block */ | ||
843 | __le32 dr_clusters; /* Clusters allocated | ||
844 | * to the indexed tree. */ | ||
845 | __u8 dr_flags; /* OCFS2_DX_FLAG_* flags */ | ||
846 | __u8 dr_reserved0; | ||
847 | __le16 dr_reserved1; | ||
848 | __le64 dr_dir_blkno; /* Pointer to parent inode */ | ||
849 | __le32 dr_num_entries; /* Total number of | ||
850 | * names stored in | ||
851 | * this directory.*/ | ||
852 | __le32 dr_reserved2; | ||
853 | __le64 dr_free_blk; /* Pointer to head of free | ||
854 | * unindexed block list. */ | ||
855 | __le64 dr_reserved3[15]; | ||
856 | union { | ||
857 | struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 | ||
858 | * bits for maximum space | ||
859 | * efficiency. */ | ||
860 | struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of | ||
861 | * entries. We grow out | ||
862 | * to extents if this | ||
863 | * gets too big. */ | ||
864 | }; | ||
865 | }; | ||
866 | |||
867 | /* | ||
868 | * The header of a leaf block in the indexed tree. | ||
869 | */ | ||
870 | struct ocfs2_dx_leaf { | ||
871 | __u8 dl_signature[8];/* Signature for verification */ | ||
872 | struct ocfs2_block_check dl_check; /* Error checking */ | ||
873 | __le64 dl_blkno; /* Offset on disk, in blocks */ | ||
874 | __le32 dl_fs_generation;/* Must match super block */ | ||
875 | __le32 dl_reserved0; | ||
876 | __le64 dl_reserved1; | ||
877 | struct ocfs2_dx_entry_list dl_list; | ||
878 | }; | ||
879 | |||
784 | /* | 880 | /* |
785 | * On disk allocator group structure for OCFS2 | 881 | * On disk allocator group structure for OCFS2 |
786 | */ | 882 | */ |
@@ -1112,6 +1208,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr( | |||
1112 | return size / sizeof(struct ocfs2_extent_rec); | 1208 | return size / sizeof(struct ocfs2_extent_rec); |
1113 | } | 1209 | } |
1114 | 1210 | ||
1211 | static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb) | ||
1212 | { | ||
1213 | int size; | ||
1214 | |||
1215 | size = sb->s_blocksize - | ||
1216 | offsetof(struct ocfs2_dx_root_block, dr_list.l_recs); | ||
1217 | |||
1218 | return size / sizeof(struct ocfs2_extent_rec); | ||
1219 | } | ||
1220 | |||
1115 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) | 1221 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) |
1116 | { | 1222 | { |
1117 | int size; | 1223 | int size; |
@@ -1132,6 +1238,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb) | |||
1132 | return size / sizeof(struct ocfs2_extent_rec); | 1238 | return size / sizeof(struct ocfs2_extent_rec); |
1133 | } | 1239 | } |
1134 | 1240 | ||
1241 | static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) | ||
1242 | { | ||
1243 | int size; | ||
1244 | |||
1245 | size = sb->s_blocksize - | ||
1246 | offsetof(struct ocfs2_dx_leaf, dl_list.de_entries); | ||
1247 | |||
1248 | return size / sizeof(struct ocfs2_dx_entry); | ||
1249 | } | ||
1250 | |||
1251 | static inline int ocfs2_dx_entries_per_root(struct super_block *sb) | ||
1252 | { | ||
1253 | int size; | ||
1254 | |||
1255 | size = sb->s_blocksize - | ||
1256 | offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries); | ||
1257 | |||
1258 | return size / sizeof(struct ocfs2_dx_entry); | ||
1259 | } | ||
1260 | |||
1135 | static inline u16 ocfs2_local_alloc_size(struct super_block *sb) | 1261 | static inline u16 ocfs2_local_alloc_size(struct super_block *sb) |
1136 | { | 1262 | { |
1137 | u16 size; | 1263 | u16 size; |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index eb6f50c9ceca..a53ce87481bf 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -47,6 +47,7 @@ enum ocfs2_lock_type { | |||
47 | OCFS2_LOCK_TYPE_OPEN, | 47 | OCFS2_LOCK_TYPE_OPEN, |
48 | OCFS2_LOCK_TYPE_FLOCK, | 48 | OCFS2_LOCK_TYPE_FLOCK, |
49 | OCFS2_LOCK_TYPE_QINFO, | 49 | OCFS2_LOCK_TYPE_QINFO, |
50 | OCFS2_LOCK_TYPE_NFS_SYNC, | ||
50 | OCFS2_NUM_LOCK_TYPES | 51 | OCFS2_NUM_LOCK_TYPES |
51 | }; | 52 | }; |
52 | 53 | ||
@@ -81,6 +82,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
81 | case OCFS2_LOCK_TYPE_QINFO: | 82 | case OCFS2_LOCK_TYPE_QINFO: |
82 | c = 'Q'; | 83 | c = 'Q'; |
83 | break; | 84 | break; |
85 | case OCFS2_LOCK_TYPE_NFS_SYNC: | ||
86 | c = 'Y'; | ||
87 | break; | ||
84 | default: | 88 | default: |
85 | c = '\0'; | 89 | c = '\0'; |
86 | } | 90 | } |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a69628603e18..b4ca5911caaf 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -48,7 +48,8 @@ | |||
48 | #include "buffer_head_io.h" | 48 | #include "buffer_head_io.h" |
49 | 49 | ||
50 | #define NOT_ALLOC_NEW_GROUP 0 | 50 | #define NOT_ALLOC_NEW_GROUP 0 |
51 | #define ALLOC_NEW_GROUP 1 | 51 | #define ALLOC_NEW_GROUP 0x1 |
52 | #define ALLOC_GROUPS_FROM_GLOBAL 0x2 | ||
52 | 53 | ||
53 | #define OCFS2_MAX_INODES_TO_STEAL 1024 | 54 | #define OCFS2_MAX_INODES_TO_STEAL 1024 |
54 | 55 | ||
@@ -64,7 +65,9 @@ static int ocfs2_block_group_fill(handle_t *handle, | |||
64 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 65 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
65 | struct inode *alloc_inode, | 66 | struct inode *alloc_inode, |
66 | struct buffer_head *bh, | 67 | struct buffer_head *bh, |
67 | u64 max_block); | 68 | u64 max_block, |
69 | u64 *last_alloc_group, | ||
70 | int flags); | ||
68 | 71 | ||
69 | static int ocfs2_cluster_group_search(struct inode *inode, | 72 | static int ocfs2_cluster_group_search(struct inode *inode, |
70 | struct buffer_head *group_bh, | 73 | struct buffer_head *group_bh, |
@@ -116,6 +119,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
116 | u16 *bg_bit_off); | 119 | u16 *bg_bit_off); |
117 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | 120 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
118 | u32 bits_wanted, u64 max_block, | 121 | u32 bits_wanted, u64 max_block, |
122 | int flags, | ||
119 | struct ocfs2_alloc_context **ac); | 123 | struct ocfs2_alloc_context **ac); |
120 | 124 | ||
121 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | 125 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
@@ -403,7 +407,9 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) | |||
403 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 407 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
404 | struct inode *alloc_inode, | 408 | struct inode *alloc_inode, |
405 | struct buffer_head *bh, | 409 | struct buffer_head *bh, |
406 | u64 max_block) | 410 | u64 max_block, |
411 | u64 *last_alloc_group, | ||
412 | int flags) | ||
407 | { | 413 | { |
408 | int status, credits; | 414 | int status, credits; |
409 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; | 415 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; |
@@ -423,7 +429,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
423 | cl = &fe->id2.i_chain; | 429 | cl = &fe->id2.i_chain; |
424 | status = ocfs2_reserve_clusters_with_limit(osb, | 430 | status = ocfs2_reserve_clusters_with_limit(osb, |
425 | le16_to_cpu(cl->cl_cpg), | 431 | le16_to_cpu(cl->cl_cpg), |
426 | max_block, &ac); | 432 | max_block, flags, &ac); |
427 | if (status < 0) { | 433 | if (status < 0) { |
428 | if (status != -ENOSPC) | 434 | if (status != -ENOSPC) |
429 | mlog_errno(status); | 435 | mlog_errno(status); |
@@ -440,6 +446,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
440 | goto bail; | 446 | goto bail; |
441 | } | 447 | } |
442 | 448 | ||
449 | if (last_alloc_group && *last_alloc_group != 0) { | ||
450 | mlog(0, "use old allocation group %llu for block group alloc\n", | ||
451 | (unsigned long long)*last_alloc_group); | ||
452 | ac->ac_last_group = *last_alloc_group; | ||
453 | } | ||
443 | status = ocfs2_claim_clusters(osb, | 454 | status = ocfs2_claim_clusters(osb, |
444 | handle, | 455 | handle, |
445 | ac, | 456 | ac, |
@@ -514,6 +525,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
514 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); | 525 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); |
515 | 526 | ||
516 | status = 0; | 527 | status = 0; |
528 | |||
529 | /* save the new last alloc group so that the caller can cache it. */ | ||
530 | if (last_alloc_group) | ||
531 | *last_alloc_group = ac->ac_last_group; | ||
532 | |||
517 | bail: | 533 | bail: |
518 | if (handle) | 534 | if (handle) |
519 | ocfs2_commit_trans(osb, handle); | 535 | ocfs2_commit_trans(osb, handle); |
@@ -531,7 +547,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
531 | struct ocfs2_alloc_context *ac, | 547 | struct ocfs2_alloc_context *ac, |
532 | int type, | 548 | int type, |
533 | u32 slot, | 549 | u32 slot, |
534 | int alloc_new_group) | 550 | u64 *last_alloc_group, |
551 | int flags) | ||
535 | { | 552 | { |
536 | int status; | 553 | int status; |
537 | u32 bits_wanted = ac->ac_bits_wanted; | 554 | u32 bits_wanted = ac->ac_bits_wanted; |
@@ -587,7 +604,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
587 | goto bail; | 604 | goto bail; |
588 | } | 605 | } |
589 | 606 | ||
590 | if (alloc_new_group != ALLOC_NEW_GROUP) { | 607 | if (!(flags & ALLOC_NEW_GROUP)) { |
591 | mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " | 608 | mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " |
592 | "and we don't alloc a new group for it.\n", | 609 | "and we don't alloc a new group for it.\n", |
593 | slot, bits_wanted, free_bits); | 610 | slot, bits_wanted, free_bits); |
@@ -596,7 +613,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
596 | } | 613 | } |
597 | 614 | ||
598 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, | 615 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, |
599 | ac->ac_max_block); | 616 | ac->ac_max_block, |
617 | last_alloc_group, flags); | ||
600 | if (status < 0) { | 618 | if (status < 0) { |
601 | if (status != -ENOSPC) | 619 | if (status != -ENOSPC) |
602 | mlog_errno(status); | 620 | mlog_errno(status); |
@@ -640,7 +658,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | |||
640 | 658 | ||
641 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 659 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
642 | EXTENT_ALLOC_SYSTEM_INODE, | 660 | EXTENT_ALLOC_SYSTEM_INODE, |
643 | slot, ALLOC_NEW_GROUP); | 661 | slot, NULL, ALLOC_NEW_GROUP); |
644 | if (status < 0) { | 662 | if (status < 0) { |
645 | if (status != -ENOSPC) | 663 | if (status != -ENOSPC) |
646 | mlog_errno(status); | 664 | mlog_errno(status); |
@@ -686,7 +704,8 @@ static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | |||
686 | 704 | ||
687 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 705 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
688 | INODE_ALLOC_SYSTEM_INODE, | 706 | INODE_ALLOC_SYSTEM_INODE, |
689 | slot, NOT_ALLOC_NEW_GROUP); | 707 | slot, NULL, |
708 | NOT_ALLOC_NEW_GROUP); | ||
690 | if (status >= 0) { | 709 | if (status >= 0) { |
691 | ocfs2_set_inode_steal_slot(osb, slot); | 710 | ocfs2_set_inode_steal_slot(osb, slot); |
692 | break; | 711 | break; |
@@ -703,6 +722,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
703 | { | 722 | { |
704 | int status; | 723 | int status; |
705 | s16 slot = ocfs2_get_inode_steal_slot(osb); | 724 | s16 slot = ocfs2_get_inode_steal_slot(osb); |
725 | u64 alloc_group; | ||
706 | 726 | ||
707 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 727 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
708 | if (!(*ac)) { | 728 | if (!(*ac)) { |
@@ -738,12 +758,22 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
738 | goto inode_steal; | 758 | goto inode_steal; |
739 | 759 | ||
740 | atomic_set(&osb->s_num_inodes_stolen, 0); | 760 | atomic_set(&osb->s_num_inodes_stolen, 0); |
761 | alloc_group = osb->osb_inode_alloc_group; | ||
741 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 762 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
742 | INODE_ALLOC_SYSTEM_INODE, | 763 | INODE_ALLOC_SYSTEM_INODE, |
743 | osb->slot_num, ALLOC_NEW_GROUP); | 764 | osb->slot_num, |
765 | &alloc_group, | ||
766 | ALLOC_NEW_GROUP | | ||
767 | ALLOC_GROUPS_FROM_GLOBAL); | ||
744 | if (status >= 0) { | 768 | if (status >= 0) { |
745 | status = 0; | 769 | status = 0; |
746 | 770 | ||
771 | spin_lock(&osb->osb_lock); | ||
772 | osb->osb_inode_alloc_group = alloc_group; | ||
773 | spin_unlock(&osb->osb_lock); | ||
774 | mlog(0, "after reservation, new allocation group is " | ||
775 | "%llu\n", (unsigned long long)alloc_group); | ||
776 | |||
747 | /* | 777 | /* |
748 | * Some inodes must be freed by us, so try to allocate | 778 | * Some inodes must be freed by us, so try to allocate |
749 | * from our own next time. | 779 | * from our own next time. |
@@ -790,7 +820,7 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |||
790 | 820 | ||
791 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 821 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
792 | GLOBAL_BITMAP_SYSTEM_INODE, | 822 | GLOBAL_BITMAP_SYSTEM_INODE, |
793 | OCFS2_INVALID_SLOT, | 823 | OCFS2_INVALID_SLOT, NULL, |
794 | ALLOC_NEW_GROUP); | 824 | ALLOC_NEW_GROUP); |
795 | if (status < 0 && status != -ENOSPC) { | 825 | if (status < 0 && status != -ENOSPC) { |
796 | mlog_errno(status); | 826 | mlog_errno(status); |
@@ -806,6 +836,7 @@ bail: | |||
806 | * things a bit. */ | 836 | * things a bit. */ |
807 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | 837 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
808 | u32 bits_wanted, u64 max_block, | 838 | u32 bits_wanted, u64 max_block, |
839 | int flags, | ||
809 | struct ocfs2_alloc_context **ac) | 840 | struct ocfs2_alloc_context **ac) |
810 | { | 841 | { |
811 | int status; | 842 | int status; |
@@ -823,7 +854,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | |||
823 | (*ac)->ac_max_block = max_block; | 854 | (*ac)->ac_max_block = max_block; |
824 | 855 | ||
825 | status = -ENOSPC; | 856 | status = -ENOSPC; |
826 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { | 857 | if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) && |
858 | ocfs2_alloc_should_use_local(osb, bits_wanted)) { | ||
827 | status = ocfs2_reserve_local_alloc_bits(osb, | 859 | status = ocfs2_reserve_local_alloc_bits(osb, |
828 | bits_wanted, | 860 | bits_wanted, |
829 | *ac); | 861 | *ac); |
@@ -861,7 +893,8 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |||
861 | u32 bits_wanted, | 893 | u32 bits_wanted, |
862 | struct ocfs2_alloc_context **ac) | 894 | struct ocfs2_alloc_context **ac) |
863 | { | 895 | { |
864 | return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); | 896 | return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, |
897 | ALLOC_NEW_GROUP, ac); | ||
865 | } | 898 | } |
866 | 899 | ||
867 | /* | 900 | /* |
@@ -1618,8 +1651,41 @@ bail: | |||
1618 | return status; | 1651 | return status; |
1619 | } | 1652 | } |
1620 | 1653 | ||
1654 | static void ocfs2_init_inode_ac_group(struct inode *dir, | ||
1655 | struct buffer_head *parent_fe_bh, | ||
1656 | struct ocfs2_alloc_context *ac) | ||
1657 | { | ||
1658 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data; | ||
1659 | /* | ||
1660 | * Try to allocate inodes from some specific group. | ||
1661 | * | ||
1662 | * If the parent dir has recorded the last group used in allocation, | ||
1663 | * cool, use it. Otherwise if we try to allocate new inode from the | ||
1664 | * same slot the parent dir belongs to, use the same chunk. | ||
1665 | * | ||
1666 | * We are very careful here to avoid the mistake of setting | ||
1667 | * ac_last_group to a group descriptor from a different (unlocked) slot. | ||
1668 | */ | ||
1669 | if (OCFS2_I(dir)->ip_last_used_group && | ||
1670 | OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot) | ||
1671 | ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group; | ||
1672 | else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot) | ||
1673 | ac->ac_last_group = ocfs2_which_suballoc_group( | ||
1674 | le64_to_cpu(fe->i_blkno), | ||
1675 | le16_to_cpu(fe->i_suballoc_bit)); | ||
1676 | } | ||
1677 | |||
1678 | static inline void ocfs2_save_inode_ac_group(struct inode *dir, | ||
1679 | struct ocfs2_alloc_context *ac) | ||
1680 | { | ||
1681 | OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group; | ||
1682 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; | ||
1683 | } | ||
1684 | |||
1621 | int ocfs2_claim_new_inode(struct ocfs2_super *osb, | 1685 | int ocfs2_claim_new_inode(struct ocfs2_super *osb, |
1622 | handle_t *handle, | 1686 | handle_t *handle, |
1687 | struct inode *dir, | ||
1688 | struct buffer_head *parent_fe_bh, | ||
1623 | struct ocfs2_alloc_context *ac, | 1689 | struct ocfs2_alloc_context *ac, |
1624 | u16 *suballoc_bit, | 1690 | u16 *suballoc_bit, |
1625 | u64 *fe_blkno) | 1691 | u64 *fe_blkno) |
@@ -1635,6 +1701,8 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb, | |||
1635 | BUG_ON(ac->ac_bits_wanted != 1); | 1701 | BUG_ON(ac->ac_bits_wanted != 1); |
1636 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); | 1702 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); |
1637 | 1703 | ||
1704 | ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); | ||
1705 | |||
1638 | status = ocfs2_claim_suballoc_bits(osb, | 1706 | status = ocfs2_claim_suballoc_bits(osb, |
1639 | ac, | 1707 | ac, |
1640 | handle, | 1708 | handle, |
@@ -1653,6 +1721,7 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb, | |||
1653 | 1721 | ||
1654 | *fe_blkno = bg_blkno + (u64) (*suballoc_bit); | 1722 | *fe_blkno = bg_blkno + (u64) (*suballoc_bit); |
1655 | ac->ac_bits_given++; | 1723 | ac->ac_bits_given++; |
1724 | ocfs2_save_inode_ac_group(dir, ac); | ||
1656 | status = 0; | 1725 | status = 0; |
1657 | bail: | 1726 | bail: |
1658 | mlog_exit(status); | 1727 | mlog_exit(status); |
@@ -2116,3 +2185,162 @@ out: | |||
2116 | 2185 | ||
2117 | return ret; | 2186 | return ret; |
2118 | } | 2187 | } |
2188 | |||
2189 | /* | ||
2190 | * Read the inode specified by blkno to get suballoc_slot and | ||
2191 | * suballoc_bit. | ||
2192 | */ | ||
2193 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | ||
2194 | u16 *suballoc_slot, u16 *suballoc_bit) | ||
2195 | { | ||
2196 | int status; | ||
2197 | struct buffer_head *inode_bh = NULL; | ||
2198 | struct ocfs2_dinode *inode_fe; | ||
2199 | |||
2200 | mlog_entry("blkno: %llu\n", blkno); | ||
2201 | |||
2202 | /* dirty read disk */ | ||
2203 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); | ||
2204 | if (status < 0) { | ||
2205 | mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status); | ||
2206 | goto bail; | ||
2207 | } | ||
2208 | |||
2209 | inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; | ||
2210 | if (!OCFS2_IS_VALID_DINODE(inode_fe)) { | ||
2211 | mlog(ML_ERROR, "invalid inode %llu requested\n", blkno); | ||
2212 | status = -EINVAL; | ||
2213 | goto bail; | ||
2214 | } | ||
2215 | |||
2216 | if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT && | ||
2217 | (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { | ||
2218 | mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", | ||
2219 | blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); | ||
2220 | status = -EINVAL; | ||
2221 | goto bail; | ||
2222 | } | ||
2223 | |||
2224 | if (suballoc_slot) | ||
2225 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); | ||
2226 | if (suballoc_bit) | ||
2227 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); | ||
2228 | |||
2229 | bail: | ||
2230 | brelse(inode_bh); | ||
2231 | |||
2232 | mlog_exit(status); | ||
2233 | return status; | ||
2234 | } | ||
2235 | |||
2236 | /* | ||
2237 | * test whether bit is SET in allocator bitmap or not. on success, 0 | ||
2238 | * is returned and *res is 1 for SET; 0 otherwise. when fails, errno | ||
2239 | * is returned and *res is meaningless. Call this after you have | ||
2240 | * cluster locked against suballoc, or you may get a result based on | ||
2241 | * non-up2date contents | ||
2242 | */ | ||
2243 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | ||
2244 | struct inode *suballoc, | ||
2245 | struct buffer_head *alloc_bh, u64 blkno, | ||
2246 | u16 bit, int *res) | ||
2247 | { | ||
2248 | struct ocfs2_dinode *alloc_fe; | ||
2249 | struct ocfs2_group_desc *group; | ||
2250 | struct buffer_head *group_bh = NULL; | ||
2251 | u64 bg_blkno; | ||
2252 | int status; | ||
2253 | |||
2254 | mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); | ||
2255 | |||
2256 | alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; | ||
2257 | if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { | ||
2258 | mlog(ML_ERROR, "suballoc bit %u out of range of %u\n", | ||
2259 | (unsigned int)bit, | ||
2260 | ocfs2_bits_per_group(&alloc_fe->id2.i_chain)); | ||
2261 | status = -EINVAL; | ||
2262 | goto bail; | ||
2263 | } | ||
2264 | |||
2265 | bg_blkno = ocfs2_which_suballoc_group(blkno, bit); | ||
2266 | status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, | ||
2267 | &group_bh); | ||
2268 | if (status < 0) { | ||
2269 | mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status); | ||
2270 | goto bail; | ||
2271 | } | ||
2272 | |||
2273 | group = (struct ocfs2_group_desc *) group_bh->b_data; | ||
2274 | *res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap); | ||
2275 | |||
2276 | bail: | ||
2277 | brelse(group_bh); | ||
2278 | |||
2279 | mlog_exit(status); | ||
2280 | return status; | ||
2281 | } | ||
2282 | |||
2283 | /* | ||
2284 | * Test if the bit representing this inode (blkno) is set in the | ||
2285 | * suballocator. | ||
2286 | * | ||
2287 | * On success, 0 is returned and *res is 1 for SET; 0 otherwise. | ||
2288 | * | ||
2289 | * In the event of failure, a negative value is returned and *res is | ||
2290 | * meaningless. | ||
2291 | * | ||
2292 | * Callers must make sure to hold nfs_sync_lock to prevent | ||
2293 | * ocfs2_delete_inode() on another node from accessing the same | ||
2294 | * suballocator concurrently. | ||
2295 | */ | ||
2296 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | ||
2297 | { | ||
2298 | int status; | ||
2299 | u16 suballoc_bit = 0, suballoc_slot = 0; | ||
2300 | struct inode *inode_alloc_inode; | ||
2301 | struct buffer_head *alloc_bh = NULL; | ||
2302 | |||
2303 | mlog_entry("blkno: %llu", blkno); | ||
2304 | |||
2305 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, | ||
2306 | &suballoc_bit); | ||
2307 | if (status < 0) { | ||
2308 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); | ||
2309 | goto bail; | ||
2310 | } | ||
2311 | |||
2312 | inode_alloc_inode = | ||
2313 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, | ||
2314 | suballoc_slot); | ||
2315 | if (!inode_alloc_inode) { | ||
2316 | /* the error code could be inaccurate, but we are not able to | ||
2317 | * get the correct one. */ | ||
2318 | status = -EINVAL; | ||
2319 | mlog(ML_ERROR, "unable to get alloc inode in slot %u\n", | ||
2320 | (u32)suballoc_slot); | ||
2321 | goto bail; | ||
2322 | } | ||
2323 | |||
2324 | mutex_lock(&inode_alloc_inode->i_mutex); | ||
2325 | status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); | ||
2326 | if (status < 0) { | ||
2327 | mutex_unlock(&inode_alloc_inode->i_mutex); | ||
2328 | mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", | ||
2329 | (u32)suballoc_slot, status); | ||
2330 | goto bail; | ||
2331 | } | ||
2332 | |||
2333 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, | ||
2334 | blkno, suballoc_bit, res); | ||
2335 | if (status < 0) | ||
2336 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); | ||
2337 | |||
2338 | ocfs2_inode_unlock(inode_alloc_inode, 0); | ||
2339 | mutex_unlock(&inode_alloc_inode->i_mutex); | ||
2340 | |||
2341 | iput(inode_alloc_inode); | ||
2342 | brelse(alloc_bh); | ||
2343 | bail: | ||
2344 | mlog_exit(status); | ||
2345 | return status; | ||
2346 | } | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index e3c13c77f9e8..8c9a78a43164 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb, | |||
88 | u64 *blkno_start); | 88 | u64 *blkno_start); |
89 | int ocfs2_claim_new_inode(struct ocfs2_super *osb, | 89 | int ocfs2_claim_new_inode(struct ocfs2_super *osb, |
90 | handle_t *handle, | 90 | handle_t *handle, |
91 | struct inode *dir, | ||
92 | struct buffer_head *parent_fe_bh, | ||
91 | struct ocfs2_alloc_context *ac, | 93 | struct ocfs2_alloc_context *ac, |
92 | u16 *suballoc_bit, | 94 | u16 *suballoc_bit, |
93 | u64 *fe_blkno); | 95 | u64 *fe_blkno); |
@@ -186,4 +188,6 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | |||
186 | u32 clusters_to_add, u32 extents_to_split, | 188 | u32 clusters_to_add, u32 extents_to_split, |
187 | struct ocfs2_alloc_context **data_ac, | 189 | struct ocfs2_alloc_context **data_ac, |
188 | struct ocfs2_alloc_context **meta_ac); | 190 | struct ocfs2_alloc_context **meta_ac); |
191 | |||
192 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); | ||
189 | #endif /* _CHAINALLOC_H_ */ | 193 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7ac83a81ee55..79ff8d9d37e0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -201,6 +201,170 @@ static const match_table_t tokens = { | |||
201 | {Opt_err, NULL} | 201 | {Opt_err, NULL} |
202 | }; | 202 | }; |
203 | 203 | ||
204 | #ifdef CONFIG_DEBUG_FS | ||
205 | static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | ||
206 | { | ||
207 | int out = 0; | ||
208 | int i; | ||
209 | struct ocfs2_cluster_connection *cconn = osb->cconn; | ||
210 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
211 | |||
212 | out += snprintf(buf + out, len - out, | ||
213 | "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", | ||
214 | "Device", osb->dev_str, osb->uuid_str, | ||
215 | osb->fs_generation, osb->vol_label); | ||
216 | |||
217 | out += snprintf(buf + out, len - out, | ||
218 | "%10s => State: %d Flags: 0x%lX\n", "Volume", | ||
219 | atomic_read(&osb->vol_state), osb->osb_flags); | ||
220 | |||
221 | out += snprintf(buf + out, len - out, | ||
222 | "%10s => Block: %lu Cluster: %d\n", "Sizes", | ||
223 | osb->sb->s_blocksize, osb->s_clustersize); | ||
224 | |||
225 | out += snprintf(buf + out, len - out, | ||
226 | "%10s => Compat: 0x%X Incompat: 0x%X " | ||
227 | "ROcompat: 0x%X\n", | ||
228 | "Features", osb->s_feature_compat, | ||
229 | osb->s_feature_incompat, osb->s_feature_ro_compat); | ||
230 | |||
231 | out += snprintf(buf + out, len - out, | ||
232 | "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", | ||
233 | osb->s_mount_opt, osb->s_atime_quantum); | ||
234 | |||
235 | out += snprintf(buf + out, len - out, | ||
236 | "%10s => Stack: %s Name: %*s Version: %d.%d\n", | ||
237 | "Cluster", | ||
238 | (*osb->osb_cluster_stack == '\0' ? | ||
239 | "o2cb" : osb->osb_cluster_stack), | ||
240 | cconn->cc_namelen, cconn->cc_name, | ||
241 | cconn->cc_version.pv_major, cconn->cc_version.pv_minor); | ||
242 | |||
243 | spin_lock(&osb->dc_task_lock); | ||
244 | out += snprintf(buf + out, len - out, | ||
245 | "%10s => Pid: %d Count: %lu WakeSeq: %lu " | ||
246 | "WorkSeq: %lu\n", "DownCnvt", | ||
247 | task_pid_nr(osb->dc_task), osb->blocked_lock_count, | ||
248 | osb->dc_wake_sequence, osb->dc_work_sequence); | ||
249 | spin_unlock(&osb->dc_task_lock); | ||
250 | |||
251 | spin_lock(&osb->osb_lock); | ||
252 | out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", | ||
253 | "Recovery", | ||
254 | (osb->recovery_thread_task ? | ||
255 | task_pid_nr(osb->recovery_thread_task) : -1)); | ||
256 | if (rm->rm_used == 0) | ||
257 | out += snprintf(buf + out, len - out, " None\n"); | ||
258 | else { | ||
259 | for (i = 0; i < rm->rm_used; i++) | ||
260 | out += snprintf(buf + out, len - out, " %d", | ||
261 | rm->rm_entries[i]); | ||
262 | out += snprintf(buf + out, len - out, "\n"); | ||
263 | } | ||
264 | spin_unlock(&osb->osb_lock); | ||
265 | |||
266 | out += snprintf(buf + out, len - out, | ||
267 | "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", | ||
268 | task_pid_nr(osb->commit_task), osb->osb_commit_interval, | ||
269 | atomic_read(&osb->needs_checkpoint)); | ||
270 | |||
271 | out += snprintf(buf + out, len - out, | ||
272 | "%10s => State: %d NumTxns: %d TxnId: %lu\n", | ||
273 | "Journal", osb->journal->j_state, | ||
274 | atomic_read(&osb->journal->j_num_trans), | ||
275 | osb->journal->j_trans_id); | ||
276 | |||
277 | out += snprintf(buf + out, len - out, | ||
278 | "%10s => GlobalAllocs: %d LocalAllocs: %d " | ||
279 | "SubAllocs: %d LAWinMoves: %d SAExtends: %d\n", | ||
280 | "Stats", | ||
281 | atomic_read(&osb->alloc_stats.bitmap_data), | ||
282 | atomic_read(&osb->alloc_stats.local_data), | ||
283 | atomic_read(&osb->alloc_stats.bg_allocs), | ||
284 | atomic_read(&osb->alloc_stats.moves), | ||
285 | atomic_read(&osb->alloc_stats.bg_extends)); | ||
286 | |||
287 | out += snprintf(buf + out, len - out, | ||
288 | "%10s => State: %u Descriptor: %llu Size: %u bits " | ||
289 | "Default: %u bits\n", | ||
290 | "LocalAlloc", osb->local_alloc_state, | ||
291 | (unsigned long long)osb->la_last_gd, | ||
292 | osb->local_alloc_bits, osb->local_alloc_default_bits); | ||
293 | |||
294 | spin_lock(&osb->osb_lock); | ||
295 | out += snprintf(buf + out, len - out, | ||
296 | "%10s => Slot: %d NumStolen: %d\n", "Steal", | ||
297 | osb->s_inode_steal_slot, | ||
298 | atomic_read(&osb->s_num_inodes_stolen)); | ||
299 | spin_unlock(&osb->osb_lock); | ||
300 | |||
301 | out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", | ||
302 | "Slots", "Num", "RecoGen"); | ||
303 | |||
304 | for (i = 0; i < osb->max_slots; ++i) { | ||
305 | out += snprintf(buf + out, len - out, | ||
306 | "%10s %c %3d %10d\n", | ||
307 | " ", | ||
308 | (i == osb->slot_num ? '*' : ' '), | ||
309 | i, osb->slot_recovery_generations[i]); | ||
310 | } | ||
311 | |||
312 | return out; | ||
313 | } | ||
314 | |||
315 | static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) | ||
316 | { | ||
317 | struct ocfs2_super *osb = inode->i_private; | ||
318 | char *buf = NULL; | ||
319 | |||
320 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
321 | if (!buf) | ||
322 | goto bail; | ||
323 | |||
324 | i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE)); | ||
325 | |||
326 | file->private_data = buf; | ||
327 | |||
328 | return 0; | ||
329 | bail: | ||
330 | return -ENOMEM; | ||
331 | } | ||
332 | |||
333 | static int ocfs2_debug_release(struct inode *inode, struct file *file) | ||
334 | { | ||
335 | kfree(file->private_data); | ||
336 | return 0; | ||
337 | } | ||
338 | |||
339 | static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, | ||
340 | size_t nbytes, loff_t *ppos) | ||
341 | { | ||
342 | return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, | ||
343 | i_size_read(file->f_mapping->host)); | ||
344 | } | ||
345 | #else | ||
346 | static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) | ||
347 | { | ||
348 | return 0; | ||
349 | } | ||
350 | static int ocfs2_debug_release(struct inode *inode, struct file *file) | ||
351 | { | ||
352 | return 0; | ||
353 | } | ||
354 | static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, | ||
355 | size_t nbytes, loff_t *ppos) | ||
356 | { | ||
357 | return 0; | ||
358 | } | ||
359 | #endif /* CONFIG_DEBUG_FS */ | ||
360 | |||
361 | static struct file_operations ocfs2_osb_debug_fops = { | ||
362 | .open = ocfs2_osb_debug_open, | ||
363 | .release = ocfs2_debug_release, | ||
364 | .read = ocfs2_debug_read, | ||
365 | .llseek = generic_file_llseek, | ||
366 | }; | ||
367 | |||
204 | /* | 368 | /* |
205 | * write_super and sync_fs ripped right out of ext3. | 369 | * write_super and sync_fs ripped right out of ext3. |
206 | */ | 370 | */ |
@@ -926,6 +1090,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
926 | goto read_super_error; | 1090 | goto read_super_error; |
927 | } | 1091 | } |
928 | 1092 | ||
1093 | osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR, | ||
1094 | osb->osb_debug_root, | ||
1095 | osb, | ||
1096 | &ocfs2_osb_debug_fops); | ||
1097 | if (!osb->osb_ctxt) { | ||
1098 | status = -EINVAL; | ||
1099 | mlog_errno(status); | ||
1100 | goto read_super_error; | ||
1101 | } | ||
1102 | |||
929 | status = ocfs2_mount_volume(sb); | 1103 | status = ocfs2_mount_volume(sb); |
930 | if (osb->root_inode) | 1104 | if (osb->root_inode) |
931 | inode = igrab(osb->root_inode); | 1105 | inode = igrab(osb->root_inode); |
@@ -1620,6 +1794,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1620 | osb = OCFS2_SB(sb); | 1794 | osb = OCFS2_SB(sb); |
1621 | BUG_ON(!osb); | 1795 | BUG_ON(!osb); |
1622 | 1796 | ||
1797 | debugfs_remove(osb->osb_ctxt); | ||
1798 | |||
1623 | ocfs2_disable_quotas(osb); | 1799 | ocfs2_disable_quotas(osb); |
1624 | 1800 | ||
1625 | ocfs2_shutdown_local_alloc(osb); | 1801 | ocfs2_shutdown_local_alloc(osb); |
@@ -1742,6 +1918,12 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1742 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); | 1918 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); |
1743 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); | 1919 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); |
1744 | 1920 | ||
1921 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; | ||
1922 | |||
1923 | for (i = 0; i < 3; i++) | ||
1924 | osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]); | ||
1925 | osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash); | ||
1926 | |||
1745 | osb->sb = sb; | 1927 | osb->sb = sb; |
1746 | /* Save off for ocfs2_rw_direct */ | 1928 | /* Save off for ocfs2_rw_direct */ |
1747 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1929 | osb->s_sectsize_bits = blksize_bits(sector_size); |
@@ -2130,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
2130 | * lock, and it's marked as dirty, set the bit in the recover | 2312 | * lock, and it's marked as dirty, set the bit in the recover |
2131 | * map and launch a recovery thread for it. */ | 2313 | * map and launch a recovery thread for it. */ |
2132 | status = ocfs2_mark_dead_nodes(osb); | 2314 | status = ocfs2_mark_dead_nodes(osb); |
2315 | if (status < 0) { | ||
2316 | mlog_errno(status); | ||
2317 | goto finally; | ||
2318 | } | ||
2319 | |||
2320 | status = ocfs2_compute_replay_slots(osb); | ||
2133 | if (status < 0) | 2321 | if (status < 0) |
2134 | mlog_errno(status); | 2322 | mlog_errno(status); |
2135 | 2323 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2563df89fc2a..15631019dc63 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -512,7 +512,7 @@ int ocfs2_calc_xattr_init(struct inode *dir, | |||
512 | struct ocfs2_security_xattr_info *si, | 512 | struct ocfs2_security_xattr_info *si, |
513 | int *want_clusters, | 513 | int *want_clusters, |
514 | int *xattr_credits, | 514 | int *xattr_credits, |
515 | struct ocfs2_alloc_context **xattr_ac) | 515 | int *want_meta) |
516 | { | 516 | { |
517 | int ret = 0; | 517 | int ret = 0; |
518 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 518 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
@@ -554,11 +554,7 @@ int ocfs2_calc_xattr_init(struct inode *dir, | |||
554 | if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || | 554 | if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || |
555 | (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || | 555 | (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || |
556 | (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { | 556 | (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { |
557 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); | 557 | *want_meta = *want_meta + 1; |
558 | if (ret) { | ||
559 | mlog_errno(ret); | ||
560 | return ret; | ||
561 | } | ||
562 | *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; | 558 | *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; |
563 | } | 559 | } |
564 | 560 | ||
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 5a1ebc789f7e..1ca7e9a1b7bc 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h | |||
@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *, | |||
68 | int *, int *, struct ocfs2_alloc_context **); | 68 | int *, int *, struct ocfs2_alloc_context **); |
69 | int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, | 69 | int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, |
70 | int, struct ocfs2_security_xattr_info *, | 70 | int, struct ocfs2_security_xattr_info *, |
71 | int *, int *, struct ocfs2_alloc_context **); | 71 | int *, int *, int *); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * xattrs can live inside an inode, as part of an external xattr block, | 74 | * xattrs can live inside an inode, as part of an external xattr block, |
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 633e9dc972bb..379ae5fb4411 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -262,14 +262,19 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
262 | { | 262 | { |
263 | struct super_block *s = dentry->d_sb; | 263 | struct super_block *s = dentry->d_sb; |
264 | struct omfs_sb_info *sbi = OMFS_SB(s); | 264 | struct omfs_sb_info *sbi = OMFS_SB(s); |
265 | u64 id = huge_encode_dev(s->s_bdev->bd_dev); | ||
266 | |||
265 | buf->f_type = OMFS_MAGIC; | 267 | buf->f_type = OMFS_MAGIC; |
266 | buf->f_bsize = sbi->s_blocksize; | 268 | buf->f_bsize = sbi->s_blocksize; |
267 | buf->f_blocks = sbi->s_num_blocks; | 269 | buf->f_blocks = sbi->s_num_blocks; |
268 | buf->f_files = sbi->s_num_blocks; | 270 | buf->f_files = sbi->s_num_blocks; |
269 | buf->f_namelen = OMFS_NAMELEN; | 271 | buf->f_namelen = OMFS_NAMELEN; |
272 | buf->f_fsid.val[0] = (u32)id; | ||
273 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
270 | 274 | ||
271 | buf->f_bfree = buf->f_bavail = buf->f_ffree = | 275 | buf->f_bfree = buf->f_bavail = buf->f_ffree = |
272 | omfs_count_free(s); | 276 | omfs_count_free(s); |
277 | |||
273 | return 0; | 278 | return 0; |
274 | } | 279 | } |
275 | 280 | ||
@@ -421,7 +426,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) | |||
421 | 426 | ||
422 | sbi->s_uid = current_uid(); | 427 | sbi->s_uid = current_uid(); |
423 | sbi->s_gid = current_gid(); | 428 | sbi->s_gid = current_gid(); |
424 | sbi->s_dmask = sbi->s_fmask = current->fs->umask; | 429 | sbi->s_dmask = sbi->s_fmask = current_umask(); |
425 | 430 | ||
426 | if (!parse_options((char *) data, sbi)) | 431 | if (!parse_options((char *) data, sbi)) |
427 | goto end; | 432 | goto end; |
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/rcupdate.h> | 29 | #include <linux/rcupdate.h> |
30 | #include <linux/audit.h> | 30 | #include <linux/audit.h> |
31 | #include <linux/falloc.h> | 31 | #include <linux/falloc.h> |
32 | #include <linux/fs_struct.h> | ||
32 | 33 | ||
33 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 34 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
34 | { | 35 | { |
diff --git a/fs/proc/base.c b/fs/proc/base.c index e0afd326b688..f71559784bfb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/oom.h> | 80 | #include <linux/oom.h> |
81 | #include <linux/elf.h> | 81 | #include <linux/elf.h> |
82 | #include <linux/pid_namespace.h> | 82 | #include <linux/pid_namespace.h> |
83 | #include <linux/fs_struct.h> | ||
83 | #include "internal.h" | 84 | #include "internal.h" |
84 | 85 | ||
85 | /* NOTE: | 86 | /* NOTE: |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 43d23948384a..74ea974f5ca6 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -120,7 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
120 | K(i.freeram-i.freehigh), | 120 | K(i.freeram-i.freehigh), |
121 | #endif | 121 | #endif |
122 | #ifndef CONFIG_MMU | 122 | #ifndef CONFIG_MMU |
123 | K((unsigned long) atomic_read(&mmap_pages_allocated)), | 123 | K((unsigned long) atomic_long_read(&mmap_pages_allocated)), |
124 | #endif | 124 | #endif |
125 | K(i.totalswap), | 125 | K(i.totalswap), |
126 | K(i.freeswap), | 126 | K(i.freeswap), |
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index b446d7ad0b0d..7e14d1a04001 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c | |||
@@ -76,7 +76,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) | |||
76 | 76 | ||
77 | /* | 77 | /* |
78 | * display a list of all the REGIONs the kernel knows about | 78 | * display a list of all the REGIONs the kernel knows about |
79 | * - nommu kernals have a single flat list | 79 | * - nommu kernels have a single flat list |
80 | */ | 80 | */ |
81 | static int nommu_region_list_show(struct seq_file *m, void *_p) | 81 | static int nommu_region_list_show(struct seq_file *m, void *_p) |
82 | { | 82 | { |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 343ea1216bc8..863464d5519c 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/mm.h> | 2 | #include <linux/mm.h> |
3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
4 | #include <linux/fdtable.h> | 4 | #include <linux/fdtable.h> |
5 | #include <linux/fs_struct.h> | ||
5 | #include <linux/mount.h> | 6 | #include <linux/mount.h> |
6 | #include <linux/ptrace.h> | 7 | #include <linux/ptrace.h> |
7 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
@@ -49,7 +50,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
49 | else | 50 | else |
50 | bytes += kobjsize(mm); | 51 | bytes += kobjsize(mm); |
51 | 52 | ||
52 | if (current->fs && atomic_read(¤t->fs->count) > 1) | 53 | if (current->fs && current->fs->users > 1) |
53 | sbytes += kobjsize(current->fs); | 54 | sbytes += kobjsize(current->fs); |
54 | else | 55 | else |
55 | bytes += kobjsize(current->fs); | 56 | bytes += kobjsize(current->fs); |
@@ -136,14 +137,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
136 | } | 137 | } |
137 | 138 | ||
138 | seq_printf(m, | 139 | seq_printf(m, |
139 | "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 140 | "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
140 | vma->vm_start, | 141 | vma->vm_start, |
141 | vma->vm_end, | 142 | vma->vm_end, |
142 | flags & VM_READ ? 'r' : '-', | 143 | flags & VM_READ ? 'r' : '-', |
143 | flags & VM_WRITE ? 'w' : '-', | 144 | flags & VM_WRITE ? 'w' : '-', |
144 | flags & VM_EXEC ? 'x' : '-', | 145 | flags & VM_EXEC ? 'x' : '-', |
145 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | 146 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
146 | vma->vm_pgoff << PAGE_SHIFT, | 147 | (unsigned long long) vma->vm_pgoff << PAGE_SHIFT, |
147 | MAJOR(dev), MINOR(dev), ino, &len); | 148 | MAJOR(dev), MINOR(dev), ino, &len); |
148 | 149 | ||
149 | if (file) { | 150 | if (file) { |
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2aad1044b84c..fe1f0f31d11c 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -282,6 +282,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock ) | |||
282 | static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) | 282 | static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) |
283 | { | 283 | { |
284 | struct super_block *sb = dentry->d_sb; | 284 | struct super_block *sb = dentry->d_sb; |
285 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
285 | 286 | ||
286 | lock_kernel(); | 287 | lock_kernel(); |
287 | 288 | ||
@@ -291,6 +292,8 @@ static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
291 | buf->f_bfree = qnx4_count_free_blocks(sb); | 292 | buf->f_bfree = qnx4_count_free_blocks(sb); |
292 | buf->f_bavail = buf->f_bfree; | 293 | buf->f_bavail = buf->f_bfree; |
293 | buf->f_namelen = QNX4_NAME_MAX; | 294 | buf->f_namelen = QNX4_NAME_MAX; |
295 | buf->f_fsid.val[0] = (u32)id; | ||
296 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
294 | 297 | ||
295 | unlock_kernel(); | 298 | unlock_kernel(); |
296 | 299 | ||
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 2ca967a5ef77..607c579e5eca 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -823,7 +823,7 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
823 | 823 | ||
824 | spin_lock(&inode_lock); | 824 | spin_lock(&inode_lock); |
825 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 825 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
826 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 826 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) |
827 | continue; | 827 | continue; |
828 | if (!atomic_read(&inode->i_writecount)) | 828 | if (!atomic_read(&inode->i_writecount)) |
829 | continue; | 829 | continue; |
diff --git a/fs/read_write.c b/fs/read_write.c index 400fe81c973e..9d1e76bb9ee1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -731,6 +731,62 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | |||
731 | return ret; | 731 | return ret; |
732 | } | 732 | } |
733 | 733 | ||
734 | static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) | ||
735 | { | ||
736 | #define HALF_LONG_BITS (BITS_PER_LONG / 2) | ||
737 | return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; | ||
738 | } | ||
739 | |||
740 | SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | ||
741 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | ||
742 | { | ||
743 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
744 | struct file *file; | ||
745 | ssize_t ret = -EBADF; | ||
746 | int fput_needed; | ||
747 | |||
748 | if (pos < 0) | ||
749 | return -EINVAL; | ||
750 | |||
751 | file = fget_light(fd, &fput_needed); | ||
752 | if (file) { | ||
753 | ret = -ESPIPE; | ||
754 | if (file->f_mode & FMODE_PREAD) | ||
755 | ret = vfs_readv(file, vec, vlen, &pos); | ||
756 | fput_light(file, fput_needed); | ||
757 | } | ||
758 | |||
759 | if (ret > 0) | ||
760 | add_rchar(current, ret); | ||
761 | inc_syscr(current); | ||
762 | return ret; | ||
763 | } | ||
764 | |||
765 | SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | ||
766 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | ||
767 | { | ||
768 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
769 | struct file *file; | ||
770 | ssize_t ret = -EBADF; | ||
771 | int fput_needed; | ||
772 | |||
773 | if (pos < 0) | ||
774 | return -EINVAL; | ||
775 | |||
776 | file = fget_light(fd, &fput_needed); | ||
777 | if (file) { | ||
778 | ret = -ESPIPE; | ||
779 | if (file->f_mode & FMODE_PWRITE) | ||
780 | ret = vfs_writev(file, vec, vlen, &pos); | ||
781 | fput_light(file, fput_needed); | ||
782 | } | ||
783 | |||
784 | if (ret > 0) | ||
785 | add_wchar(current, ret); | ||
786 | inc_syscw(current); | ||
787 | return ret; | ||
788 | } | ||
789 | |||
734 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | 790 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, |
735 | size_t count, loff_t max) | 791 | size_t count, loff_t max) |
736 | { | 792 | { |
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig index 949b8c6addc8..513f431038f9 100644 --- a/fs/reiserfs/Kconfig +++ b/fs/reiserfs/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config REISERFS_FS | 1 | config REISERFS_FS |
2 | tristate "Reiserfs support" | 2 | tristate "Reiserfs support" |
3 | select CRC32 | ||
3 | help | 4 | help |
4 | Stores not just filenames but the files themselves in a balanced | 5 | Stores not just filenames but the files themselves in a balanced |
5 | tree. Uses journalling. | 6 | tree. Uses journalling. |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 972250c62896..0ae6486d9046 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/mnt_namespace.h> | 27 | #include <linux/mnt_namespace.h> |
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
29 | #include <linux/namei.h> | 29 | #include <linux/namei.h> |
30 | #include <linux/crc32.h> | ||
30 | 31 | ||
31 | struct file_system_type reiserfs_fs_type; | 32 | struct file_system_type reiserfs_fs_type; |
32 | 33 | ||
@@ -1904,6 +1905,10 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1904 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1905 | buf->f_bsize = dentry->d_sb->s_blocksize; |
1905 | /* changed to accommodate gcc folks. */ | 1906 | /* changed to accommodate gcc folks. */ |
1906 | buf->f_type = REISERFS_SUPER_MAGIC; | 1907 | buf->f_type = REISERFS_SUPER_MAGIC; |
1908 | buf->f_fsid.val[0] = (u32)crc32_le(0, rs->s_uuid, sizeof(rs->s_uuid)/2); | ||
1909 | buf->f_fsid.val[1] = (u32)crc32_le(0, rs->s_uuid + sizeof(rs->s_uuid)/2, | ||
1910 | sizeof(rs->s_uuid)/2); | ||
1911 | |||
1907 | return 0; | 1912 | return 0; |
1908 | } | 1913 | } |
1909 | 1914 | ||
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index d423416d93d1..c303c426fe2b 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -428,7 +428,7 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |||
428 | } else { | 428 | } else { |
429 | apply_umask: | 429 | apply_umask: |
430 | /* no ACL, apply umask */ | 430 | /* no ACL, apply umask */ |
431 | inode->i_mode &= ~current->fs->umask; | 431 | inode->i_mode &= ~current_umask(); |
432 | } | 432 | } |
433 | 433 | ||
434 | return err; | 434 | return err; |
diff --git a/fs/splice.c b/fs/splice.c index 4ed0ba44a966..dd727d43e5b7 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -59,7 +59,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, | |||
59 | */ | 59 | */ |
60 | wait_on_page_writeback(page); | 60 | wait_on_page_writeback(page); |
61 | 61 | ||
62 | if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) | 62 | if (page_has_private(page) && |
63 | !try_to_release_page(page, GFP_KERNEL)) | ||
63 | goto out_unlock; | 64 | goto out_unlock; |
64 | 65 | ||
65 | /* | 66 | /* |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 681ec0d83799..ffa6edcd2d0c 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -301,6 +301,7 @@ failure: | |||
301 | static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 301 | static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
302 | { | 302 | { |
303 | struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info; | 303 | struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info; |
304 | u64 id = huge_encode_dev(dentry->d_sb->s_bdev->bd_dev); | ||
304 | 305 | ||
305 | TRACE("Entered squashfs_statfs\n"); | 306 | TRACE("Entered squashfs_statfs\n"); |
306 | 307 | ||
@@ -311,6 +312,8 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
311 | buf->f_files = msblk->inodes; | 312 | buf->f_files = msblk->inodes; |
312 | buf->f_ffree = 0; | 313 | buf->f_ffree = 0; |
313 | buf->f_namelen = SQUASHFS_NAME_LEN; | 314 | buf->f_namelen = SQUASHFS_NAME_LEN; |
315 | buf->f_fsid.val[0] = (u32)id; | ||
316 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
314 | 317 | ||
315 | return 0; | 318 | return 0; |
316 | } | 319 | } |
diff --git a/fs/super.c b/fs/super.c index 2ba481518ba7..77cb4ec919b9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -287,6 +287,7 @@ int fsync_super(struct super_block *sb) | |||
287 | __fsync_super(sb); | 287 | __fsync_super(sb); |
288 | return sync_blockdev(sb->s_bdev); | 288 | return sync_blockdev(sb->s_bdev); |
289 | } | 289 | } |
290 | EXPORT_SYMBOL_GPL(fsync_super); | ||
290 | 291 | ||
291 | /** | 292 | /** |
292 | * generic_shutdown_super - common helper for ->kill_sb() | 293 | * generic_shutdown_super - common helper for ->kill_sb() |
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3d81bf58dae2..da20b48d350f 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -90,6 +90,7 @@ static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
90 | { | 90 | { |
91 | struct super_block *sb = dentry->d_sb; | 91 | struct super_block *sb = dentry->d_sb; |
92 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 92 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
93 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
93 | 94 | ||
94 | buf->f_type = sb->s_magic; | 95 | buf->f_type = sb->s_magic; |
95 | buf->f_bsize = sb->s_blocksize; | 96 | buf->f_bsize = sb->s_blocksize; |
@@ -98,6 +99,8 @@ static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
98 | buf->f_files = sbi->s_ninodes; | 99 | buf->f_files = sbi->s_ninodes; |
99 | buf->f_ffree = sysv_count_free_inodes(sb); | 100 | buf->f_ffree = sysv_count_free_inodes(sb); |
100 | buf->f_namelen = SYSV_NAMELEN; | 101 | buf->f_namelen = SYSV_NAMELEN; |
102 | buf->f_fsid.val[0] = (u32)id; | ||
103 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
101 | return 0; | 104 | return 0; |
102 | } | 105 | } |
103 | 106 | ||
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index e35b54d5059d..830e3f76f442 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig | |||
@@ -22,7 +22,7 @@ config UBIFS_FS_ADVANCED_COMPR | |||
22 | depends on UBIFS_FS | 22 | depends on UBIFS_FS |
23 | help | 23 | help |
24 | This option allows to explicitly choose which compressions, if any, | 24 | This option allows to explicitly choose which compressions, if any, |
25 | are enabled in UBIFS. Removing compressors means inbility to read | 25 | are enabled in UBIFS. Removing compressors means inability to read |
26 | existing file systems. | 26 | existing file systems. |
27 | 27 | ||
28 | If unsure, say 'N'. | 28 | If unsure, say 'N'. |
@@ -32,7 +32,7 @@ config UBIFS_FS_LZO | |||
32 | depends on UBIFS_FS | 32 | depends on UBIFS_FS |
33 | default y | 33 | default y |
34 | help | 34 | help |
35 | LZO compressor is generally faster then zlib but compresses worse. | 35 | LZO compressor is generally faster than zlib but compresses worse. |
36 | Say 'Y' if unsure. | 36 | Say 'Y' if unsure. |
37 | 37 | ||
38 | config UBIFS_FS_ZLIB | 38 | config UBIFS_FS_ZLIB |
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 2bb788a2acb1..e48e9a3af763 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c | |||
@@ -87,12 +87,12 @@ static int read_block_bitmap(struct super_block *sb, | |||
87 | { | 87 | { |
88 | struct buffer_head *bh = NULL; | 88 | struct buffer_head *bh = NULL; |
89 | int retval = 0; | 89 | int retval = 0; |
90 | kernel_lb_addr loc; | 90 | struct kernel_lb_addr loc; |
91 | 91 | ||
92 | loc.logicalBlockNum = bitmap->s_extPosition; | 92 | loc.logicalBlockNum = bitmap->s_extPosition; |
93 | loc.partitionReferenceNum = UDF_SB(sb)->s_partition; | 93 | loc.partitionReferenceNum = UDF_SB(sb)->s_partition; |
94 | 94 | ||
95 | bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block)); | 95 | bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block)); |
96 | if (!bh) | 96 | if (!bh) |
97 | retval = -EIO; | 97 | retval = -EIO; |
98 | 98 | ||
@@ -140,27 +140,29 @@ static inline int load_block_bitmap(struct super_block *sb, | |||
140 | return slot; | 140 | return slot; |
141 | } | 141 | } |
142 | 142 | ||
143 | static bool udf_add_free_space(struct udf_sb_info *sbi, | 143 | static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt) |
144 | u16 partition, u32 cnt) | ||
145 | { | 144 | { |
145 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
146 | struct logicalVolIntegrityDesc *lvid; | 146 | struct logicalVolIntegrityDesc *lvid; |
147 | 147 | ||
148 | if (sbi->s_lvid_bh == NULL) | 148 | if (!sbi->s_lvid_bh) |
149 | return false; | 149 | return; |
150 | 150 | ||
151 | lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; | 151 | lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; |
152 | le32_add_cpu(&lvid->freeSpaceTable[partition], cnt); | 152 | le32_add_cpu(&lvid->freeSpaceTable[partition], cnt); |
153 | return true; | 153 | udf_updated_lvid(sb); |
154 | } | 154 | } |
155 | 155 | ||
156 | static void udf_bitmap_free_blocks(struct super_block *sb, | 156 | static void udf_bitmap_free_blocks(struct super_block *sb, |
157 | struct inode *inode, | 157 | struct inode *inode, |
158 | struct udf_bitmap *bitmap, | 158 | struct udf_bitmap *bitmap, |
159 | kernel_lb_addr bloc, uint32_t offset, | 159 | struct kernel_lb_addr *bloc, |
160 | uint32_t offset, | ||
160 | uint32_t count) | 161 | uint32_t count) |
161 | { | 162 | { |
162 | struct udf_sb_info *sbi = UDF_SB(sb); | 163 | struct udf_sb_info *sbi = UDF_SB(sb); |
163 | struct buffer_head *bh = NULL; | 164 | struct buffer_head *bh = NULL; |
165 | struct udf_part_map *partmap; | ||
164 | unsigned long block; | 166 | unsigned long block; |
165 | unsigned long block_group; | 167 | unsigned long block_group; |
166 | unsigned long bit; | 168 | unsigned long bit; |
@@ -169,17 +171,17 @@ static void udf_bitmap_free_blocks(struct super_block *sb, | |||
169 | unsigned long overflow; | 171 | unsigned long overflow; |
170 | 172 | ||
171 | mutex_lock(&sbi->s_alloc_mutex); | 173 | mutex_lock(&sbi->s_alloc_mutex); |
172 | if (bloc.logicalBlockNum < 0 || | 174 | partmap = &sbi->s_partmaps[bloc->partitionReferenceNum]; |
173 | (bloc.logicalBlockNum + count) > | 175 | if (bloc->logicalBlockNum < 0 || |
174 | sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { | 176 | (bloc->logicalBlockNum + count) > |
177 | partmap->s_partition_len) { | ||
175 | udf_debug("%d < %d || %d + %d > %d\n", | 178 | udf_debug("%d < %d || %d + %d > %d\n", |
176 | bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, | 179 | bloc->logicalBlockNum, 0, bloc->logicalBlockNum, |
177 | sbi->s_partmaps[bloc.partitionReferenceNum]. | 180 | count, partmap->s_partition_len); |
178 | s_partition_len); | ||
179 | goto error_return; | 181 | goto error_return; |
180 | } | 182 | } |
181 | 183 | ||
182 | block = bloc.logicalBlockNum + offset + | 184 | block = bloc->logicalBlockNum + offset + |
183 | (sizeof(struct spaceBitmapDesc) << 3); | 185 | (sizeof(struct spaceBitmapDesc) << 3); |
184 | 186 | ||
185 | do { | 187 | do { |
@@ -207,7 +209,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb, | |||
207 | } else { | 209 | } else { |
208 | if (inode) | 210 | if (inode) |
209 | vfs_dq_free_block(inode, 1); | 211 | vfs_dq_free_block(inode, 1); |
210 | udf_add_free_space(sbi, sbi->s_partition, 1); | 212 | udf_add_free_space(sb, sbi->s_partition, 1); |
211 | } | 213 | } |
212 | } | 214 | } |
213 | mark_buffer_dirty(bh); | 215 | mark_buffer_dirty(bh); |
@@ -218,9 +220,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb, | |||
218 | } while (overflow); | 220 | } while (overflow); |
219 | 221 | ||
220 | error_return: | 222 | error_return: |
221 | sb->s_dirt = 1; | ||
222 | if (sbi->s_lvid_bh) | ||
223 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
224 | mutex_unlock(&sbi->s_alloc_mutex); | 223 | mutex_unlock(&sbi->s_alloc_mutex); |
225 | } | 224 | } |
226 | 225 | ||
@@ -277,9 +276,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, | |||
277 | } while (block_count > 0); | 276 | } while (block_count > 0); |
278 | 277 | ||
279 | out: | 278 | out: |
280 | if (udf_add_free_space(sbi, partition, -alloc_count)) | 279 | udf_add_free_space(sb, partition, -alloc_count); |
281 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
282 | sb->s_dirt = 1; | ||
283 | mutex_unlock(&sbi->s_alloc_mutex); | 280 | mutex_unlock(&sbi->s_alloc_mutex); |
284 | return alloc_count; | 281 | return alloc_count; |
285 | } | 282 | } |
@@ -409,9 +406,7 @@ got_block: | |||
409 | 406 | ||
410 | mark_buffer_dirty(bh); | 407 | mark_buffer_dirty(bh); |
411 | 408 | ||
412 | if (udf_add_free_space(sbi, partition, -1)) | 409 | udf_add_free_space(sb, partition, -1); |
413 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
414 | sb->s_dirt = 1; | ||
415 | mutex_unlock(&sbi->s_alloc_mutex); | 410 | mutex_unlock(&sbi->s_alloc_mutex); |
416 | *err = 0; | 411 | *err = 0; |
417 | return newblock; | 412 | return newblock; |
@@ -425,26 +420,28 @@ error_return: | |||
425 | static void udf_table_free_blocks(struct super_block *sb, | 420 | static void udf_table_free_blocks(struct super_block *sb, |
426 | struct inode *inode, | 421 | struct inode *inode, |
427 | struct inode *table, | 422 | struct inode *table, |
428 | kernel_lb_addr bloc, uint32_t offset, | 423 | struct kernel_lb_addr *bloc, |
424 | uint32_t offset, | ||
429 | uint32_t count) | 425 | uint32_t count) |
430 | { | 426 | { |
431 | struct udf_sb_info *sbi = UDF_SB(sb); | 427 | struct udf_sb_info *sbi = UDF_SB(sb); |
428 | struct udf_part_map *partmap; | ||
432 | uint32_t start, end; | 429 | uint32_t start, end; |
433 | uint32_t elen; | 430 | uint32_t elen; |
434 | kernel_lb_addr eloc; | 431 | struct kernel_lb_addr eloc; |
435 | struct extent_position oepos, epos; | 432 | struct extent_position oepos, epos; |
436 | int8_t etype; | 433 | int8_t etype; |
437 | int i; | 434 | int i; |
438 | struct udf_inode_info *iinfo; | 435 | struct udf_inode_info *iinfo; |
439 | 436 | ||
440 | mutex_lock(&sbi->s_alloc_mutex); | 437 | mutex_lock(&sbi->s_alloc_mutex); |
441 | if (bloc.logicalBlockNum < 0 || | 438 | partmap = &sbi->s_partmaps[bloc->partitionReferenceNum]; |
442 | (bloc.logicalBlockNum + count) > | 439 | if (bloc->logicalBlockNum < 0 || |
443 | sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { | 440 | (bloc->logicalBlockNum + count) > |
441 | partmap->s_partition_len) { | ||
444 | udf_debug("%d < %d || %d + %d > %d\n", | 442 | udf_debug("%d < %d || %d + %d > %d\n", |
445 | bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, | 443 | bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, |
446 | sbi->s_partmaps[bloc.partitionReferenceNum]. | 444 | partmap->s_partition_len); |
447 | s_partition_len); | ||
448 | goto error_return; | 445 | goto error_return; |
449 | } | 446 | } |
450 | 447 | ||
@@ -453,11 +450,10 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
453 | could occure, but.. oh well */ | 450 | could occure, but.. oh well */ |
454 | if (inode) | 451 | if (inode) |
455 | vfs_dq_free_block(inode, count); | 452 | vfs_dq_free_block(inode, count); |
456 | if (udf_add_free_space(sbi, sbi->s_partition, count)) | 453 | udf_add_free_space(sb, sbi->s_partition, count); |
457 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
458 | 454 | ||
459 | start = bloc.logicalBlockNum + offset; | 455 | start = bloc->logicalBlockNum + offset; |
460 | end = bloc.logicalBlockNum + offset + count - 1; | 456 | end = bloc->logicalBlockNum + offset + count - 1; |
461 | 457 | ||
462 | epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry); | 458 | epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry); |
463 | elen = 0; | 459 | elen = 0; |
@@ -483,7 +479,7 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
483 | start += count; | 479 | start += count; |
484 | count = 0; | 480 | count = 0; |
485 | } | 481 | } |
486 | udf_write_aext(table, &oepos, eloc, elen, 1); | 482 | udf_write_aext(table, &oepos, &eloc, elen, 1); |
487 | } else if (eloc.logicalBlockNum == (end + 1)) { | 483 | } else if (eloc.logicalBlockNum == (end + 1)) { |
488 | if ((0x3FFFFFFF - elen) < | 484 | if ((0x3FFFFFFF - elen) < |
489 | (count << sb->s_blocksize_bits)) { | 485 | (count << sb->s_blocksize_bits)) { |
@@ -502,7 +498,7 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
502 | end -= count; | 498 | end -= count; |
503 | count = 0; | 499 | count = 0; |
504 | } | 500 | } |
505 | udf_write_aext(table, &oepos, eloc, elen, 1); | 501 | udf_write_aext(table, &oepos, &eloc, elen, 1); |
506 | } | 502 | } |
507 | 503 | ||
508 | if (epos.bh != oepos.bh) { | 504 | if (epos.bh != oepos.bh) { |
@@ -532,8 +528,8 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
532 | */ | 528 | */ |
533 | 529 | ||
534 | int adsize; | 530 | int adsize; |
535 | short_ad *sad = NULL; | 531 | struct short_ad *sad = NULL; |
536 | long_ad *lad = NULL; | 532 | struct long_ad *lad = NULL; |
537 | struct allocExtDesc *aed; | 533 | struct allocExtDesc *aed; |
538 | 534 | ||
539 | eloc.logicalBlockNum = start; | 535 | eloc.logicalBlockNum = start; |
@@ -541,9 +537,9 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
541 | (count << sb->s_blocksize_bits); | 537 | (count << sb->s_blocksize_bits); |
542 | 538 | ||
543 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 539 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
544 | adsize = sizeof(short_ad); | 540 | adsize = sizeof(struct short_ad); |
545 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 541 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
546 | adsize = sizeof(long_ad); | 542 | adsize = sizeof(struct long_ad); |
547 | else { | 543 | else { |
548 | brelse(oepos.bh); | 544 | brelse(oepos.bh); |
549 | brelse(epos.bh); | 545 | brelse(epos.bh); |
@@ -563,7 +559,7 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
563 | elen -= sb->s_blocksize; | 559 | elen -= sb->s_blocksize; |
564 | 560 | ||
565 | epos.bh = udf_tread(sb, | 561 | epos.bh = udf_tread(sb, |
566 | udf_get_lb_pblock(sb, epos.block, 0)); | 562 | udf_get_lb_pblock(sb, &epos.block, 0)); |
567 | if (!epos.bh) { | 563 | if (!epos.bh) { |
568 | brelse(oepos.bh); | 564 | brelse(oepos.bh); |
569 | goto error_return; | 565 | goto error_return; |
@@ -601,15 +597,15 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
601 | if (sbi->s_udfrev >= 0x0200) | 597 | if (sbi->s_udfrev >= 0x0200) |
602 | udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, | 598 | udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, |
603 | 3, 1, epos.block.logicalBlockNum, | 599 | 3, 1, epos.block.logicalBlockNum, |
604 | sizeof(tag)); | 600 | sizeof(struct tag)); |
605 | else | 601 | else |
606 | udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, | 602 | udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, |
607 | 2, 1, epos.block.logicalBlockNum, | 603 | 2, 1, epos.block.logicalBlockNum, |
608 | sizeof(tag)); | 604 | sizeof(struct tag)); |
609 | 605 | ||
610 | switch (iinfo->i_alloc_type) { | 606 | switch (iinfo->i_alloc_type) { |
611 | case ICBTAG_FLAG_AD_SHORT: | 607 | case ICBTAG_FLAG_AD_SHORT: |
612 | sad = (short_ad *)sptr; | 608 | sad = (struct short_ad *)sptr; |
613 | sad->extLength = cpu_to_le32( | 609 | sad->extLength = cpu_to_le32( |
614 | EXT_NEXT_EXTENT_ALLOCDECS | | 610 | EXT_NEXT_EXTENT_ALLOCDECS | |
615 | sb->s_blocksize); | 611 | sb->s_blocksize); |
@@ -617,7 +613,7 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
617 | cpu_to_le32(epos.block.logicalBlockNum); | 613 | cpu_to_le32(epos.block.logicalBlockNum); |
618 | break; | 614 | break; |
619 | case ICBTAG_FLAG_AD_LONG: | 615 | case ICBTAG_FLAG_AD_LONG: |
620 | lad = (long_ad *)sptr; | 616 | lad = (struct long_ad *)sptr; |
621 | lad->extLength = cpu_to_le32( | 617 | lad->extLength = cpu_to_le32( |
622 | EXT_NEXT_EXTENT_ALLOCDECS | | 618 | EXT_NEXT_EXTENT_ALLOCDECS | |
623 | sb->s_blocksize); | 619 | sb->s_blocksize); |
@@ -635,7 +631,7 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
635 | 631 | ||
636 | /* It's possible that stealing the block emptied the extent */ | 632 | /* It's possible that stealing the block emptied the extent */ |
637 | if (elen) { | 633 | if (elen) { |
638 | udf_write_aext(table, &epos, eloc, elen, 1); | 634 | udf_write_aext(table, &epos, &eloc, elen, 1); |
639 | 635 | ||
640 | if (!epos.bh) { | 636 | if (!epos.bh) { |
641 | iinfo->i_lenAlloc += adsize; | 637 | iinfo->i_lenAlloc += adsize; |
@@ -653,7 +649,6 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
653 | brelse(oepos.bh); | 649 | brelse(oepos.bh); |
654 | 650 | ||
655 | error_return: | 651 | error_return: |
656 | sb->s_dirt = 1; | ||
657 | mutex_unlock(&sbi->s_alloc_mutex); | 652 | mutex_unlock(&sbi->s_alloc_mutex); |
658 | return; | 653 | return; |
659 | } | 654 | } |
@@ -666,7 +661,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, | |||
666 | struct udf_sb_info *sbi = UDF_SB(sb); | 661 | struct udf_sb_info *sbi = UDF_SB(sb); |
667 | int alloc_count = 0; | 662 | int alloc_count = 0; |
668 | uint32_t elen, adsize; | 663 | uint32_t elen, adsize; |
669 | kernel_lb_addr eloc; | 664 | struct kernel_lb_addr eloc; |
670 | struct extent_position epos; | 665 | struct extent_position epos; |
671 | int8_t etype = -1; | 666 | int8_t etype = -1; |
672 | struct udf_inode_info *iinfo; | 667 | struct udf_inode_info *iinfo; |
@@ -677,9 +672,9 @@ static int udf_table_prealloc_blocks(struct super_block *sb, | |||
677 | 672 | ||
678 | iinfo = UDF_I(table); | 673 | iinfo = UDF_I(table); |
679 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 674 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
680 | adsize = sizeof(short_ad); | 675 | adsize = sizeof(struct short_ad); |
681 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 676 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
682 | adsize = sizeof(long_ad); | 677 | adsize = sizeof(struct long_ad); |
683 | else | 678 | else |
684 | return 0; | 679 | return 0; |
685 | 680 | ||
@@ -707,7 +702,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, | |||
707 | alloc_count = block_count; | 702 | alloc_count = block_count; |
708 | eloc.logicalBlockNum += alloc_count; | 703 | eloc.logicalBlockNum += alloc_count; |
709 | elen -= (alloc_count << sb->s_blocksize_bits); | 704 | elen -= (alloc_count << sb->s_blocksize_bits); |
710 | udf_write_aext(table, &epos, eloc, | 705 | udf_write_aext(table, &epos, &eloc, |
711 | (etype << 30) | elen, 1); | 706 | (etype << 30) | elen, 1); |
712 | } else | 707 | } else |
713 | udf_delete_aext(table, epos, eloc, | 708 | udf_delete_aext(table, epos, eloc, |
@@ -718,10 +713,8 @@ static int udf_table_prealloc_blocks(struct super_block *sb, | |||
718 | 713 | ||
719 | brelse(epos.bh); | 714 | brelse(epos.bh); |
720 | 715 | ||
721 | if (alloc_count && udf_add_free_space(sbi, partition, -alloc_count)) { | 716 | if (alloc_count) |
722 | mark_buffer_dirty(sbi->s_lvid_bh); | 717 | udf_add_free_space(sb, partition, -alloc_count); |
723 | sb->s_dirt = 1; | ||
724 | } | ||
725 | mutex_unlock(&sbi->s_alloc_mutex); | 718 | mutex_unlock(&sbi->s_alloc_mutex); |
726 | return alloc_count; | 719 | return alloc_count; |
727 | } | 720 | } |
@@ -735,7 +728,7 @@ static int udf_table_new_block(struct super_block *sb, | |||
735 | uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; | 728 | uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; |
736 | uint32_t newblock = 0, adsize; | 729 | uint32_t newblock = 0, adsize; |
737 | uint32_t elen, goal_elen = 0; | 730 | uint32_t elen, goal_elen = 0; |
738 | kernel_lb_addr eloc, uninitialized_var(goal_eloc); | 731 | struct kernel_lb_addr eloc, uninitialized_var(goal_eloc); |
739 | struct extent_position epos, goal_epos; | 732 | struct extent_position epos, goal_epos; |
740 | int8_t etype; | 733 | int8_t etype; |
741 | struct udf_inode_info *iinfo = UDF_I(table); | 734 | struct udf_inode_info *iinfo = UDF_I(table); |
@@ -743,9 +736,9 @@ static int udf_table_new_block(struct super_block *sb, | |||
743 | *err = -ENOSPC; | 736 | *err = -ENOSPC; |
744 | 737 | ||
745 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 738 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
746 | adsize = sizeof(short_ad); | 739 | adsize = sizeof(struct short_ad); |
747 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 740 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
748 | adsize = sizeof(long_ad); | 741 | adsize = sizeof(struct long_ad); |
749 | else | 742 | else |
750 | return newblock; | 743 | return newblock; |
751 | 744 | ||
@@ -814,46 +807,37 @@ static int udf_table_new_block(struct super_block *sb, | |||
814 | } | 807 | } |
815 | 808 | ||
816 | if (goal_elen) | 809 | if (goal_elen) |
817 | udf_write_aext(table, &goal_epos, goal_eloc, goal_elen, 1); | 810 | udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); |
818 | else | 811 | else |
819 | udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); | 812 | udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); |
820 | brelse(goal_epos.bh); | 813 | brelse(goal_epos.bh); |
821 | 814 | ||
822 | if (udf_add_free_space(sbi, partition, -1)) | 815 | udf_add_free_space(sb, partition, -1); |
823 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
824 | 816 | ||
825 | sb->s_dirt = 1; | ||
826 | mutex_unlock(&sbi->s_alloc_mutex); | 817 | mutex_unlock(&sbi->s_alloc_mutex); |
827 | *err = 0; | 818 | *err = 0; |
828 | return newblock; | 819 | return newblock; |
829 | } | 820 | } |
830 | 821 | ||
831 | inline void udf_free_blocks(struct super_block *sb, | 822 | void udf_free_blocks(struct super_block *sb, struct inode *inode, |
832 | struct inode *inode, | 823 | struct kernel_lb_addr *bloc, uint32_t offset, |
833 | kernel_lb_addr bloc, uint32_t offset, | 824 | uint32_t count) |
834 | uint32_t count) | ||
835 | { | 825 | { |
836 | uint16_t partition = bloc.partitionReferenceNum; | 826 | uint16_t partition = bloc->partitionReferenceNum; |
837 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; | 827 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; |
838 | 828 | ||
839 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { | 829 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { |
840 | return udf_bitmap_free_blocks(sb, inode, | 830 | udf_bitmap_free_blocks(sb, inode, map->s_uspace.s_bitmap, |
841 | map->s_uspace.s_bitmap, | 831 | bloc, offset, count); |
842 | bloc, offset, count); | ||
843 | } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { | 832 | } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { |
844 | return udf_table_free_blocks(sb, inode, | 833 | udf_table_free_blocks(sb, inode, map->s_uspace.s_table, |
845 | map->s_uspace.s_table, | 834 | bloc, offset, count); |
846 | bloc, offset, count); | ||
847 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { | 835 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { |
848 | return udf_bitmap_free_blocks(sb, inode, | 836 | udf_bitmap_free_blocks(sb, inode, map->s_fspace.s_bitmap, |
849 | map->s_fspace.s_bitmap, | 837 | bloc, offset, count); |
850 | bloc, offset, count); | ||
851 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { | 838 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { |
852 | return udf_table_free_blocks(sb, inode, | 839 | udf_table_free_blocks(sb, inode, map->s_fspace.s_table, |
853 | map->s_fspace.s_table, | 840 | bloc, offset, count); |
854 | bloc, offset, count); | ||
855 | } else { | ||
856 | return; | ||
857 | } | 841 | } |
858 | } | 842 | } |
859 | 843 | ||
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 62dc270c69d1..2efd4d5291b6 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -51,7 +51,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
51 | uint8_t lfi; | 51 | uint8_t lfi; |
52 | loff_t size = udf_ext0_offset(dir) + dir->i_size; | 52 | loff_t size = udf_ext0_offset(dir) + dir->i_size; |
53 | struct buffer_head *tmp, *bha[16]; | 53 | struct buffer_head *tmp, *bha[16]; |
54 | kernel_lb_addr eloc; | 54 | struct kernel_lb_addr eloc; |
55 | uint32_t elen; | 55 | uint32_t elen; |
56 | sector_t offset; | 56 | sector_t offset; |
57 | int i, num, ret = 0; | 57 | int i, num, ret = 0; |
@@ -80,13 +80,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
80 | ret = -ENOENT; | 80 | ret = -ENOENT; |
81 | goto out; | 81 | goto out; |
82 | } | 82 | } |
83 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 83 | block = udf_get_lb_pblock(dir->i_sb, &eloc, offset); |
84 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 84 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
85 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 85 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
86 | epos.offset -= sizeof(short_ad); | 86 | epos.offset -= sizeof(struct short_ad); |
87 | else if (iinfo->i_alloc_type == | 87 | else if (iinfo->i_alloc_type == |
88 | ICBTAG_FLAG_AD_LONG) | 88 | ICBTAG_FLAG_AD_LONG) |
89 | epos.offset -= sizeof(long_ad); | 89 | epos.offset -= sizeof(struct long_ad); |
90 | } else { | 90 | } else { |
91 | offset = 0; | 91 | offset = 0; |
92 | } | 92 | } |
@@ -101,7 +101,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
101 | if (i + offset > (elen >> dir->i_sb->s_blocksize_bits)) | 101 | if (i + offset > (elen >> dir->i_sb->s_blocksize_bits)) |
102 | i = (elen >> dir->i_sb->s_blocksize_bits) - offset; | 102 | i = (elen >> dir->i_sb->s_blocksize_bits) - offset; |
103 | for (num = 0; i > 0; i--) { | 103 | for (num = 0; i > 0; i--) { |
104 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset + i); | 104 | block = udf_get_lb_pblock(dir->i_sb, &eloc, offset + i); |
105 | tmp = udf_tgetblk(dir->i_sb, block); | 105 | tmp = udf_tgetblk(dir->i_sb, block); |
106 | if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp)) | 106 | if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp)) |
107 | bha[num++] = tmp; | 107 | bha[num++] = tmp; |
@@ -161,9 +161,9 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
161 | memcpy(fname, "..", flen); | 161 | memcpy(fname, "..", flen); |
162 | dt_type = DT_DIR; | 162 | dt_type = DT_DIR; |
163 | } else { | 163 | } else { |
164 | kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); | 164 | struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); |
165 | 165 | ||
166 | iblock = udf_get_lb_pblock(dir->i_sb, tloc, 0); | 166 | iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0); |
167 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); | 167 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); |
168 | dt_type = DT_UNKNOWN; | 168 | dt_type = DT_UNKNOWN; |
169 | } | 169 | } |
diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 2820f8fcf4cc..1d2c570704c8 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c | |||
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | #if 0 | 21 | #if 0 |
22 | static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad, | 22 | static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad, |
23 | uint8_t ad_size, kernel_lb_addr fe_loc, | 23 | uint8_t ad_size, struct kernel_lb_addr fe_loc, |
24 | int *pos, int *offset, struct buffer_head **bh, | 24 | int *pos, int *offset, struct buffer_head **bh, |
25 | int *error) | 25 | int *error) |
26 | { | 26 | { |
@@ -75,7 +75,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, | |||
75 | struct udf_fileident_bh *fibh, | 75 | struct udf_fileident_bh *fibh, |
76 | struct fileIdentDesc *cfi, | 76 | struct fileIdentDesc *cfi, |
77 | struct extent_position *epos, | 77 | struct extent_position *epos, |
78 | kernel_lb_addr *eloc, uint32_t *elen, | 78 | struct kernel_lb_addr *eloc, uint32_t *elen, |
79 | sector_t *offset) | 79 | sector_t *offset) |
80 | { | 80 | { |
81 | struct fileIdentDesc *fi; | 81 | struct fileIdentDesc *fi; |
@@ -111,7 +111,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, | |||
111 | (EXT_RECORDED_ALLOCATED >> 30)) | 111 | (EXT_RECORDED_ALLOCATED >> 30)) |
112 | return NULL; | 112 | return NULL; |
113 | 113 | ||
114 | block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); | 114 | block = udf_get_lb_pblock(dir->i_sb, eloc, *offset); |
115 | 115 | ||
116 | (*offset)++; | 116 | (*offset)++; |
117 | 117 | ||
@@ -131,7 +131,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, | |||
131 | if (i + *offset > (*elen >> blocksize_bits)) | 131 | if (i + *offset > (*elen >> blocksize_bits)) |
132 | i = (*elen >> blocksize_bits)-*offset; | 132 | i = (*elen >> blocksize_bits)-*offset; |
133 | for (num = 0; i > 0; i--) { | 133 | for (num = 0; i > 0; i--) { |
134 | block = udf_get_lb_pblock(dir->i_sb, *eloc, | 134 | block = udf_get_lb_pblock(dir->i_sb, eloc, |
135 | *offset + i); | 135 | *offset + i); |
136 | tmp = udf_tgetblk(dir->i_sb, block); | 136 | tmp = udf_tgetblk(dir->i_sb, block); |
137 | if (tmp && !buffer_uptodate(tmp) && | 137 | if (tmp && !buffer_uptodate(tmp) && |
@@ -169,7 +169,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, | |||
169 | (EXT_RECORDED_ALLOCATED >> 30)) | 169 | (EXT_RECORDED_ALLOCATED >> 30)) |
170 | return NULL; | 170 | return NULL; |
171 | 171 | ||
172 | block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); | 172 | block = udf_get_lb_pblock(dir->i_sb, eloc, *offset); |
173 | 173 | ||
174 | (*offset)++; | 174 | (*offset)++; |
175 | 175 | ||
@@ -249,9 +249,9 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset) | |||
249 | } | 249 | } |
250 | 250 | ||
251 | #if 0 | 251 | #if 0 |
252 | static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) | 252 | static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) |
253 | { | 253 | { |
254 | extent_ad *ext; | 254 | struct extent_ad *ext; |
255 | struct fileEntry *fe; | 255 | struct fileEntry *fe; |
256 | uint8_t *ptr; | 256 | uint8_t *ptr; |
257 | 257 | ||
@@ -274,54 +274,54 @@ static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) | |||
274 | if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) | 274 | if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) |
275 | ptr += *offset; | 275 | ptr += *offset; |
276 | 276 | ||
277 | ext = (extent_ad *)ptr; | 277 | ext = (struct extent_ad *)ptr; |
278 | 278 | ||
279 | *offset = *offset + sizeof(extent_ad); | 279 | *offset = *offset + sizeof(struct extent_ad); |
280 | return ext; | 280 | return ext; |
281 | } | 281 | } |
282 | #endif | 282 | #endif |
283 | 283 | ||
284 | short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, | 284 | struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, |
285 | int inc) | 285 | int inc) |
286 | { | 286 | { |
287 | short_ad *sa; | 287 | struct short_ad *sa; |
288 | 288 | ||
289 | if ((!ptr) || (!offset)) { | 289 | if ((!ptr) || (!offset)) { |
290 | printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); | 290 | printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); |
291 | return NULL; | 291 | return NULL; |
292 | } | 292 | } |
293 | 293 | ||
294 | if ((*offset + sizeof(short_ad)) > maxoffset) | 294 | if ((*offset + sizeof(struct short_ad)) > maxoffset) |
295 | return NULL; | 295 | return NULL; |
296 | else { | 296 | else { |
297 | sa = (short_ad *)ptr; | 297 | sa = (struct short_ad *)ptr; |
298 | if (sa->extLength == 0) | 298 | if (sa->extLength == 0) |
299 | return NULL; | 299 | return NULL; |
300 | } | 300 | } |
301 | 301 | ||
302 | if (inc) | 302 | if (inc) |
303 | *offset += sizeof(short_ad); | 303 | *offset += sizeof(struct short_ad); |
304 | return sa; | 304 | return sa; |
305 | } | 305 | } |
306 | 306 | ||
307 | long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc) | 307 | struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc) |
308 | { | 308 | { |
309 | long_ad *la; | 309 | struct long_ad *la; |
310 | 310 | ||
311 | if ((!ptr) || (!offset)) { | 311 | if ((!ptr) || (!offset)) { |
312 | printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); | 312 | printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); |
313 | return NULL; | 313 | return NULL; |
314 | } | 314 | } |
315 | 315 | ||
316 | if ((*offset + sizeof(long_ad)) > maxoffset) | 316 | if ((*offset + sizeof(struct long_ad)) > maxoffset) |
317 | return NULL; | 317 | return NULL; |
318 | else { | 318 | else { |
319 | la = (long_ad *)ptr; | 319 | la = (struct long_ad *)ptr; |
320 | if (la->extLength == 0) | 320 | if (la->extLength == 0) |
321 | return NULL; | 321 | return NULL; |
322 | } | 322 | } |
323 | 323 | ||
324 | if (inc) | 324 | if (inc) |
325 | *offset += sizeof(long_ad); | 325 | *offset += sizeof(struct long_ad); |
326 | return la; | 326 | return la; |
327 | } | 327 | } |
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index a0974df82b31..4792b771aa80 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h | |||
@@ -38,10 +38,10 @@ | |||
38 | #define _ECMA_167_H 1 | 38 | #define _ECMA_167_H 1 |
39 | 39 | ||
40 | /* Character set specification (ECMA 167r3 1/7.2.1) */ | 40 | /* Character set specification (ECMA 167r3 1/7.2.1) */ |
41 | typedef struct { | 41 | struct charspec { |
42 | uint8_t charSetType; | 42 | uint8_t charSetType; |
43 | uint8_t charSetInfo[63]; | 43 | uint8_t charSetInfo[63]; |
44 | } __attribute__ ((packed)) charspec; | 44 | } __attribute__ ((packed)); |
45 | 45 | ||
46 | /* Character Set Type (ECMA 167r3 1/7.2.1.1) */ | 46 | /* Character Set Type (ECMA 167r3 1/7.2.1.1) */ |
47 | #define CHARSPEC_TYPE_CS0 0x00 /* (1/7.2.2) */ | 47 | #define CHARSPEC_TYPE_CS0 0x00 /* (1/7.2.2) */ |
@@ -57,7 +57,7 @@ typedef struct { | |||
57 | typedef uint8_t dstring; | 57 | typedef uint8_t dstring; |
58 | 58 | ||
59 | /* Timestamp (ECMA 167r3 1/7.3) */ | 59 | /* Timestamp (ECMA 167r3 1/7.3) */ |
60 | typedef struct { | 60 | struct timestamp { |
61 | __le16 typeAndTimezone; | 61 | __le16 typeAndTimezone; |
62 | __le16 year; | 62 | __le16 year; |
63 | uint8_t month; | 63 | uint8_t month; |
@@ -68,7 +68,7 @@ typedef struct { | |||
68 | uint8_t centiseconds; | 68 | uint8_t centiseconds; |
69 | uint8_t hundredsOfMicroseconds; | 69 | uint8_t hundredsOfMicroseconds; |
70 | uint8_t microseconds; | 70 | uint8_t microseconds; |
71 | } __attribute__ ((packed)) timestamp; | 71 | } __attribute__ ((packed)); |
72 | 72 | ||
73 | /* Type and Time Zone (ECMA 167r3 1/7.3.1) */ | 73 | /* Type and Time Zone (ECMA 167r3 1/7.3.1) */ |
74 | #define TIMESTAMP_TYPE_MASK 0xF000 | 74 | #define TIMESTAMP_TYPE_MASK 0xF000 |
@@ -78,11 +78,11 @@ typedef struct { | |||
78 | #define TIMESTAMP_TIMEZONE_MASK 0x0FFF | 78 | #define TIMESTAMP_TIMEZONE_MASK 0x0FFF |
79 | 79 | ||
80 | /* Entity identifier (ECMA 167r3 1/7.4) */ | 80 | /* Entity identifier (ECMA 167r3 1/7.4) */ |
81 | typedef struct { | 81 | struct regid { |
82 | uint8_t flags; | 82 | uint8_t flags; |
83 | uint8_t ident[23]; | 83 | uint8_t ident[23]; |
84 | uint8_t identSuffix[8]; | 84 | uint8_t identSuffix[8]; |
85 | } __attribute__ ((packed)) regid; | 85 | } __attribute__ ((packed)); |
86 | 86 | ||
87 | /* Flags (ECMA 167r3 1/7.4.1) */ | 87 | /* Flags (ECMA 167r3 1/7.4.1) */ |
88 | #define ENTITYID_FLAGS_DIRTY 0x00 | 88 | #define ENTITYID_FLAGS_DIRTY 0x00 |
@@ -126,38 +126,38 @@ struct terminatingExtendedAreaDesc { | |||
126 | 126 | ||
127 | /* Boot Descriptor (ECMA 167r3 2/9.4) */ | 127 | /* Boot Descriptor (ECMA 167r3 2/9.4) */ |
128 | struct bootDesc { | 128 | struct bootDesc { |
129 | uint8_t structType; | 129 | uint8_t structType; |
130 | uint8_t stdIdent[VSD_STD_ID_LEN]; | 130 | uint8_t stdIdent[VSD_STD_ID_LEN]; |
131 | uint8_t structVersion; | 131 | uint8_t structVersion; |
132 | uint8_t reserved1; | 132 | uint8_t reserved1; |
133 | regid archType; | 133 | struct regid archType; |
134 | regid bootIdent; | 134 | struct regid bootIdent; |
135 | __le32 bootExtLocation; | 135 | __le32 bootExtLocation; |
136 | __le32 bootExtLength; | 136 | __le32 bootExtLength; |
137 | __le64 loadAddress; | 137 | __le64 loadAddress; |
138 | __le64 startAddress; | 138 | __le64 startAddress; |
139 | timestamp descCreationDateAndTime; | 139 | struct timestamp descCreationDateAndTime; |
140 | __le16 flags; | 140 | __le16 flags; |
141 | uint8_t reserved2[32]; | 141 | uint8_t reserved2[32]; |
142 | uint8_t bootUse[1906]; | 142 | uint8_t bootUse[1906]; |
143 | } __attribute__ ((packed)); | 143 | } __attribute__ ((packed)); |
144 | 144 | ||
145 | /* Flags (ECMA 167r3 2/9.4.12) */ | 145 | /* Flags (ECMA 167r3 2/9.4.12) */ |
146 | #define BOOT_FLAGS_ERASE 0x01 | 146 | #define BOOT_FLAGS_ERASE 0x01 |
147 | 147 | ||
148 | /* Extent Descriptor (ECMA 167r3 3/7.1) */ | 148 | /* Extent Descriptor (ECMA 167r3 3/7.1) */ |
149 | typedef struct { | 149 | struct extent_ad { |
150 | __le32 extLength; | 150 | __le32 extLength; |
151 | __le32 extLocation; | 151 | __le32 extLocation; |
152 | } __attribute__ ((packed)) extent_ad; | 152 | } __attribute__ ((packed)); |
153 | 153 | ||
154 | typedef struct { | 154 | struct kernel_extent_ad { |
155 | uint32_t extLength; | 155 | uint32_t extLength; |
156 | uint32_t extLocation; | 156 | uint32_t extLocation; |
157 | } kernel_extent_ad; | 157 | }; |
158 | 158 | ||
159 | /* Descriptor Tag (ECMA 167r3 3/7.2) */ | 159 | /* Descriptor Tag (ECMA 167r3 3/7.2) */ |
160 | typedef struct { | 160 | struct tag { |
161 | __le16 tagIdent; | 161 | __le16 tagIdent; |
162 | __le16 descVersion; | 162 | __le16 descVersion; |
163 | uint8_t tagChecksum; | 163 | uint8_t tagChecksum; |
@@ -166,7 +166,7 @@ typedef struct { | |||
166 | __le16 descCRC; | 166 | __le16 descCRC; |
167 | __le16 descCRCLength; | 167 | __le16 descCRCLength; |
168 | __le32 tagLocation; | 168 | __le32 tagLocation; |
169 | } __attribute__ ((packed)) tag; | 169 | } __attribute__ ((packed)); |
170 | 170 | ||
171 | /* Tag Identifier (ECMA 167r3 3/7.2.1) */ | 171 | /* Tag Identifier (ECMA 167r3 3/7.2.1) */ |
172 | #define TAG_IDENT_PVD 0x0001 | 172 | #define TAG_IDENT_PVD 0x0001 |
@@ -190,28 +190,28 @@ struct NSRDesc { | |||
190 | 190 | ||
191 | /* Primary Volume Descriptor (ECMA 167r3 3/10.1) */ | 191 | /* Primary Volume Descriptor (ECMA 167r3 3/10.1) */ |
192 | struct primaryVolDesc { | 192 | struct primaryVolDesc { |
193 | tag descTag; | 193 | struct tag descTag; |
194 | __le32 volDescSeqNum; | 194 | __le32 volDescSeqNum; |
195 | __le32 primaryVolDescNum; | 195 | __le32 primaryVolDescNum; |
196 | dstring volIdent[32]; | 196 | dstring volIdent[32]; |
197 | __le16 volSeqNum; | 197 | __le16 volSeqNum; |
198 | __le16 maxVolSeqNum; | 198 | __le16 maxVolSeqNum; |
199 | __le16 interchangeLvl; | 199 | __le16 interchangeLvl; |
200 | __le16 maxInterchangeLvl; | 200 | __le16 maxInterchangeLvl; |
201 | __le32 charSetList; | 201 | __le32 charSetList; |
202 | __le32 maxCharSetList; | 202 | __le32 maxCharSetList; |
203 | dstring volSetIdent[128]; | 203 | dstring volSetIdent[128]; |
204 | charspec descCharSet; | 204 | struct charspec descCharSet; |
205 | charspec explanatoryCharSet; | 205 | struct charspec explanatoryCharSet; |
206 | extent_ad volAbstract; | 206 | struct extent_ad volAbstract; |
207 | extent_ad volCopyright; | 207 | struct extent_ad volCopyright; |
208 | regid appIdent; | 208 | struct regid appIdent; |
209 | timestamp recordingDateAndTime; | 209 | struct timestamp recordingDateAndTime; |
210 | regid impIdent; | 210 | struct regid impIdent; |
211 | uint8_t impUse[64]; | 211 | uint8_t impUse[64]; |
212 | __le32 predecessorVolDescSeqLocation; | 212 | __le32 predecessorVolDescSeqLocation; |
213 | __le16 flags; | 213 | __le16 flags; |
214 | uint8_t reserved[22]; | 214 | uint8_t reserved[22]; |
215 | } __attribute__ ((packed)); | 215 | } __attribute__ ((packed)); |
216 | 216 | ||
217 | /* Flags (ECMA 167r3 3/10.1.21) */ | 217 | /* Flags (ECMA 167r3 3/10.1.21) */ |
@@ -219,40 +219,40 @@ struct primaryVolDesc { | |||
219 | 219 | ||
220 | /* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */ | 220 | /* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */ |
221 | struct anchorVolDescPtr { | 221 | struct anchorVolDescPtr { |
222 | tag descTag; | 222 | struct tag descTag; |
223 | extent_ad mainVolDescSeqExt; | 223 | struct extent_ad mainVolDescSeqExt; |
224 | extent_ad reserveVolDescSeqExt; | 224 | struct extent_ad reserveVolDescSeqExt; |
225 | uint8_t reserved[480]; | 225 | uint8_t reserved[480]; |
226 | } __attribute__ ((packed)); | 226 | } __attribute__ ((packed)); |
227 | 227 | ||
228 | /* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */ | 228 | /* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */ |
229 | struct volDescPtr { | 229 | struct volDescPtr { |
230 | tag descTag; | 230 | struct tag descTag; |
231 | __le32 volDescSeqNum; | 231 | __le32 volDescSeqNum; |
232 | extent_ad nextVolDescSeqExt; | 232 | struct extent_ad nextVolDescSeqExt; |
233 | uint8_t reserved[484]; | 233 | uint8_t reserved[484]; |
234 | } __attribute__ ((packed)); | 234 | } __attribute__ ((packed)); |
235 | 235 | ||
236 | /* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */ | 236 | /* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */ |
237 | struct impUseVolDesc { | 237 | struct impUseVolDesc { |
238 | tag descTag; | 238 | struct tag descTag; |
239 | __le32 volDescSeqNum; | 239 | __le32 volDescSeqNum; |
240 | regid impIdent; | 240 | struct regid impIdent; |
241 | uint8_t impUse[460]; | 241 | uint8_t impUse[460]; |
242 | } __attribute__ ((packed)); | 242 | } __attribute__ ((packed)); |
243 | 243 | ||
244 | /* Partition Descriptor (ECMA 167r3 3/10.5) */ | 244 | /* Partition Descriptor (ECMA 167r3 3/10.5) */ |
245 | struct partitionDesc { | 245 | struct partitionDesc { |
246 | tag descTag; | 246 | struct tag descTag; |
247 | __le32 volDescSeqNum; | 247 | __le32 volDescSeqNum; |
248 | __le16 partitionFlags; | 248 | __le16 partitionFlags; |
249 | __le16 partitionNumber; | 249 | __le16 partitionNumber; |
250 | regid partitionContents; | 250 | struct regid partitionContents; |
251 | uint8_t partitionContentsUse[128]; | 251 | uint8_t partitionContentsUse[128]; |
252 | __le32 accessType; | 252 | __le32 accessType; |
253 | __le32 partitionStartingLocation; | 253 | __le32 partitionStartingLocation; |
254 | __le32 partitionLength; | 254 | __le32 partitionLength; |
255 | regid impIdent; | 255 | struct regid impIdent; |
256 | uint8_t impUse[128]; | 256 | uint8_t impUse[128]; |
257 | uint8_t reserved[156]; | 257 | uint8_t reserved[156]; |
258 | } __attribute__ ((packed)); | 258 | } __attribute__ ((packed)); |
@@ -278,19 +278,19 @@ struct partitionDesc { | |||
278 | 278 | ||
279 | /* Logical Volume Descriptor (ECMA 167r3 3/10.6) */ | 279 | /* Logical Volume Descriptor (ECMA 167r3 3/10.6) */ |
280 | struct logicalVolDesc { | 280 | struct logicalVolDesc { |
281 | tag descTag; | 281 | struct tag descTag; |
282 | __le32 volDescSeqNum; | 282 | __le32 volDescSeqNum; |
283 | charspec descCharSet; | 283 | struct charspec descCharSet; |
284 | dstring logicalVolIdent[128]; | 284 | dstring logicalVolIdent[128]; |
285 | __le32 logicalBlockSize; | 285 | __le32 logicalBlockSize; |
286 | regid domainIdent; | 286 | struct regid domainIdent; |
287 | uint8_t logicalVolContentsUse[16]; | 287 | uint8_t logicalVolContentsUse[16]; |
288 | __le32 mapTableLength; | 288 | __le32 mapTableLength; |
289 | __le32 numPartitionMaps; | 289 | __le32 numPartitionMaps; |
290 | regid impIdent; | 290 | struct regid impIdent; |
291 | uint8_t impUse[128]; | 291 | uint8_t impUse[128]; |
292 | extent_ad integritySeqExt; | 292 | struct extent_ad integritySeqExt; |
293 | uint8_t partitionMaps[0]; | 293 | uint8_t partitionMaps[0]; |
294 | } __attribute__ ((packed)); | 294 | } __attribute__ ((packed)); |
295 | 295 | ||
296 | /* Generic Partition Map (ECMA 167r3 3/10.7.1) */ | 296 | /* Generic Partition Map (ECMA 167r3 3/10.7.1) */ |
@@ -322,30 +322,30 @@ struct genericPartitionMap2 { | |||
322 | 322 | ||
323 | /* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */ | 323 | /* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */ |
324 | struct unallocSpaceDesc { | 324 | struct unallocSpaceDesc { |
325 | tag descTag; | 325 | struct tag descTag; |
326 | __le32 volDescSeqNum; | 326 | __le32 volDescSeqNum; |
327 | __le32 numAllocDescs; | 327 | __le32 numAllocDescs; |
328 | extent_ad allocDescs[0]; | 328 | struct extent_ad allocDescs[0]; |
329 | } __attribute__ ((packed)); | 329 | } __attribute__ ((packed)); |
330 | 330 | ||
331 | /* Terminating Descriptor (ECMA 167r3 3/10.9) */ | 331 | /* Terminating Descriptor (ECMA 167r3 3/10.9) */ |
332 | struct terminatingDesc { | 332 | struct terminatingDesc { |
333 | tag descTag; | 333 | struct tag descTag; |
334 | uint8_t reserved[496]; | 334 | uint8_t reserved[496]; |
335 | } __attribute__ ((packed)); | 335 | } __attribute__ ((packed)); |
336 | 336 | ||
337 | /* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */ | 337 | /* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */ |
338 | struct logicalVolIntegrityDesc { | 338 | struct logicalVolIntegrityDesc { |
339 | tag descTag; | 339 | struct tag descTag; |
340 | timestamp recordingDateAndTime; | 340 | struct timestamp recordingDateAndTime; |
341 | __le32 integrityType; | 341 | __le32 integrityType; |
342 | extent_ad nextIntegrityExt; | 342 | struct extent_ad nextIntegrityExt; |
343 | uint8_t logicalVolContentsUse[32]; | 343 | uint8_t logicalVolContentsUse[32]; |
344 | __le32 numOfPartitions; | 344 | __le32 numOfPartitions; |
345 | __le32 lengthOfImpUse; | 345 | __le32 lengthOfImpUse; |
346 | __le32 freeSpaceTable[0]; | 346 | __le32 freeSpaceTable[0]; |
347 | __le32 sizeTable[0]; | 347 | __le32 sizeTable[0]; |
348 | uint8_t impUse[0]; | 348 | uint8_t impUse[0]; |
349 | } __attribute__ ((packed)); | 349 | } __attribute__ ((packed)); |
350 | 350 | ||
351 | /* Integrity Type (ECMA 167r3 3/10.10.3) */ | 351 | /* Integrity Type (ECMA 167r3 3/10.10.3) */ |
@@ -353,50 +353,50 @@ struct logicalVolIntegrityDesc { | |||
353 | #define LVID_INTEGRITY_TYPE_CLOSE 0x00000001 | 353 | #define LVID_INTEGRITY_TYPE_CLOSE 0x00000001 |
354 | 354 | ||
355 | /* Recorded Address (ECMA 167r3 4/7.1) */ | 355 | /* Recorded Address (ECMA 167r3 4/7.1) */ |
356 | typedef struct { | 356 | struct lb_addr { |
357 | __le32 logicalBlockNum; | 357 | __le32 logicalBlockNum; |
358 | __le16 partitionReferenceNum; | 358 | __le16 partitionReferenceNum; |
359 | } __attribute__ ((packed)) lb_addr; | 359 | } __attribute__ ((packed)); |
360 | 360 | ||
361 | /* ... and its in-core analog */ | 361 | /* ... and its in-core analog */ |
362 | typedef struct { | 362 | struct kernel_lb_addr { |
363 | uint32_t logicalBlockNum; | 363 | uint32_t logicalBlockNum; |
364 | uint16_t partitionReferenceNum; | 364 | uint16_t partitionReferenceNum; |
365 | } kernel_lb_addr; | 365 | }; |
366 | 366 | ||
367 | /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ | 367 | /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ |
368 | typedef struct { | 368 | struct short_ad { |
369 | __le32 extLength; | 369 | __le32 extLength; |
370 | __le32 extPosition; | 370 | __le32 extPosition; |
371 | } __attribute__ ((packed)) short_ad; | 371 | } __attribute__ ((packed)); |
372 | 372 | ||
373 | /* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */ | 373 | /* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */ |
374 | typedef struct { | 374 | struct long_ad { |
375 | __le32 extLength; | 375 | __le32 extLength; |
376 | lb_addr extLocation; | 376 | struct lb_addr extLocation; |
377 | uint8_t impUse[6]; | 377 | uint8_t impUse[6]; |
378 | } __attribute__ ((packed)) long_ad; | 378 | } __attribute__ ((packed)); |
379 | 379 | ||
380 | typedef struct { | 380 | struct kernel_long_ad { |
381 | uint32_t extLength; | 381 | uint32_t extLength; |
382 | kernel_lb_addr extLocation; | 382 | struct kernel_lb_addr extLocation; |
383 | uint8_t impUse[6]; | 383 | uint8_t impUse[6]; |
384 | } kernel_long_ad; | 384 | }; |
385 | 385 | ||
386 | /* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */ | 386 | /* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */ |
387 | typedef struct { | 387 | struct ext_ad { |
388 | __le32 extLength; | 388 | __le32 extLength; |
389 | __le32 recordedLength; | 389 | __le32 recordedLength; |
390 | __le32 informationLength; | 390 | __le32 informationLength; |
391 | lb_addr extLocation; | 391 | struct lb_addr extLocation; |
392 | } __attribute__ ((packed)) ext_ad; | 392 | } __attribute__ ((packed)); |
393 | 393 | ||
394 | typedef struct { | 394 | struct kernel_ext_ad { |
395 | uint32_t extLength; | 395 | uint32_t extLength; |
396 | uint32_t recordedLength; | 396 | uint32_t recordedLength; |
397 | uint32_t informationLength; | 397 | uint32_t informationLength; |
398 | kernel_lb_addr extLocation; | 398 | struct kernel_lb_addr extLocation; |
399 | } kernel_ext_ad; | 399 | }; |
400 | 400 | ||
401 | /* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */ | 401 | /* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */ |
402 | 402 | ||
@@ -415,44 +415,44 @@ typedef struct { | |||
415 | 415 | ||
416 | /* File Set Descriptor (ECMA 167r3 4/14.1) */ | 416 | /* File Set Descriptor (ECMA 167r3 4/14.1) */ |
417 | struct fileSetDesc { | 417 | struct fileSetDesc { |
418 | tag descTag; | 418 | struct tag descTag; |
419 | timestamp recordingDateAndTime; | 419 | struct timestamp recordingDateAndTime; |
420 | __le16 interchangeLvl; | 420 | __le16 interchangeLvl; |
421 | __le16 maxInterchangeLvl; | 421 | __le16 maxInterchangeLvl; |
422 | __le32 charSetList; | 422 | __le32 charSetList; |
423 | __le32 maxCharSetList; | 423 | __le32 maxCharSetList; |
424 | __le32 fileSetNum; | 424 | __le32 fileSetNum; |
425 | __le32 fileSetDescNum; | 425 | __le32 fileSetDescNum; |
426 | charspec logicalVolIdentCharSet; | 426 | struct charspec logicalVolIdentCharSet; |
427 | dstring logicalVolIdent[128]; | 427 | dstring logicalVolIdent[128]; |
428 | charspec fileSetCharSet; | 428 | struct charspec fileSetCharSet; |
429 | dstring fileSetIdent[32]; | 429 | dstring fileSetIdent[32]; |
430 | dstring copyrightFileIdent[32]; | 430 | dstring copyrightFileIdent[32]; |
431 | dstring abstractFileIdent[32]; | 431 | dstring abstractFileIdent[32]; |
432 | long_ad rootDirectoryICB; | 432 | struct long_ad rootDirectoryICB; |
433 | regid domainIdent; | 433 | struct regid domainIdent; |
434 | long_ad nextExt; | 434 | struct long_ad nextExt; |
435 | long_ad streamDirectoryICB; | 435 | struct long_ad streamDirectoryICB; |
436 | uint8_t reserved[32]; | 436 | uint8_t reserved[32]; |
437 | } __attribute__ ((packed)); | 437 | } __attribute__ ((packed)); |
438 | 438 | ||
439 | /* Partition Header Descriptor (ECMA 167r3 4/14.3) */ | 439 | /* Partition Header Descriptor (ECMA 167r3 4/14.3) */ |
440 | struct partitionHeaderDesc { | 440 | struct partitionHeaderDesc { |
441 | short_ad unallocSpaceTable; | 441 | struct short_ad unallocSpaceTable; |
442 | short_ad unallocSpaceBitmap; | 442 | struct short_ad unallocSpaceBitmap; |
443 | short_ad partitionIntegrityTable; | 443 | struct short_ad partitionIntegrityTable; |
444 | short_ad freedSpaceTable; | 444 | struct short_ad freedSpaceTable; |
445 | short_ad freedSpaceBitmap; | 445 | struct short_ad freedSpaceBitmap; |
446 | uint8_t reserved[88]; | 446 | uint8_t reserved[88]; |
447 | } __attribute__ ((packed)); | 447 | } __attribute__ ((packed)); |
448 | 448 | ||
449 | /* File Identifier Descriptor (ECMA 167r3 4/14.4) */ | 449 | /* File Identifier Descriptor (ECMA 167r3 4/14.4) */ |
450 | struct fileIdentDesc { | 450 | struct fileIdentDesc { |
451 | tag descTag; | 451 | struct tag descTag; |
452 | __le16 fileVersionNum; | 452 | __le16 fileVersionNum; |
453 | uint8_t fileCharacteristics; | 453 | uint8_t fileCharacteristics; |
454 | uint8_t lengthFileIdent; | 454 | uint8_t lengthFileIdent; |
455 | long_ad icb; | 455 | struct long_ad icb; |
456 | __le16 lengthOfImpUse; | 456 | __le16 lengthOfImpUse; |
457 | uint8_t impUse[0]; | 457 | uint8_t impUse[0]; |
458 | uint8_t fileIdent[0]; | 458 | uint8_t fileIdent[0]; |
@@ -468,22 +468,22 @@ struct fileIdentDesc { | |||
468 | 468 | ||
469 | /* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */ | 469 | /* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */ |
470 | struct allocExtDesc { | 470 | struct allocExtDesc { |
471 | tag descTag; | 471 | struct tag descTag; |
472 | __le32 previousAllocExtLocation; | 472 | __le32 previousAllocExtLocation; |
473 | __le32 lengthAllocDescs; | 473 | __le32 lengthAllocDescs; |
474 | } __attribute__ ((packed)); | 474 | } __attribute__ ((packed)); |
475 | 475 | ||
476 | /* ICB Tag (ECMA 167r3 4/14.6) */ | 476 | /* ICB Tag (ECMA 167r3 4/14.6) */ |
477 | typedef struct { | 477 | struct icbtag { |
478 | __le32 priorRecordedNumDirectEntries; | 478 | __le32 priorRecordedNumDirectEntries; |
479 | __le16 strategyType; | 479 | __le16 strategyType; |
480 | __le16 strategyParameter; | 480 | __le16 strategyParameter; |
481 | __le16 numEntries; | 481 | __le16 numEntries; |
482 | uint8_t reserved; | 482 | uint8_t reserved; |
483 | uint8_t fileType; | 483 | uint8_t fileType; |
484 | lb_addr parentICBLocation; | 484 | struct lb_addr parentICBLocation; |
485 | __le16 flags; | 485 | __le16 flags; |
486 | } __attribute__ ((packed)) icbtag; | 486 | } __attribute__ ((packed)); |
487 | 487 | ||
488 | /* Strategy Type (ECMA 167r3 4/14.6.2) */ | 488 | /* Strategy Type (ECMA 167r3 4/14.6.2) */ |
489 | #define ICBTAG_STRATEGY_TYPE_UNDEF 0x0000 | 489 | #define ICBTAG_STRATEGY_TYPE_UNDEF 0x0000 |
@@ -528,41 +528,41 @@ typedef struct { | |||
528 | 528 | ||
529 | /* Indirect Entry (ECMA 167r3 4/14.7) */ | 529 | /* Indirect Entry (ECMA 167r3 4/14.7) */ |
530 | struct indirectEntry { | 530 | struct indirectEntry { |
531 | tag descTag; | 531 | struct tag descTag; |
532 | icbtag icbTag; | 532 | struct icbtag icbTag; |
533 | long_ad indirectICB; | 533 | struct long_ad indirectICB; |
534 | } __attribute__ ((packed)); | 534 | } __attribute__ ((packed)); |
535 | 535 | ||
536 | /* Terminal Entry (ECMA 167r3 4/14.8) */ | 536 | /* Terminal Entry (ECMA 167r3 4/14.8) */ |
537 | struct terminalEntry { | 537 | struct terminalEntry { |
538 | tag descTag; | 538 | struct tag descTag; |
539 | icbtag icbTag; | 539 | struct icbtag icbTag; |
540 | } __attribute__ ((packed)); | 540 | } __attribute__ ((packed)); |
541 | 541 | ||
542 | /* File Entry (ECMA 167r3 4/14.9) */ | 542 | /* File Entry (ECMA 167r3 4/14.9) */ |
543 | struct fileEntry { | 543 | struct fileEntry { |
544 | tag descTag; | 544 | struct tag descTag; |
545 | icbtag icbTag; | 545 | struct icbtag icbTag; |
546 | __le32 uid; | 546 | __le32 uid; |
547 | __le32 gid; | 547 | __le32 gid; |
548 | __le32 permissions; | 548 | __le32 permissions; |
549 | __le16 fileLinkCount; | 549 | __le16 fileLinkCount; |
550 | uint8_t recordFormat; | 550 | uint8_t recordFormat; |
551 | uint8_t recordDisplayAttr; | 551 | uint8_t recordDisplayAttr; |
552 | __le32 recordLength; | 552 | __le32 recordLength; |
553 | __le64 informationLength; | 553 | __le64 informationLength; |
554 | __le64 logicalBlocksRecorded; | 554 | __le64 logicalBlocksRecorded; |
555 | timestamp accessTime; | 555 | struct timestamp accessTime; |
556 | timestamp modificationTime; | 556 | struct timestamp modificationTime; |
557 | timestamp attrTime; | 557 | struct timestamp attrTime; |
558 | __le32 checkpoint; | 558 | __le32 checkpoint; |
559 | long_ad extendedAttrICB; | 559 | struct long_ad extendedAttrICB; |
560 | regid impIdent; | 560 | struct regid impIdent; |
561 | __le64 uniqueID; | 561 | __le64 uniqueID; |
562 | __le32 lengthExtendedAttr; | 562 | __le32 lengthExtendedAttr; |
563 | __le32 lengthAllocDescs; | 563 | __le32 lengthAllocDescs; |
564 | uint8_t extendedAttr[0]; | 564 | uint8_t extendedAttr[0]; |
565 | uint8_t allocDescs[0]; | 565 | uint8_t allocDescs[0]; |
566 | } __attribute__ ((packed)); | 566 | } __attribute__ ((packed)); |
567 | 567 | ||
568 | /* Permissions (ECMA 167r3 4/14.9.5) */ | 568 | /* Permissions (ECMA 167r3 4/14.9.5) */ |
@@ -604,7 +604,7 @@ struct fileEntry { | |||
604 | 604 | ||
605 | /* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */ | 605 | /* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */ |
606 | struct extendedAttrHeaderDesc { | 606 | struct extendedAttrHeaderDesc { |
607 | tag descTag; | 607 | struct tag descTag; |
608 | __le32 impAttrLocation; | 608 | __le32 impAttrLocation; |
609 | __le32 appAttrLocation; | 609 | __le32 appAttrLocation; |
610 | } __attribute__ ((packed)); | 610 | } __attribute__ ((packed)); |
@@ -687,7 +687,7 @@ struct impUseExtAttr { | |||
687 | uint8_t reserved[3]; | 687 | uint8_t reserved[3]; |
688 | __le32 attrLength; | 688 | __le32 attrLength; |
689 | __le32 impUseLength; | 689 | __le32 impUseLength; |
690 | regid impIdent; | 690 | struct regid impIdent; |
691 | uint8_t impUse[0]; | 691 | uint8_t impUse[0]; |
692 | } __attribute__ ((packed)); | 692 | } __attribute__ ((packed)); |
693 | 693 | ||
@@ -698,7 +698,7 @@ struct appUseExtAttr { | |||
698 | uint8_t reserved[3]; | 698 | uint8_t reserved[3]; |
699 | __le32 attrLength; | 699 | __le32 attrLength; |
700 | __le32 appUseLength; | 700 | __le32 appUseLength; |
701 | regid appIdent; | 701 | struct regid appIdent; |
702 | uint8_t appUse[0]; | 702 | uint8_t appUse[0]; |
703 | } __attribute__ ((packed)); | 703 | } __attribute__ ((packed)); |
704 | 704 | ||
@@ -712,15 +712,15 @@ struct appUseExtAttr { | |||
712 | 712 | ||
713 | /* Unallocated Space Entry (ECMA 167r3 4/14.11) */ | 713 | /* Unallocated Space Entry (ECMA 167r3 4/14.11) */ |
714 | struct unallocSpaceEntry { | 714 | struct unallocSpaceEntry { |
715 | tag descTag; | 715 | struct tag descTag; |
716 | icbtag icbTag; | 716 | struct icbtag icbTag; |
717 | __le32 lengthAllocDescs; | 717 | __le32 lengthAllocDescs; |
718 | uint8_t allocDescs[0]; | 718 | uint8_t allocDescs[0]; |
719 | } __attribute__ ((packed)); | 719 | } __attribute__ ((packed)); |
720 | 720 | ||
721 | /* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */ | 721 | /* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */ |
722 | struct spaceBitmapDesc { | 722 | struct spaceBitmapDesc { |
723 | tag descTag; | 723 | struct tag descTag; |
724 | __le32 numOfBits; | 724 | __le32 numOfBits; |
725 | __le32 numOfBytes; | 725 | __le32 numOfBytes; |
726 | uint8_t bitmap[0]; | 726 | uint8_t bitmap[0]; |
@@ -728,13 +728,13 @@ struct spaceBitmapDesc { | |||
728 | 728 | ||
729 | /* Partition Integrity Entry (ECMA 167r3 4/14.13) */ | 729 | /* Partition Integrity Entry (ECMA 167r3 4/14.13) */ |
730 | struct partitionIntegrityEntry { | 730 | struct partitionIntegrityEntry { |
731 | tag descTag; | 731 | struct tag descTag; |
732 | icbtag icbTag; | 732 | struct icbtag icbTag; |
733 | timestamp recordingDateAndTime; | 733 | struct timestamp recordingDateAndTime; |
734 | uint8_t integrityType; | 734 | uint8_t integrityType; |
735 | uint8_t reserved[175]; | 735 | uint8_t reserved[175]; |
736 | regid impIdent; | 736 | struct regid impIdent; |
737 | uint8_t impUse[256]; | 737 | uint8_t impUse[256]; |
738 | } __attribute__ ((packed)); | 738 | } __attribute__ ((packed)); |
739 | 739 | ||
740 | /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ | 740 | /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ |
@@ -765,32 +765,32 @@ struct pathComponent { | |||
765 | 765 | ||
766 | /* File Entry (ECMA 167r3 4/14.17) */ | 766 | /* File Entry (ECMA 167r3 4/14.17) */ |
767 | struct extendedFileEntry { | 767 | struct extendedFileEntry { |
768 | tag descTag; | 768 | struct tag descTag; |
769 | icbtag icbTag; | 769 | struct icbtag icbTag; |
770 | __le32 uid; | 770 | __le32 uid; |
771 | __le32 gid; | 771 | __le32 gid; |
772 | __le32 permissions; | 772 | __le32 permissions; |
773 | __le16 fileLinkCount; | 773 | __le16 fileLinkCount; |
774 | uint8_t recordFormat; | 774 | uint8_t recordFormat; |
775 | uint8_t recordDisplayAttr; | 775 | uint8_t recordDisplayAttr; |
776 | __le32 recordLength; | 776 | __le32 recordLength; |
777 | __le64 informationLength; | 777 | __le64 informationLength; |
778 | __le64 objectSize; | 778 | __le64 objectSize; |
779 | __le64 logicalBlocksRecorded; | 779 | __le64 logicalBlocksRecorded; |
780 | timestamp accessTime; | 780 | struct timestamp accessTime; |
781 | timestamp modificationTime; | 781 | struct timestamp modificationTime; |
782 | timestamp createTime; | 782 | struct timestamp createTime; |
783 | timestamp attrTime; | 783 | struct timestamp attrTime; |
784 | __le32 checkpoint; | 784 | __le32 checkpoint; |
785 | __le32 reserved; | 785 | __le32 reserved; |
786 | long_ad extendedAttrICB; | 786 | struct long_ad extendedAttrICB; |
787 | long_ad streamDirectoryICB; | 787 | struct long_ad streamDirectoryICB; |
788 | regid impIdent; | 788 | struct regid impIdent; |
789 | __le64 uniqueID; | 789 | __le64 uniqueID; |
790 | __le32 lengthExtendedAttr; | 790 | __le32 lengthExtendedAttr; |
791 | __le32 lengthAllocDescs; | 791 | __le32 lengthAllocDescs; |
792 | uint8_t extendedAttr[0]; | 792 | uint8_t extendedAttr[0]; |
793 | uint8_t allocDescs[0]; | 793 | uint8_t allocDescs[0]; |
794 | } __attribute__ ((packed)); | 794 | } __attribute__ ((packed)); |
795 | 795 | ||
796 | #endif /* _ECMA_167_H */ | 796 | #endif /* _ECMA_167_H */ |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 47dbe5613f90..c10fa39f97e2 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -49,12 +49,11 @@ void udf_free_inode(struct inode *inode) | |||
49 | le32_add_cpu(&lvidiu->numDirs, -1); | 49 | le32_add_cpu(&lvidiu->numDirs, -1); |
50 | else | 50 | else |
51 | le32_add_cpu(&lvidiu->numFiles, -1); | 51 | le32_add_cpu(&lvidiu->numFiles, -1); |
52 | 52 | udf_updated_lvid(sb); | |
53 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
54 | } | 53 | } |
55 | mutex_unlock(&sbi->s_alloc_mutex); | 54 | mutex_unlock(&sbi->s_alloc_mutex); |
56 | 55 | ||
57 | udf_free_blocks(sb, NULL, UDF_I(inode)->i_location, 0, 1); | 56 | udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); |
58 | } | 57 | } |
59 | 58 | ||
60 | struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | 59 | struct inode *udf_new_inode(struct inode *dir, int mode, int *err) |
@@ -122,7 +121,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
122 | if (!(++uniqueID & 0x00000000FFFFFFFFUL)) | 121 | if (!(++uniqueID & 0x00000000FFFFFFFFUL)) |
123 | uniqueID += 16; | 122 | uniqueID += 16; |
124 | lvhd->uniqueID = cpu_to_le64(uniqueID); | 123 | lvhd->uniqueID = cpu_to_le64(uniqueID); |
125 | mark_buffer_dirty(sbi->s_lvid_bh); | 124 | udf_updated_lvid(sb); |
126 | } | 125 | } |
127 | mutex_unlock(&sbi->s_alloc_mutex); | 126 | mutex_unlock(&sbi->s_alloc_mutex); |
128 | inode->i_mode = mode; | 127 | inode->i_mode = mode; |
@@ -138,7 +137,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
138 | iinfo->i_location.logicalBlockNum = block; | 137 | iinfo->i_location.logicalBlockNum = block; |
139 | iinfo->i_location.partitionReferenceNum = | 138 | iinfo->i_location.partitionReferenceNum = |
140 | dinfo->i_location.partitionReferenceNum; | 139 | dinfo->i_location.partitionReferenceNum; |
141 | inode->i_ino = udf_get_lb_pblock(sb, iinfo->i_location, 0); | 140 | inode->i_ino = udf_get_lb_pblock(sb, &iinfo->i_location, 0); |
142 | inode->i_blocks = 0; | 141 | inode->i_blocks = 0; |
143 | iinfo->i_lenEAttr = 0; | 142 | iinfo->i_lenEAttr = 0; |
144 | iinfo->i_lenAlloc = 0; | 143 | iinfo->i_lenAlloc = 0; |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 30ebde490f7f..e7533f785636 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -55,15 +55,15 @@ static int udf_alloc_i_data(struct inode *inode, size_t size); | |||
55 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, | 55 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, |
56 | sector_t *, int *); | 56 | sector_t *, int *); |
57 | static int8_t udf_insert_aext(struct inode *, struct extent_position, | 57 | static int8_t udf_insert_aext(struct inode *, struct extent_position, |
58 | kernel_lb_addr, uint32_t); | 58 | struct kernel_lb_addr, uint32_t); |
59 | static void udf_split_extents(struct inode *, int *, int, int, | 59 | static void udf_split_extents(struct inode *, int *, int, int, |
60 | kernel_long_ad[EXTENT_MERGE_SIZE], int *); | 60 | struct kernel_long_ad[EXTENT_MERGE_SIZE], int *); |
61 | static void udf_prealloc_extents(struct inode *, int, int, | 61 | static void udf_prealloc_extents(struct inode *, int, int, |
62 | kernel_long_ad[EXTENT_MERGE_SIZE], int *); | 62 | struct kernel_long_ad[EXTENT_MERGE_SIZE], int *); |
63 | static void udf_merge_extents(struct inode *, | 63 | static void udf_merge_extents(struct inode *, |
64 | kernel_long_ad[EXTENT_MERGE_SIZE], int *); | 64 | struct kernel_long_ad[EXTENT_MERGE_SIZE], int *); |
65 | static void udf_update_extents(struct inode *, | 65 | static void udf_update_extents(struct inode *, |
66 | kernel_long_ad[EXTENT_MERGE_SIZE], int, int, | 66 | struct kernel_long_ad[EXTENT_MERGE_SIZE], int, int, |
67 | struct extent_position *); | 67 | struct extent_position *); |
68 | static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); | 68 | static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); |
69 | 69 | ||
@@ -200,7 +200,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, | |||
200 | { | 200 | { |
201 | int newblock; | 201 | int newblock; |
202 | struct buffer_head *dbh = NULL; | 202 | struct buffer_head *dbh = NULL; |
203 | kernel_lb_addr eloc; | 203 | struct kernel_lb_addr eloc; |
204 | uint32_t elen; | 204 | uint32_t elen; |
205 | uint8_t alloctype; | 205 | uint8_t alloctype; |
206 | struct extent_position epos; | 206 | struct extent_position epos; |
@@ -281,7 +281,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, | |||
281 | epos.bh = NULL; | 281 | epos.bh = NULL; |
282 | epos.block = iinfo->i_location; | 282 | epos.block = iinfo->i_location; |
283 | epos.offset = udf_file_entry_alloc_offset(inode); | 283 | epos.offset = udf_file_entry_alloc_offset(inode); |
284 | udf_add_aext(inode, &epos, eloc, elen, 0); | 284 | udf_add_aext(inode, &epos, &eloc, elen, 0); |
285 | /* UniqueID stuff */ | 285 | /* UniqueID stuff */ |
286 | 286 | ||
287 | brelse(epos.bh); | 287 | brelse(epos.bh); |
@@ -359,12 +359,12 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block, | |||
359 | 359 | ||
360 | /* Extend the file by 'blocks' blocks, return the number of extents added */ | 360 | /* Extend the file by 'blocks' blocks, return the number of extents added */ |
361 | int udf_extend_file(struct inode *inode, struct extent_position *last_pos, | 361 | int udf_extend_file(struct inode *inode, struct extent_position *last_pos, |
362 | kernel_long_ad *last_ext, sector_t blocks) | 362 | struct kernel_long_ad *last_ext, sector_t blocks) |
363 | { | 363 | { |
364 | sector_t add; | 364 | sector_t add; |
365 | int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); | 365 | int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); |
366 | struct super_block *sb = inode->i_sb; | 366 | struct super_block *sb = inode->i_sb; |
367 | kernel_lb_addr prealloc_loc = {}; | 367 | struct kernel_lb_addr prealloc_loc = {}; |
368 | int prealloc_len = 0; | 368 | int prealloc_len = 0; |
369 | struct udf_inode_info *iinfo; | 369 | struct udf_inode_info *iinfo; |
370 | 370 | ||
@@ -411,11 +411,11 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, | |||
411 | } | 411 | } |
412 | 412 | ||
413 | if (fake) { | 413 | if (fake) { |
414 | udf_add_aext(inode, last_pos, last_ext->extLocation, | 414 | udf_add_aext(inode, last_pos, &last_ext->extLocation, |
415 | last_ext->extLength, 1); | 415 | last_ext->extLength, 1); |
416 | count++; | 416 | count++; |
417 | } else | 417 | } else |
418 | udf_write_aext(inode, last_pos, last_ext->extLocation, | 418 | udf_write_aext(inode, last_pos, &last_ext->extLocation, |
419 | last_ext->extLength, 1); | 419 | last_ext->extLength, 1); |
420 | 420 | ||
421 | /* Managed to do everything necessary? */ | 421 | /* Managed to do everything necessary? */ |
@@ -432,7 +432,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, | |||
432 | /* Create enough extents to cover the whole hole */ | 432 | /* Create enough extents to cover the whole hole */ |
433 | while (blocks > add) { | 433 | while (blocks > add) { |
434 | blocks -= add; | 434 | blocks -= add; |
435 | if (udf_add_aext(inode, last_pos, last_ext->extLocation, | 435 | if (udf_add_aext(inode, last_pos, &last_ext->extLocation, |
436 | last_ext->extLength, 1) == -1) | 436 | last_ext->extLength, 1) == -1) |
437 | return -1; | 437 | return -1; |
438 | count++; | 438 | count++; |
@@ -440,7 +440,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, | |||
440 | if (blocks) { | 440 | if (blocks) { |
441 | last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | | 441 | last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | |
442 | (blocks << sb->s_blocksize_bits); | 442 | (blocks << sb->s_blocksize_bits); |
443 | if (udf_add_aext(inode, last_pos, last_ext->extLocation, | 443 | if (udf_add_aext(inode, last_pos, &last_ext->extLocation, |
444 | last_ext->extLength, 1) == -1) | 444 | last_ext->extLength, 1) == -1) |
445 | return -1; | 445 | return -1; |
446 | count++; | 446 | count++; |
@@ -449,7 +449,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, | |||
449 | out: | 449 | out: |
450 | /* Do we have some preallocated blocks saved? */ | 450 | /* Do we have some preallocated blocks saved? */ |
451 | if (prealloc_len) { | 451 | if (prealloc_len) { |
452 | if (udf_add_aext(inode, last_pos, prealloc_loc, | 452 | if (udf_add_aext(inode, last_pos, &prealloc_loc, |
453 | prealloc_len, 1) == -1) | 453 | prealloc_len, 1) == -1) |
454 | return -1; | 454 | return -1; |
455 | last_ext->extLocation = prealloc_loc; | 455 | last_ext->extLocation = prealloc_loc; |
@@ -459,9 +459,9 @@ out: | |||
459 | 459 | ||
460 | /* last_pos should point to the last written extent... */ | 460 | /* last_pos should point to the last written extent... */ |
461 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 461 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
462 | last_pos->offset -= sizeof(short_ad); | 462 | last_pos->offset -= sizeof(struct short_ad); |
463 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 463 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
464 | last_pos->offset -= sizeof(long_ad); | 464 | last_pos->offset -= sizeof(struct long_ad); |
465 | else | 465 | else |
466 | return -1; | 466 | return -1; |
467 | 467 | ||
@@ -473,11 +473,11 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
473 | { | 473 | { |
474 | static sector_t last_block; | 474 | static sector_t last_block; |
475 | struct buffer_head *result = NULL; | 475 | struct buffer_head *result = NULL; |
476 | kernel_long_ad laarr[EXTENT_MERGE_SIZE]; | 476 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE]; |
477 | struct extent_position prev_epos, cur_epos, next_epos; | 477 | struct extent_position prev_epos, cur_epos, next_epos; |
478 | int count = 0, startnum = 0, endnum = 0; | 478 | int count = 0, startnum = 0, endnum = 0; |
479 | uint32_t elen = 0, tmpelen; | 479 | uint32_t elen = 0, tmpelen; |
480 | kernel_lb_addr eloc, tmpeloc; | 480 | struct kernel_lb_addr eloc, tmpeloc; |
481 | int c = 1; | 481 | int c = 1; |
482 | loff_t lbcount = 0, b_off = 0; | 482 | loff_t lbcount = 0, b_off = 0; |
483 | uint32_t newblocknum, newblock; | 483 | uint32_t newblocknum, newblock; |
@@ -550,12 +550,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
550 | elen = EXT_RECORDED_ALLOCATED | | 550 | elen = EXT_RECORDED_ALLOCATED | |
551 | ((elen + inode->i_sb->s_blocksize - 1) & | 551 | ((elen + inode->i_sb->s_blocksize - 1) & |
552 | ~(inode->i_sb->s_blocksize - 1)); | 552 | ~(inode->i_sb->s_blocksize - 1)); |
553 | etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1); | 553 | etype = udf_write_aext(inode, &cur_epos, &eloc, elen, 1); |
554 | } | 554 | } |
555 | brelse(prev_epos.bh); | 555 | brelse(prev_epos.bh); |
556 | brelse(cur_epos.bh); | 556 | brelse(cur_epos.bh); |
557 | brelse(next_epos.bh); | 557 | brelse(next_epos.bh); |
558 | newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset); | 558 | newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset); |
559 | *phys = newblock; | 559 | *phys = newblock; |
560 | return NULL; | 560 | return NULL; |
561 | } | 561 | } |
@@ -572,7 +572,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
572 | } else { | 572 | } else { |
573 | /* Create a fake extent when there's not one */ | 573 | /* Create a fake extent when there's not one */ |
574 | memset(&laarr[0].extLocation, 0x00, | 574 | memset(&laarr[0].extLocation, 0x00, |
575 | sizeof(kernel_lb_addr)); | 575 | sizeof(struct kernel_lb_addr)); |
576 | laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; | 576 | laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; |
577 | /* Will udf_extend_file() create real extent from | 577 | /* Will udf_extend_file() create real extent from |
578 | a fake one? */ | 578 | a fake one? */ |
@@ -602,7 +602,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
602 | laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | | 602 | laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | |
603 | inode->i_sb->s_blocksize; | 603 | inode->i_sb->s_blocksize; |
604 | memset(&laarr[c].extLocation, 0x00, | 604 | memset(&laarr[c].extLocation, 0x00, |
605 | sizeof(kernel_lb_addr)); | 605 | sizeof(struct kernel_lb_addr)); |
606 | count++; | 606 | count++; |
607 | endnum++; | 607 | endnum++; |
608 | } | 608 | } |
@@ -699,7 +699,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
699 | 699 | ||
700 | static void udf_split_extents(struct inode *inode, int *c, int offset, | 700 | static void udf_split_extents(struct inode *inode, int *c, int offset, |
701 | int newblocknum, | 701 | int newblocknum, |
702 | kernel_long_ad laarr[EXTENT_MERGE_SIZE], | 702 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE], |
703 | int *endnum) | 703 | int *endnum) |
704 | { | 704 | { |
705 | unsigned long blocksize = inode->i_sb->s_blocksize; | 705 | unsigned long blocksize = inode->i_sb->s_blocksize; |
@@ -726,7 +726,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, | |||
726 | if (offset) { | 726 | if (offset) { |
727 | if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { | 727 | if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { |
728 | udf_free_blocks(inode->i_sb, inode, | 728 | udf_free_blocks(inode->i_sb, inode, |
729 | laarr[curr].extLocation, | 729 | &laarr[curr].extLocation, |
730 | 0, offset); | 730 | 0, offset); |
731 | laarr[curr].extLength = | 731 | laarr[curr].extLength = |
732 | EXT_NOT_RECORDED_NOT_ALLOCATED | | 732 | EXT_NOT_RECORDED_NOT_ALLOCATED | |
@@ -763,7 +763,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, | |||
763 | } | 763 | } |
764 | 764 | ||
765 | static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, | 765 | static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, |
766 | kernel_long_ad laarr[EXTENT_MERGE_SIZE], | 766 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE], |
767 | int *endnum) | 767 | int *endnum) |
768 | { | 768 | { |
769 | int start, length = 0, currlength = 0, i; | 769 | int start, length = 0, currlength = 0, i; |
@@ -817,7 +817,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, | |||
817 | inode->i_sb->s_blocksize_bits); | 817 | inode->i_sb->s_blocksize_bits); |
818 | else { | 818 | else { |
819 | memmove(&laarr[c + 2], &laarr[c + 1], | 819 | memmove(&laarr[c + 2], &laarr[c + 1], |
820 | sizeof(long_ad) * (*endnum - (c + 1))); | 820 | sizeof(struct long_ad) * (*endnum - (c + 1))); |
821 | (*endnum)++; | 821 | (*endnum)++; |
822 | laarr[c + 1].extLocation.logicalBlockNum = next; | 822 | laarr[c + 1].extLocation.logicalBlockNum = next; |
823 | laarr[c + 1].extLocation.partitionReferenceNum = | 823 | laarr[c + 1].extLocation.partitionReferenceNum = |
@@ -846,7 +846,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, | |||
846 | if (*endnum > (i + 1)) | 846 | if (*endnum > (i + 1)) |
847 | memmove(&laarr[i], | 847 | memmove(&laarr[i], |
848 | &laarr[i + 1], | 848 | &laarr[i + 1], |
849 | sizeof(long_ad) * | 849 | sizeof(struct long_ad) * |
850 | (*endnum - (i + 1))); | 850 | (*endnum - (i + 1))); |
851 | i--; | 851 | i--; |
852 | (*endnum)--; | 852 | (*endnum)--; |
@@ -859,7 +859,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, | |||
859 | } | 859 | } |
860 | 860 | ||
861 | static void udf_merge_extents(struct inode *inode, | 861 | static void udf_merge_extents(struct inode *inode, |
862 | kernel_long_ad laarr[EXTENT_MERGE_SIZE], | 862 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE], |
863 | int *endnum) | 863 | int *endnum) |
864 | { | 864 | { |
865 | int i; | 865 | int i; |
@@ -867,8 +867,8 @@ static void udf_merge_extents(struct inode *inode, | |||
867 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | 867 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; |
868 | 868 | ||
869 | for (i = 0; i < (*endnum - 1); i++) { | 869 | for (i = 0; i < (*endnum - 1); i++) { |
870 | kernel_long_ad *li /*l[i]*/ = &laarr[i]; | 870 | struct kernel_long_ad *li /*l[i]*/ = &laarr[i]; |
871 | kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1]; | 871 | struct kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1]; |
872 | 872 | ||
873 | if (((li->extLength >> 30) == (lip1->extLength >> 30)) && | 873 | if (((li->extLength >> 30) == (lip1->extLength >> 30)) && |
874 | (((li->extLength >> 30) == | 874 | (((li->extLength >> 30) == |
@@ -902,7 +902,7 @@ static void udf_merge_extents(struct inode *inode, | |||
902 | blocksize - 1) & ~(blocksize - 1)); | 902 | blocksize - 1) & ~(blocksize - 1)); |
903 | if (*endnum > (i + 2)) | 903 | if (*endnum > (i + 2)) |
904 | memmove(&laarr[i + 1], &laarr[i + 2], | 904 | memmove(&laarr[i + 1], &laarr[i + 2], |
905 | sizeof(long_ad) * | 905 | sizeof(struct long_ad) * |
906 | (*endnum - (i + 2))); | 906 | (*endnum - (i + 2))); |
907 | i--; | 907 | i--; |
908 | (*endnum)--; | 908 | (*endnum)--; |
@@ -911,7 +911,7 @@ static void udf_merge_extents(struct inode *inode, | |||
911 | (EXT_NOT_RECORDED_ALLOCATED >> 30)) && | 911 | (EXT_NOT_RECORDED_ALLOCATED >> 30)) && |
912 | ((lip1->extLength >> 30) == | 912 | ((lip1->extLength >> 30) == |
913 | (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) { | 913 | (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) { |
914 | udf_free_blocks(inode->i_sb, inode, li->extLocation, 0, | 914 | udf_free_blocks(inode->i_sb, inode, &li->extLocation, 0, |
915 | ((li->extLength & | 915 | ((li->extLength & |
916 | UDF_EXTENT_LENGTH_MASK) + | 916 | UDF_EXTENT_LENGTH_MASK) + |
917 | blocksize - 1) >> blocksize_bits); | 917 | blocksize - 1) >> blocksize_bits); |
@@ -937,7 +937,7 @@ static void udf_merge_extents(struct inode *inode, | |||
937 | blocksize - 1) & ~(blocksize - 1)); | 937 | blocksize - 1) & ~(blocksize - 1)); |
938 | if (*endnum > (i + 2)) | 938 | if (*endnum > (i + 2)) |
939 | memmove(&laarr[i + 1], &laarr[i + 2], | 939 | memmove(&laarr[i + 1], &laarr[i + 2], |
940 | sizeof(long_ad) * | 940 | sizeof(struct long_ad) * |
941 | (*endnum - (i + 2))); | 941 | (*endnum - (i + 2))); |
942 | i--; | 942 | i--; |
943 | (*endnum)--; | 943 | (*endnum)--; |
@@ -945,7 +945,7 @@ static void udf_merge_extents(struct inode *inode, | |||
945 | } else if ((li->extLength >> 30) == | 945 | } else if ((li->extLength >> 30) == |
946 | (EXT_NOT_RECORDED_ALLOCATED >> 30)) { | 946 | (EXT_NOT_RECORDED_ALLOCATED >> 30)) { |
947 | udf_free_blocks(inode->i_sb, inode, | 947 | udf_free_blocks(inode->i_sb, inode, |
948 | li->extLocation, 0, | 948 | &li->extLocation, 0, |
949 | ((li->extLength & | 949 | ((li->extLength & |
950 | UDF_EXTENT_LENGTH_MASK) + | 950 | UDF_EXTENT_LENGTH_MASK) + |
951 | blocksize - 1) >> blocksize_bits); | 951 | blocksize - 1) >> blocksize_bits); |
@@ -959,12 +959,12 @@ static void udf_merge_extents(struct inode *inode, | |||
959 | } | 959 | } |
960 | 960 | ||
961 | static void udf_update_extents(struct inode *inode, | 961 | static void udf_update_extents(struct inode *inode, |
962 | kernel_long_ad laarr[EXTENT_MERGE_SIZE], | 962 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE], |
963 | int startnum, int endnum, | 963 | int startnum, int endnum, |
964 | struct extent_position *epos) | 964 | struct extent_position *epos) |
965 | { | 965 | { |
966 | int start = 0, i; | 966 | int start = 0, i; |
967 | kernel_lb_addr tmploc; | 967 | struct kernel_lb_addr tmploc; |
968 | uint32_t tmplen; | 968 | uint32_t tmplen; |
969 | 969 | ||
970 | if (startnum > endnum) { | 970 | if (startnum > endnum) { |
@@ -983,7 +983,7 @@ static void udf_update_extents(struct inode *inode, | |||
983 | 983 | ||
984 | for (i = start; i < endnum; i++) { | 984 | for (i = start; i < endnum; i++) { |
985 | udf_next_aext(inode, epos, &tmploc, &tmplen, 0); | 985 | udf_next_aext(inode, epos, &tmploc, &tmplen, 0); |
986 | udf_write_aext(inode, epos, laarr[i].extLocation, | 986 | udf_write_aext(inode, epos, &laarr[i].extLocation, |
987 | laarr[i].extLength, 1); | 987 | laarr[i].extLength, 1); |
988 | } | 988 | } |
989 | } | 989 | } |
@@ -1076,7 +1076,7 @@ static void __udf_read_inode(struct inode *inode) | |||
1076 | * i_nlink = 1 | 1076 | * i_nlink = 1 |
1077 | * i_op = NULL; | 1077 | * i_op = NULL; |
1078 | */ | 1078 | */ |
1079 | bh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 0, &ident); | 1079 | bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); |
1080 | if (!bh) { | 1080 | if (!bh) { |
1081 | printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", | 1081 | printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", |
1082 | inode->i_ino); | 1082 | inode->i_ino); |
@@ -1098,24 +1098,24 @@ static void __udf_read_inode(struct inode *inode) | |||
1098 | if (fe->icbTag.strategyType == cpu_to_le16(4096)) { | 1098 | if (fe->icbTag.strategyType == cpu_to_le16(4096)) { |
1099 | struct buffer_head *ibh; | 1099 | struct buffer_head *ibh; |
1100 | 1100 | ||
1101 | ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1, | 1101 | ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, |
1102 | &ident); | 1102 | &ident); |
1103 | if (ident == TAG_IDENT_IE && ibh) { | 1103 | if (ident == TAG_IDENT_IE && ibh) { |
1104 | struct buffer_head *nbh = NULL; | 1104 | struct buffer_head *nbh = NULL; |
1105 | kernel_lb_addr loc; | 1105 | struct kernel_lb_addr loc; |
1106 | struct indirectEntry *ie; | 1106 | struct indirectEntry *ie; |
1107 | 1107 | ||
1108 | ie = (struct indirectEntry *)ibh->b_data; | 1108 | ie = (struct indirectEntry *)ibh->b_data; |
1109 | loc = lelb_to_cpu(ie->indirectICB.extLocation); | 1109 | loc = lelb_to_cpu(ie->indirectICB.extLocation); |
1110 | 1110 | ||
1111 | if (ie->indirectICB.extLength && | 1111 | if (ie->indirectICB.extLength && |
1112 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, | 1112 | (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, |
1113 | &ident))) { | 1113 | &ident))) { |
1114 | if (ident == TAG_IDENT_FE || | 1114 | if (ident == TAG_IDENT_FE || |
1115 | ident == TAG_IDENT_EFE) { | 1115 | ident == TAG_IDENT_EFE) { |
1116 | memcpy(&iinfo->i_location, | 1116 | memcpy(&iinfo->i_location, |
1117 | &loc, | 1117 | &loc, |
1118 | sizeof(kernel_lb_addr)); | 1118 | sizeof(struct kernel_lb_addr)); |
1119 | brelse(bh); | 1119 | brelse(bh); |
1120 | brelse(ibh); | 1120 | brelse(ibh); |
1121 | brelse(nbh); | 1121 | brelse(nbh); |
@@ -1222,8 +1222,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1222 | inode->i_size = le64_to_cpu(fe->informationLength); | 1222 | inode->i_size = le64_to_cpu(fe->informationLength); |
1223 | iinfo->i_lenExtents = inode->i_size; | 1223 | iinfo->i_lenExtents = inode->i_size; |
1224 | 1224 | ||
1225 | inode->i_mode = udf_convert_permissions(fe); | 1225 | if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY && |
1226 | inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask; | 1226 | sbi->s_fmode != UDF_INVALID_MODE) |
1227 | inode->i_mode = sbi->s_fmode; | ||
1228 | else if (fe->icbTag.fileType == ICBTAG_FILE_TYPE_DIRECTORY && | ||
1229 | sbi->s_dmode != UDF_INVALID_MODE) | ||
1230 | inode->i_mode = sbi->s_dmode; | ||
1231 | else | ||
1232 | inode->i_mode = udf_convert_permissions(fe); | ||
1233 | inode->i_mode &= ~sbi->s_umask; | ||
1227 | 1234 | ||
1228 | if (iinfo->i_efe == 0) { | 1235 | if (iinfo->i_efe == 0) { |
1229 | inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << | 1236 | inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << |
@@ -1396,7 +1403,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1396 | 1403 | ||
1397 | bh = udf_tread(inode->i_sb, | 1404 | bh = udf_tread(inode->i_sb, |
1398 | udf_get_lb_pblock(inode->i_sb, | 1405 | udf_get_lb_pblock(inode->i_sb, |
1399 | iinfo->i_location, 0)); | 1406 | &iinfo->i_location, 0)); |
1400 | if (!bh) { | 1407 | if (!bh) { |
1401 | udf_debug("bread failure\n"); | 1408 | udf_debug("bread failure\n"); |
1402 | return -EIO; | 1409 | return -EIO; |
@@ -1416,13 +1423,13 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1416 | iinfo->i_ext.i_data, inode->i_sb->s_blocksize - | 1423 | iinfo->i_ext.i_data, inode->i_sb->s_blocksize - |
1417 | sizeof(struct unallocSpaceEntry)); | 1424 | sizeof(struct unallocSpaceEntry)); |
1418 | crclen = sizeof(struct unallocSpaceEntry) + | 1425 | crclen = sizeof(struct unallocSpaceEntry) + |
1419 | iinfo->i_lenAlloc - sizeof(tag); | 1426 | iinfo->i_lenAlloc - sizeof(struct tag); |
1420 | use->descTag.tagLocation = cpu_to_le32( | 1427 | use->descTag.tagLocation = cpu_to_le32( |
1421 | iinfo->i_location. | 1428 | iinfo->i_location. |
1422 | logicalBlockNum); | 1429 | logicalBlockNum); |
1423 | use->descTag.descCRCLength = cpu_to_le16(crclen); | 1430 | use->descTag.descCRCLength = cpu_to_le16(crclen); |
1424 | use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use + | 1431 | use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use + |
1425 | sizeof(tag), | 1432 | sizeof(struct tag), |
1426 | crclen)); | 1433 | crclen)); |
1427 | use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); | 1434 | use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); |
1428 | 1435 | ||
@@ -1459,23 +1466,23 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1459 | fe->informationLength = cpu_to_le64(inode->i_size); | 1466 | fe->informationLength = cpu_to_le64(inode->i_size); |
1460 | 1467 | ||
1461 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 1468 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
1462 | regid *eid; | 1469 | struct regid *eid; |
1463 | struct deviceSpec *dsea = | 1470 | struct deviceSpec *dsea = |
1464 | (struct deviceSpec *)udf_get_extendedattr(inode, 12, 1); | 1471 | (struct deviceSpec *)udf_get_extendedattr(inode, 12, 1); |
1465 | if (!dsea) { | 1472 | if (!dsea) { |
1466 | dsea = (struct deviceSpec *) | 1473 | dsea = (struct deviceSpec *) |
1467 | udf_add_extendedattr(inode, | 1474 | udf_add_extendedattr(inode, |
1468 | sizeof(struct deviceSpec) + | 1475 | sizeof(struct deviceSpec) + |
1469 | sizeof(regid), 12, 0x3); | 1476 | sizeof(struct regid), 12, 0x3); |
1470 | dsea->attrType = cpu_to_le32(12); | 1477 | dsea->attrType = cpu_to_le32(12); |
1471 | dsea->attrSubtype = 1; | 1478 | dsea->attrSubtype = 1; |
1472 | dsea->attrLength = cpu_to_le32( | 1479 | dsea->attrLength = cpu_to_le32( |
1473 | sizeof(struct deviceSpec) + | 1480 | sizeof(struct deviceSpec) + |
1474 | sizeof(regid)); | 1481 | sizeof(struct regid)); |
1475 | dsea->impUseLength = cpu_to_le32(sizeof(regid)); | 1482 | dsea->impUseLength = cpu_to_le32(sizeof(struct regid)); |
1476 | } | 1483 | } |
1477 | eid = (regid *)dsea->impUse; | 1484 | eid = (struct regid *)dsea->impUse; |
1478 | memset(eid, 0, sizeof(regid)); | 1485 | memset(eid, 0, sizeof(struct regid)); |
1479 | strcpy(eid->ident, UDF_ID_DEVELOPER); | 1486 | strcpy(eid->ident, UDF_ID_DEVELOPER); |
1480 | eid->identSuffix[0] = UDF_OS_CLASS_UNIX; | 1487 | eid->identSuffix[0] = UDF_OS_CLASS_UNIX; |
1481 | eid->identSuffix[1] = UDF_OS_ID_LINUX; | 1488 | eid->identSuffix[1] = UDF_OS_ID_LINUX; |
@@ -1494,7 +1501,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1494 | udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); | 1501 | udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); |
1495 | udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); | 1502 | udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); |
1496 | udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); | 1503 | udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); |
1497 | memset(&(fe->impIdent), 0, sizeof(regid)); | 1504 | memset(&(fe->impIdent), 0, sizeof(struct regid)); |
1498 | strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); | 1505 | strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); |
1499 | fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; | 1506 | fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; |
1500 | fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; | 1507 | fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; |
@@ -1533,7 +1540,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1533 | udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); | 1540 | udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); |
1534 | udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); | 1541 | udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); |
1535 | 1542 | ||
1536 | memset(&(efe->impIdent), 0, sizeof(regid)); | 1543 | memset(&(efe->impIdent), 0, sizeof(struct regid)); |
1537 | strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); | 1544 | strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); |
1538 | efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; | 1545 | efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; |
1539 | efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; | 1546 | efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; |
@@ -1584,9 +1591,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1584 | fe->descTag.tagLocation = cpu_to_le32( | 1591 | fe->descTag.tagLocation = cpu_to_le32( |
1585 | iinfo->i_location.logicalBlockNum); | 1592 | iinfo->i_location.logicalBlockNum); |
1586 | crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - | 1593 | crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - |
1587 | sizeof(tag); | 1594 | sizeof(struct tag); |
1588 | fe->descTag.descCRCLength = cpu_to_le16(crclen); | 1595 | fe->descTag.descCRCLength = cpu_to_le16(crclen); |
1589 | fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(tag), | 1596 | fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(struct tag), |
1590 | crclen)); | 1597 | crclen)); |
1591 | fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); | 1598 | fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); |
1592 | 1599 | ||
@@ -1606,7 +1613,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1606 | return err; | 1613 | return err; |
1607 | } | 1614 | } |
1608 | 1615 | ||
1609 | struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) | 1616 | struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino) |
1610 | { | 1617 | { |
1611 | unsigned long block = udf_get_lb_pblock(sb, ino, 0); | 1618 | unsigned long block = udf_get_lb_pblock(sb, ino, 0); |
1612 | struct inode *inode = iget_locked(sb, block); | 1619 | struct inode *inode = iget_locked(sb, block); |
@@ -1615,7 +1622,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) | |||
1615 | return NULL; | 1622 | return NULL; |
1616 | 1623 | ||
1617 | if (inode->i_state & I_NEW) { | 1624 | if (inode->i_state & I_NEW) { |
1618 | memcpy(&UDF_I(inode)->i_location, &ino, sizeof(kernel_lb_addr)); | 1625 | memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); |
1619 | __udf_read_inode(inode); | 1626 | __udf_read_inode(inode); |
1620 | unlock_new_inode(inode); | 1627 | unlock_new_inode(inode); |
1621 | } | 1628 | } |
@@ -1623,10 +1630,10 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) | |||
1623 | if (is_bad_inode(inode)) | 1630 | if (is_bad_inode(inode)) |
1624 | goto out_iput; | 1631 | goto out_iput; |
1625 | 1632 | ||
1626 | if (ino.logicalBlockNum >= UDF_SB(sb)-> | 1633 | if (ino->logicalBlockNum >= UDF_SB(sb)-> |
1627 | s_partmaps[ino.partitionReferenceNum].s_partition_len) { | 1634 | s_partmaps[ino->partitionReferenceNum].s_partition_len) { |
1628 | udf_debug("block=%d, partition=%d out of range\n", | 1635 | udf_debug("block=%d, partition=%d out of range\n", |
1629 | ino.logicalBlockNum, ino.partitionReferenceNum); | 1636 | ino->logicalBlockNum, ino->partitionReferenceNum); |
1630 | make_bad_inode(inode); | 1637 | make_bad_inode(inode); |
1631 | goto out_iput; | 1638 | goto out_iput; |
1632 | } | 1639 | } |
@@ -1639,11 +1646,11 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) | |||
1639 | } | 1646 | } |
1640 | 1647 | ||
1641 | int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | 1648 | int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, |
1642 | kernel_lb_addr eloc, uint32_t elen, int inc) | 1649 | struct kernel_lb_addr *eloc, uint32_t elen, int inc) |
1643 | { | 1650 | { |
1644 | int adsize; | 1651 | int adsize; |
1645 | short_ad *sad = NULL; | 1652 | struct short_ad *sad = NULL; |
1646 | long_ad *lad = NULL; | 1653 | struct long_ad *lad = NULL; |
1647 | struct allocExtDesc *aed; | 1654 | struct allocExtDesc *aed; |
1648 | int8_t etype; | 1655 | int8_t etype; |
1649 | uint8_t *ptr; | 1656 | uint8_t *ptr; |
@@ -1657,9 +1664,9 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1657 | ptr = epos->bh->b_data + epos->offset; | 1664 | ptr = epos->bh->b_data + epos->offset; |
1658 | 1665 | ||
1659 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 1666 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
1660 | adsize = sizeof(short_ad); | 1667 | adsize = sizeof(struct short_ad); |
1661 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 1668 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
1662 | adsize = sizeof(long_ad); | 1669 | adsize = sizeof(struct long_ad); |
1663 | else | 1670 | else |
1664 | return -1; | 1671 | return -1; |
1665 | 1672 | ||
@@ -1667,7 +1674,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1667 | char *sptr, *dptr; | 1674 | char *sptr, *dptr; |
1668 | struct buffer_head *nbh; | 1675 | struct buffer_head *nbh; |
1669 | int err, loffset; | 1676 | int err, loffset; |
1670 | kernel_lb_addr obloc = epos->block; | 1677 | struct kernel_lb_addr obloc = epos->block; |
1671 | 1678 | ||
1672 | epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, | 1679 | epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, |
1673 | obloc.partitionReferenceNum, | 1680 | obloc.partitionReferenceNum, |
@@ -1675,7 +1682,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1675 | if (!epos->block.logicalBlockNum) | 1682 | if (!epos->block.logicalBlockNum) |
1676 | return -1; | 1683 | return -1; |
1677 | nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, | 1684 | nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, |
1678 | epos->block, | 1685 | &epos->block, |
1679 | 0)); | 1686 | 0)); |
1680 | if (!nbh) | 1687 | if (!nbh) |
1681 | return -1; | 1688 | return -1; |
@@ -1712,20 +1719,20 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1712 | } | 1719 | } |
1713 | if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200) | 1720 | if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200) |
1714 | udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, | 1721 | udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, |
1715 | epos->block.logicalBlockNum, sizeof(tag)); | 1722 | epos->block.logicalBlockNum, sizeof(struct tag)); |
1716 | else | 1723 | else |
1717 | udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, | 1724 | udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, |
1718 | epos->block.logicalBlockNum, sizeof(tag)); | 1725 | epos->block.logicalBlockNum, sizeof(struct tag)); |
1719 | switch (iinfo->i_alloc_type) { | 1726 | switch (iinfo->i_alloc_type) { |
1720 | case ICBTAG_FLAG_AD_SHORT: | 1727 | case ICBTAG_FLAG_AD_SHORT: |
1721 | sad = (short_ad *)sptr; | 1728 | sad = (struct short_ad *)sptr; |
1722 | sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | | 1729 | sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | |
1723 | inode->i_sb->s_blocksize); | 1730 | inode->i_sb->s_blocksize); |
1724 | sad->extPosition = | 1731 | sad->extPosition = |
1725 | cpu_to_le32(epos->block.logicalBlockNum); | 1732 | cpu_to_le32(epos->block.logicalBlockNum); |
1726 | break; | 1733 | break; |
1727 | case ICBTAG_FLAG_AD_LONG: | 1734 | case ICBTAG_FLAG_AD_LONG: |
1728 | lad = (long_ad *)sptr; | 1735 | lad = (struct long_ad *)sptr; |
1729 | lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | | 1736 | lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | |
1730 | inode->i_sb->s_blocksize); | 1737 | inode->i_sb->s_blocksize); |
1731 | lad->extLocation = cpu_to_lelb(epos->block); | 1738 | lad->extLocation = cpu_to_lelb(epos->block); |
@@ -1769,12 +1776,12 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1769 | } | 1776 | } |
1770 | 1777 | ||
1771 | int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, | 1778 | int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, |
1772 | kernel_lb_addr eloc, uint32_t elen, int inc) | 1779 | struct kernel_lb_addr *eloc, uint32_t elen, int inc) |
1773 | { | 1780 | { |
1774 | int adsize; | 1781 | int adsize; |
1775 | uint8_t *ptr; | 1782 | uint8_t *ptr; |
1776 | short_ad *sad; | 1783 | struct short_ad *sad; |
1777 | long_ad *lad; | 1784 | struct long_ad *lad; |
1778 | struct udf_inode_info *iinfo = UDF_I(inode); | 1785 | struct udf_inode_info *iinfo = UDF_I(inode); |
1779 | 1786 | ||
1780 | if (!epos->bh) | 1787 | if (!epos->bh) |
@@ -1786,17 +1793,17 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, | |||
1786 | 1793 | ||
1787 | switch (iinfo->i_alloc_type) { | 1794 | switch (iinfo->i_alloc_type) { |
1788 | case ICBTAG_FLAG_AD_SHORT: | 1795 | case ICBTAG_FLAG_AD_SHORT: |
1789 | sad = (short_ad *)ptr; | 1796 | sad = (struct short_ad *)ptr; |
1790 | sad->extLength = cpu_to_le32(elen); | 1797 | sad->extLength = cpu_to_le32(elen); |
1791 | sad->extPosition = cpu_to_le32(eloc.logicalBlockNum); | 1798 | sad->extPosition = cpu_to_le32(eloc->logicalBlockNum); |
1792 | adsize = sizeof(short_ad); | 1799 | adsize = sizeof(struct short_ad); |
1793 | break; | 1800 | break; |
1794 | case ICBTAG_FLAG_AD_LONG: | 1801 | case ICBTAG_FLAG_AD_LONG: |
1795 | lad = (long_ad *)ptr; | 1802 | lad = (struct long_ad *)ptr; |
1796 | lad->extLength = cpu_to_le32(elen); | 1803 | lad->extLength = cpu_to_le32(elen); |
1797 | lad->extLocation = cpu_to_lelb(eloc); | 1804 | lad->extLocation = cpu_to_lelb(*eloc); |
1798 | memset(lad->impUse, 0x00, sizeof(lad->impUse)); | 1805 | memset(lad->impUse, 0x00, sizeof(lad->impUse)); |
1799 | adsize = sizeof(long_ad); | 1806 | adsize = sizeof(struct long_ad); |
1800 | break; | 1807 | break; |
1801 | default: | 1808 | default: |
1802 | return -1; | 1809 | return -1; |
@@ -1823,7 +1830,7 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, | |||
1823 | } | 1830 | } |
1824 | 1831 | ||
1825 | int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, | 1832 | int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, |
1826 | kernel_lb_addr *eloc, uint32_t *elen, int inc) | 1833 | struct kernel_lb_addr *eloc, uint32_t *elen, int inc) |
1827 | { | 1834 | { |
1828 | int8_t etype; | 1835 | int8_t etype; |
1829 | 1836 | ||
@@ -1833,7 +1840,7 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, | |||
1833 | epos->block = *eloc; | 1840 | epos->block = *eloc; |
1834 | epos->offset = sizeof(struct allocExtDesc); | 1841 | epos->offset = sizeof(struct allocExtDesc); |
1835 | brelse(epos->bh); | 1842 | brelse(epos->bh); |
1836 | block = udf_get_lb_pblock(inode->i_sb, epos->block, 0); | 1843 | block = udf_get_lb_pblock(inode->i_sb, &epos->block, 0); |
1837 | epos->bh = udf_tread(inode->i_sb, block); | 1844 | epos->bh = udf_tread(inode->i_sb, block); |
1838 | if (!epos->bh) { | 1845 | if (!epos->bh) { |
1839 | udf_debug("reading block %d failed!\n", block); | 1846 | udf_debug("reading block %d failed!\n", block); |
@@ -1845,13 +1852,13 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, | |||
1845 | } | 1852 | } |
1846 | 1853 | ||
1847 | int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, | 1854 | int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, |
1848 | kernel_lb_addr *eloc, uint32_t *elen, int inc) | 1855 | struct kernel_lb_addr *eloc, uint32_t *elen, int inc) |
1849 | { | 1856 | { |
1850 | int alen; | 1857 | int alen; |
1851 | int8_t etype; | 1858 | int8_t etype; |
1852 | uint8_t *ptr; | 1859 | uint8_t *ptr; |
1853 | short_ad *sad; | 1860 | struct short_ad *sad; |
1854 | long_ad *lad; | 1861 | struct long_ad *lad; |
1855 | struct udf_inode_info *iinfo = UDF_I(inode); | 1862 | struct udf_inode_info *iinfo = UDF_I(inode); |
1856 | 1863 | ||
1857 | if (!epos->bh) { | 1864 | if (!epos->bh) { |
@@ -1900,9 +1907,9 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, | |||
1900 | } | 1907 | } |
1901 | 1908 | ||
1902 | static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, | 1909 | static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, |
1903 | kernel_lb_addr neloc, uint32_t nelen) | 1910 | struct kernel_lb_addr neloc, uint32_t nelen) |
1904 | { | 1911 | { |
1905 | kernel_lb_addr oeloc; | 1912 | struct kernel_lb_addr oeloc; |
1906 | uint32_t oelen; | 1913 | uint32_t oelen; |
1907 | int8_t etype; | 1914 | int8_t etype; |
1908 | 1915 | ||
@@ -1910,18 +1917,18 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, | |||
1910 | get_bh(epos.bh); | 1917 | get_bh(epos.bh); |
1911 | 1918 | ||
1912 | while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { | 1919 | while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { |
1913 | udf_write_aext(inode, &epos, neloc, nelen, 1); | 1920 | udf_write_aext(inode, &epos, &neloc, nelen, 1); |
1914 | neloc = oeloc; | 1921 | neloc = oeloc; |
1915 | nelen = (etype << 30) | oelen; | 1922 | nelen = (etype << 30) | oelen; |
1916 | } | 1923 | } |
1917 | udf_add_aext(inode, &epos, neloc, nelen, 1); | 1924 | udf_add_aext(inode, &epos, &neloc, nelen, 1); |
1918 | brelse(epos.bh); | 1925 | brelse(epos.bh); |
1919 | 1926 | ||
1920 | return (nelen >> 30); | 1927 | return (nelen >> 30); |
1921 | } | 1928 | } |
1922 | 1929 | ||
1923 | int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | 1930 | int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, |
1924 | kernel_lb_addr eloc, uint32_t elen) | 1931 | struct kernel_lb_addr eloc, uint32_t elen) |
1925 | { | 1932 | { |
1926 | struct extent_position oepos; | 1933 | struct extent_position oepos; |
1927 | int adsize; | 1934 | int adsize; |
@@ -1936,9 +1943,9 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
1936 | 1943 | ||
1937 | iinfo = UDF_I(inode); | 1944 | iinfo = UDF_I(inode); |
1938 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 1945 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
1939 | adsize = sizeof(short_ad); | 1946 | adsize = sizeof(struct short_ad); |
1940 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 1947 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
1941 | adsize = sizeof(long_ad); | 1948 | adsize = sizeof(struct long_ad); |
1942 | else | 1949 | else |
1943 | adsize = 0; | 1950 | adsize = 0; |
1944 | 1951 | ||
@@ -1947,7 +1954,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
1947 | return -1; | 1954 | return -1; |
1948 | 1955 | ||
1949 | while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { | 1956 | while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { |
1950 | udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1); | 1957 | udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1); |
1951 | if (oepos.bh != epos.bh) { | 1958 | if (oepos.bh != epos.bh) { |
1952 | oepos.block = epos.block; | 1959 | oepos.block = epos.block; |
1953 | brelse(oepos.bh); | 1960 | brelse(oepos.bh); |
@@ -1956,13 +1963,13 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
1956 | oepos.offset = epos.offset - adsize; | 1963 | oepos.offset = epos.offset - adsize; |
1957 | } | 1964 | } |
1958 | } | 1965 | } |
1959 | memset(&eloc, 0x00, sizeof(kernel_lb_addr)); | 1966 | memset(&eloc, 0x00, sizeof(struct kernel_lb_addr)); |
1960 | elen = 0; | 1967 | elen = 0; |
1961 | 1968 | ||
1962 | if (epos.bh != oepos.bh) { | 1969 | if (epos.bh != oepos.bh) { |
1963 | udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1); | 1970 | udf_free_blocks(inode->i_sb, inode, &epos.block, 0, 1); |
1964 | udf_write_aext(inode, &oepos, eloc, elen, 1); | 1971 | udf_write_aext(inode, &oepos, &eloc, elen, 1); |
1965 | udf_write_aext(inode, &oepos, eloc, elen, 1); | 1972 | udf_write_aext(inode, &oepos, &eloc, elen, 1); |
1966 | if (!oepos.bh) { | 1973 | if (!oepos.bh) { |
1967 | iinfo->i_lenAlloc -= (adsize * 2); | 1974 | iinfo->i_lenAlloc -= (adsize * 2); |
1968 | mark_inode_dirty(inode); | 1975 | mark_inode_dirty(inode); |
@@ -1979,7 +1986,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
1979 | mark_buffer_dirty_inode(oepos.bh, inode); | 1986 | mark_buffer_dirty_inode(oepos.bh, inode); |
1980 | } | 1987 | } |
1981 | } else { | 1988 | } else { |
1982 | udf_write_aext(inode, &oepos, eloc, elen, 1); | 1989 | udf_write_aext(inode, &oepos, &eloc, elen, 1); |
1983 | if (!oepos.bh) { | 1990 | if (!oepos.bh) { |
1984 | iinfo->i_lenAlloc -= adsize; | 1991 | iinfo->i_lenAlloc -= adsize; |
1985 | mark_inode_dirty(inode); | 1992 | mark_inode_dirty(inode); |
@@ -2004,7 +2011,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
2004 | } | 2011 | } |
2005 | 2012 | ||
2006 | int8_t inode_bmap(struct inode *inode, sector_t block, | 2013 | int8_t inode_bmap(struct inode *inode, sector_t block, |
2007 | struct extent_position *pos, kernel_lb_addr *eloc, | 2014 | struct extent_position *pos, struct kernel_lb_addr *eloc, |
2008 | uint32_t *elen, sector_t *offset) | 2015 | uint32_t *elen, sector_t *offset) |
2009 | { | 2016 | { |
2010 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | 2017 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; |
@@ -2036,7 +2043,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block, | |||
2036 | 2043 | ||
2037 | long udf_block_map(struct inode *inode, sector_t block) | 2044 | long udf_block_map(struct inode *inode, sector_t block) |
2038 | { | 2045 | { |
2039 | kernel_lb_addr eloc; | 2046 | struct kernel_lb_addr eloc; |
2040 | uint32_t elen; | 2047 | uint32_t elen; |
2041 | sector_t offset; | 2048 | sector_t offset; |
2042 | struct extent_position epos = {}; | 2049 | struct extent_position epos = {}; |
@@ -2046,7 +2053,7 @@ long udf_block_map(struct inode *inode, sector_t block) | |||
2046 | 2053 | ||
2047 | if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == | 2054 | if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == |
2048 | (EXT_RECORDED_ALLOCATED >> 30)) | 2055 | (EXT_RECORDED_ALLOCATED >> 30)) |
2049 | ret = udf_get_lb_pblock(inode->i_sb, eloc, offset); | 2056 | ret = udf_get_lb_pblock(inode->i_sb, &eloc, offset); |
2050 | else | 2057 | else |
2051 | ret = 0; | 2058 | ret = 0; |
2052 | 2059 | ||
diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 84bf0fd4a4f1..9215700c00a4 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c | |||
@@ -134,10 +134,10 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size, | |||
134 | } | 134 | } |
135 | } | 135 | } |
136 | /* rewrite CRC + checksum of eahd */ | 136 | /* rewrite CRC + checksum of eahd */ |
137 | crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); | 137 | crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(struct tag); |
138 | eahd->descTag.descCRCLength = cpu_to_le16(crclen); | 138 | eahd->descTag.descCRCLength = cpu_to_le16(crclen); |
139 | eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd + | 139 | eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd + |
140 | sizeof(tag), crclen)); | 140 | sizeof(struct tag), crclen)); |
141 | eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); | 141 | eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); |
142 | iinfo->i_lenEAttr += size; | 142 | iinfo->i_lenEAttr += size; |
143 | return (struct genericFormat *)&ea[offset]; | 143 | return (struct genericFormat *)&ea[offset]; |
@@ -202,7 +202,7 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, | |||
202 | struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, | 202 | struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, |
203 | uint32_t location, uint16_t *ident) | 203 | uint32_t location, uint16_t *ident) |
204 | { | 204 | { |
205 | tag *tag_p; | 205 | struct tag *tag_p; |
206 | struct buffer_head *bh = NULL; | 206 | struct buffer_head *bh = NULL; |
207 | 207 | ||
208 | /* Read the block */ | 208 | /* Read the block */ |
@@ -216,7 +216,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, | |||
216 | return NULL; | 216 | return NULL; |
217 | } | 217 | } |
218 | 218 | ||
219 | tag_p = (tag *)(bh->b_data); | 219 | tag_p = (struct tag *)(bh->b_data); |
220 | 220 | ||
221 | *ident = le16_to_cpu(tag_p->tagIdent); | 221 | *ident = le16_to_cpu(tag_p->tagIdent); |
222 | 222 | ||
@@ -241,9 +241,9 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, | |||
241 | } | 241 | } |
242 | 242 | ||
243 | /* Verify the descriptor CRC */ | 243 | /* Verify the descriptor CRC */ |
244 | if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || | 244 | if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize || |
245 | le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, | 245 | le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, |
246 | bh->b_data + sizeof(tag), | 246 | bh->b_data + sizeof(struct tag), |
247 | le16_to_cpu(tag_p->descCRCLength))) | 247 | le16_to_cpu(tag_p->descCRCLength))) |
248 | return bh; | 248 | return bh; |
249 | 249 | ||
@@ -255,27 +255,28 @@ error_out: | |||
255 | return NULL; | 255 | return NULL; |
256 | } | 256 | } |
257 | 257 | ||
258 | struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, | 258 | struct buffer_head *udf_read_ptagged(struct super_block *sb, |
259 | struct kernel_lb_addr *loc, | ||
259 | uint32_t offset, uint16_t *ident) | 260 | uint32_t offset, uint16_t *ident) |
260 | { | 261 | { |
261 | return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset), | 262 | return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset), |
262 | loc.logicalBlockNum + offset, ident); | 263 | loc->logicalBlockNum + offset, ident); |
263 | } | 264 | } |
264 | 265 | ||
265 | void udf_update_tag(char *data, int length) | 266 | void udf_update_tag(char *data, int length) |
266 | { | 267 | { |
267 | tag *tptr = (tag *)data; | 268 | struct tag *tptr = (struct tag *)data; |
268 | length -= sizeof(tag); | 269 | length -= sizeof(struct tag); |
269 | 270 | ||
270 | tptr->descCRCLength = cpu_to_le16(length); | 271 | tptr->descCRCLength = cpu_to_le16(length); |
271 | tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(tag), length)); | 272 | tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(struct tag), length)); |
272 | tptr->tagChecksum = udf_tag_checksum(tptr); | 273 | tptr->tagChecksum = udf_tag_checksum(tptr); |
273 | } | 274 | } |
274 | 275 | ||
275 | void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, | 276 | void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, |
276 | uint32_t loc, int length) | 277 | uint32_t loc, int length) |
277 | { | 278 | { |
278 | tag *tptr = (tag *)data; | 279 | struct tag *tptr = (struct tag *)data; |
279 | tptr->tagIdent = cpu_to_le16(ident); | 280 | tptr->tagIdent = cpu_to_le16(ident); |
280 | tptr->descVersion = cpu_to_le16(version); | 281 | tptr->descVersion = cpu_to_le16(version); |
281 | tptr->tagSerialNum = cpu_to_le16(snum); | 282 | tptr->tagSerialNum = cpu_to_le16(snum); |
@@ -283,12 +284,12 @@ void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, | |||
283 | udf_update_tag(data, length); | 284 | udf_update_tag(data, length); |
284 | } | 285 | } |
285 | 286 | ||
286 | u8 udf_tag_checksum(const tag *t) | 287 | u8 udf_tag_checksum(const struct tag *t) |
287 | { | 288 | { |
288 | u8 *data = (u8 *)t; | 289 | u8 *data = (u8 *)t; |
289 | u8 checksum = 0; | 290 | u8 checksum = 0; |
290 | int i; | 291 | int i; |
291 | for (i = 0; i < sizeof(tag); ++i) | 292 | for (i = 0; i < sizeof(struct tag); ++i) |
292 | if (i != 4) /* position of checksum */ | 293 | if (i != 4) /* position of checksum */ |
293 | checksum += data[i]; | 294 | checksum += data[i]; |
294 | return checksum; | 295 | return checksum; |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f84bfaa8d941..6a29fa34c478 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -47,7 +47,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, | |||
47 | struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh, | 47 | struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh, |
48 | uint8_t *impuse, uint8_t *fileident) | 48 | uint8_t *impuse, uint8_t *fileident) |
49 | { | 49 | { |
50 | uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(tag); | 50 | uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(struct tag); |
51 | uint16_t crc; | 51 | uint16_t crc; |
52 | int offset; | 52 | int offset; |
53 | uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse); | 53 | uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse); |
@@ -99,18 +99,18 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, | |||
99 | memset(fibh->ebh->b_data, 0x00, padlen + offset); | 99 | memset(fibh->ebh->b_data, 0x00, padlen + offset); |
100 | } | 100 | } |
101 | 101 | ||
102 | crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(tag), | 102 | crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(struct tag), |
103 | sizeof(struct fileIdentDesc) - sizeof(tag)); | 103 | sizeof(struct fileIdentDesc) - sizeof(struct tag)); |
104 | 104 | ||
105 | if (fibh->sbh == fibh->ebh) { | 105 | if (fibh->sbh == fibh->ebh) { |
106 | crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, | 106 | crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, |
107 | crclen + sizeof(tag) - | 107 | crclen + sizeof(struct tag) - |
108 | sizeof(struct fileIdentDesc)); | 108 | sizeof(struct fileIdentDesc)); |
109 | } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { | 109 | } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { |
110 | crc = crc_itu_t(crc, fibh->ebh->b_data + | 110 | crc = crc_itu_t(crc, fibh->ebh->b_data + |
111 | sizeof(struct fileIdentDesc) + | 111 | sizeof(struct fileIdentDesc) + |
112 | fibh->soffset, | 112 | fibh->soffset, |
113 | crclen + sizeof(tag) - | 113 | crclen + sizeof(struct tag) - |
114 | sizeof(struct fileIdentDesc)); | 114 | sizeof(struct fileIdentDesc)); |
115 | } else { | 115 | } else { |
116 | crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, | 116 | crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, |
@@ -154,7 +154,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
154 | uint8_t lfi; | 154 | uint8_t lfi; |
155 | uint16_t liu; | 155 | uint16_t liu; |
156 | loff_t size; | 156 | loff_t size; |
157 | kernel_lb_addr eloc; | 157 | struct kernel_lb_addr eloc; |
158 | uint32_t elen; | 158 | uint32_t elen; |
159 | sector_t offset; | 159 | sector_t offset; |
160 | struct extent_position epos = {}; | 160 | struct extent_position epos = {}; |
@@ -171,12 +171,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
171 | if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, | 171 | if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, |
172 | &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) | 172 | &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) |
173 | goto out_err; | 173 | goto out_err; |
174 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 174 | block = udf_get_lb_pblock(dir->i_sb, &eloc, offset); |
175 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 175 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
176 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 176 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
177 | epos.offset -= sizeof(short_ad); | 177 | epos.offset -= sizeof(struct short_ad); |
178 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 178 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
179 | epos.offset -= sizeof(long_ad); | 179 | epos.offset -= sizeof(struct long_ad); |
180 | } else | 180 | } else |
181 | offset = 0; | 181 | offset = 0; |
182 | 182 | ||
@@ -268,7 +268,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, | |||
268 | #ifdef UDF_RECOVERY | 268 | #ifdef UDF_RECOVERY |
269 | /* temporary shorthand for specifying files by inode number */ | 269 | /* temporary shorthand for specifying files by inode number */ |
270 | if (!strncmp(dentry->d_name.name, ".B=", 3)) { | 270 | if (!strncmp(dentry->d_name.name, ".B=", 3)) { |
271 | kernel_lb_addr lb = { | 271 | struct kernel_lb_addr lb = { |
272 | .logicalBlockNum = 0, | 272 | .logicalBlockNum = 0, |
273 | .partitionReferenceNum = | 273 | .partitionReferenceNum = |
274 | simple_strtoul(dentry->d_name.name + 3, | 274 | simple_strtoul(dentry->d_name.name + 3, |
@@ -283,11 +283,14 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, | |||
283 | #endif /* UDF_RECOVERY */ | 283 | #endif /* UDF_RECOVERY */ |
284 | 284 | ||
285 | if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) { | 285 | if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) { |
286 | struct kernel_lb_addr loc; | ||
287 | |||
286 | if (fibh.sbh != fibh.ebh) | 288 | if (fibh.sbh != fibh.ebh) |
287 | brelse(fibh.ebh); | 289 | brelse(fibh.ebh); |
288 | brelse(fibh.sbh); | 290 | brelse(fibh.sbh); |
289 | 291 | ||
290 | inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation)); | 292 | loc = lelb_to_cpu(cfi.icb.extLocation); |
293 | inode = udf_iget(dir->i_sb, &loc); | ||
291 | if (!inode) { | 294 | if (!inode) { |
292 | unlock_kernel(); | 295 | unlock_kernel(); |
293 | return ERR_PTR(-EACCES); | 296 | return ERR_PTR(-EACCES); |
@@ -313,7 +316,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
313 | uint8_t lfi; | 316 | uint8_t lfi; |
314 | uint16_t liu; | 317 | uint16_t liu; |
315 | int block; | 318 | int block; |
316 | kernel_lb_addr eloc; | 319 | struct kernel_lb_addr eloc; |
317 | uint32_t elen = 0; | 320 | uint32_t elen = 0; |
318 | sector_t offset; | 321 | sector_t offset; |
319 | struct extent_position epos = {}; | 322 | struct extent_position epos = {}; |
@@ -351,16 +354,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
351 | if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, | 354 | if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, |
352 | &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) { | 355 | &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) { |
353 | block = udf_get_lb_pblock(dir->i_sb, | 356 | block = udf_get_lb_pblock(dir->i_sb, |
354 | dinfo->i_location, 0); | 357 | &dinfo->i_location, 0); |
355 | fibh->soffset = fibh->eoffset = sb->s_blocksize; | 358 | fibh->soffset = fibh->eoffset = sb->s_blocksize; |
356 | goto add; | 359 | goto add; |
357 | } | 360 | } |
358 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 361 | block = udf_get_lb_pblock(dir->i_sb, &eloc, offset); |
359 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 362 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
360 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 363 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
361 | epos.offset -= sizeof(short_ad); | 364 | epos.offset -= sizeof(struct short_ad); |
362 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 365 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
363 | epos.offset -= sizeof(long_ad); | 366 | epos.offset -= sizeof(struct long_ad); |
364 | } else | 367 | } else |
365 | offset = 0; | 368 | offset = 0; |
366 | 369 | ||
@@ -409,10 +412,10 @@ add: | |||
409 | if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) { | 412 | if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) { |
410 | elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); | 413 | elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); |
411 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 414 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
412 | epos.offset -= sizeof(short_ad); | 415 | epos.offset -= sizeof(struct short_ad); |
413 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 416 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
414 | epos.offset -= sizeof(long_ad); | 417 | epos.offset -= sizeof(struct long_ad); |
415 | udf_write_aext(dir, &epos, eloc, elen, 1); | 418 | udf_write_aext(dir, &epos, &eloc, elen, 1); |
416 | } | 419 | } |
417 | f_pos += nfidlen; | 420 | f_pos += nfidlen; |
418 | 421 | ||
@@ -494,10 +497,10 @@ add: | |||
494 | memset(cfi, 0, sizeof(struct fileIdentDesc)); | 497 | memset(cfi, 0, sizeof(struct fileIdentDesc)); |
495 | if (UDF_SB(sb)->s_udfrev >= 0x0200) | 498 | if (UDF_SB(sb)->s_udfrev >= 0x0200) |
496 | udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, | 499 | udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, |
497 | sizeof(tag)); | 500 | sizeof(struct tag)); |
498 | else | 501 | else |
499 | udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, | 502 | udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, |
500 | sizeof(tag)); | 503 | sizeof(struct tag)); |
501 | cfi->fileVersionNum = cpu_to_le16(1); | 504 | cfi->fileVersionNum = cpu_to_le16(1); |
502 | cfi->lengthFileIdent = namelen; | 505 | cfi->lengthFileIdent = namelen; |
503 | cfi->lengthOfImpUse = cpu_to_le16(0); | 506 | cfi->lengthOfImpUse = cpu_to_le16(0); |
@@ -530,7 +533,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, | |||
530 | cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED; | 533 | cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED; |
531 | 534 | ||
532 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) | 535 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) |
533 | memset(&(cfi->icb), 0x00, sizeof(long_ad)); | 536 | memset(&(cfi->icb), 0x00, sizeof(struct long_ad)); |
534 | 537 | ||
535 | return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); | 538 | return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); |
536 | } | 539 | } |
@@ -710,7 +713,7 @@ static int empty_dir(struct inode *dir) | |||
710 | loff_t f_pos; | 713 | loff_t f_pos; |
711 | loff_t size = udf_ext0_offset(dir) + dir->i_size; | 714 | loff_t size = udf_ext0_offset(dir) + dir->i_size; |
712 | int block; | 715 | int block; |
713 | kernel_lb_addr eloc; | 716 | struct kernel_lb_addr eloc; |
714 | uint32_t elen; | 717 | uint32_t elen; |
715 | sector_t offset; | 718 | sector_t offset; |
716 | struct extent_position epos = {}; | 719 | struct extent_position epos = {}; |
@@ -724,12 +727,12 @@ static int empty_dir(struct inode *dir) | |||
724 | else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, | 727 | else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, |
725 | &epos, &eloc, &elen, &offset) == | 728 | &epos, &eloc, &elen, &offset) == |
726 | (EXT_RECORDED_ALLOCATED >> 30)) { | 729 | (EXT_RECORDED_ALLOCATED >> 30)) { |
727 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 730 | block = udf_get_lb_pblock(dir->i_sb, &eloc, offset); |
728 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 731 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
729 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 732 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
730 | epos.offset -= sizeof(short_ad); | 733 | epos.offset -= sizeof(struct short_ad); |
731 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 734 | else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
732 | epos.offset -= sizeof(long_ad); | 735 | epos.offset -= sizeof(struct long_ad); |
733 | } else | 736 | } else |
734 | offset = 0; | 737 | offset = 0; |
735 | 738 | ||
@@ -778,7 +781,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) | |||
778 | struct inode *inode = dentry->d_inode; | 781 | struct inode *inode = dentry->d_inode; |
779 | struct udf_fileident_bh fibh; | 782 | struct udf_fileident_bh fibh; |
780 | struct fileIdentDesc *fi, cfi; | 783 | struct fileIdentDesc *fi, cfi; |
781 | kernel_lb_addr tloc; | 784 | struct kernel_lb_addr tloc; |
782 | 785 | ||
783 | retval = -ENOENT; | 786 | retval = -ENOENT; |
784 | lock_kernel(); | 787 | lock_kernel(); |
@@ -788,7 +791,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) | |||
788 | 791 | ||
789 | retval = -EIO; | 792 | retval = -EIO; |
790 | tloc = lelb_to_cpu(cfi.icb.extLocation); | 793 | tloc = lelb_to_cpu(cfi.icb.extLocation); |
791 | if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino) | 794 | if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino) |
792 | goto end_rmdir; | 795 | goto end_rmdir; |
793 | retval = -ENOTEMPTY; | 796 | retval = -ENOTEMPTY; |
794 | if (!empty_dir(inode)) | 797 | if (!empty_dir(inode)) |
@@ -824,7 +827,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) | |||
824 | struct udf_fileident_bh fibh; | 827 | struct udf_fileident_bh fibh; |
825 | struct fileIdentDesc *fi; | 828 | struct fileIdentDesc *fi; |
826 | struct fileIdentDesc cfi; | 829 | struct fileIdentDesc cfi; |
827 | kernel_lb_addr tloc; | 830 | struct kernel_lb_addr tloc; |
828 | 831 | ||
829 | retval = -ENOENT; | 832 | retval = -ENOENT; |
830 | lock_kernel(); | 833 | lock_kernel(); |
@@ -834,7 +837,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) | |||
834 | 837 | ||
835 | retval = -EIO; | 838 | retval = -EIO; |
836 | tloc = lelb_to_cpu(cfi.icb.extLocation); | 839 | tloc = lelb_to_cpu(cfi.icb.extLocation); |
837 | if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino) | 840 | if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino) |
838 | goto end_unlink; | 841 | goto end_unlink; |
839 | 842 | ||
840 | if (!inode->i_nlink) { | 843 | if (!inode->i_nlink) { |
@@ -897,7 +900,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
897 | inode->i_op = &page_symlink_inode_operations; | 900 | inode->i_op = &page_symlink_inode_operations; |
898 | 901 | ||
899 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { | 902 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { |
900 | kernel_lb_addr eloc; | 903 | struct kernel_lb_addr eloc; |
901 | uint32_t bsize; | 904 | uint32_t bsize; |
902 | 905 | ||
903 | block = udf_new_block(inode->i_sb, inode, | 906 | block = udf_new_block(inode->i_sb, inode, |
@@ -913,7 +916,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
913 | iinfo->i_location.partitionReferenceNum; | 916 | iinfo->i_location.partitionReferenceNum; |
914 | bsize = inode->i_sb->s_blocksize; | 917 | bsize = inode->i_sb->s_blocksize; |
915 | iinfo->i_lenExtents = bsize; | 918 | iinfo->i_lenExtents = bsize; |
916 | udf_add_aext(inode, &epos, eloc, bsize, 0); | 919 | udf_add_aext(inode, &epos, &eloc, bsize, 0); |
917 | brelse(epos.bh); | 920 | brelse(epos.bh); |
918 | 921 | ||
919 | block = udf_get_pblock(inode->i_sb, block, | 922 | block = udf_get_pblock(inode->i_sb, block, |
@@ -1108,7 +1111,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1108 | struct fileIdentDesc ocfi, ncfi; | 1111 | struct fileIdentDesc ocfi, ncfi; |
1109 | struct buffer_head *dir_bh = NULL; | 1112 | struct buffer_head *dir_bh = NULL; |
1110 | int retval = -ENOENT; | 1113 | int retval = -ENOENT; |
1111 | kernel_lb_addr tloc; | 1114 | struct kernel_lb_addr tloc; |
1112 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); | 1115 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); |
1113 | 1116 | ||
1114 | lock_kernel(); | 1117 | lock_kernel(); |
@@ -1119,7 +1122,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1119 | brelse(ofibh.sbh); | 1122 | brelse(ofibh.sbh); |
1120 | } | 1123 | } |
1121 | tloc = lelb_to_cpu(ocfi.icb.extLocation); | 1124 | tloc = lelb_to_cpu(ocfi.icb.extLocation); |
1122 | if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0) | 1125 | if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) |
1123 | != old_inode->i_ino) | 1126 | != old_inode->i_ino) |
1124 | goto end_rename; | 1127 | goto end_rename; |
1125 | 1128 | ||
@@ -1158,7 +1161,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1158 | if (!dir_fi) | 1161 | if (!dir_fi) |
1159 | goto end_rename; | 1162 | goto end_rename; |
1160 | tloc = lelb_to_cpu(dir_fi->icb.extLocation); | 1163 | tloc = lelb_to_cpu(dir_fi->icb.extLocation); |
1161 | if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) != | 1164 | if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) != |
1162 | old_dir->i_ino) | 1165 | old_dir->i_ino) |
1163 | goto end_rename; | 1166 | goto end_rename; |
1164 | 1167 | ||
@@ -1187,7 +1190,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1187 | */ | 1190 | */ |
1188 | ncfi.fileVersionNum = ocfi.fileVersionNum; | 1191 | ncfi.fileVersionNum = ocfi.fileVersionNum; |
1189 | ncfi.fileCharacteristics = ocfi.fileCharacteristics; | 1192 | ncfi.fileCharacteristics = ocfi.fileCharacteristics; |
1190 | memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(long_ad)); | 1193 | memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(struct long_ad)); |
1191 | udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); | 1194 | udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); |
1192 | 1195 | ||
1193 | /* The old fid may have moved - find it again */ | 1196 | /* The old fid may have moved - find it again */ |
@@ -1242,6 +1245,7 @@ end_rename: | |||
1242 | 1245 | ||
1243 | static struct dentry *udf_get_parent(struct dentry *child) | 1246 | static struct dentry *udf_get_parent(struct dentry *child) |
1244 | { | 1247 | { |
1248 | struct kernel_lb_addr tloc; | ||
1245 | struct inode *inode = NULL; | 1249 | struct inode *inode = NULL; |
1246 | struct qstr dotdot = {.name = "..", .len = 2}; | 1250 | struct qstr dotdot = {.name = "..", .len = 2}; |
1247 | struct fileIdentDesc cfi; | 1251 | struct fileIdentDesc cfi; |
@@ -1255,8 +1259,8 @@ static struct dentry *udf_get_parent(struct dentry *child) | |||
1255 | brelse(fibh.ebh); | 1259 | brelse(fibh.ebh); |
1256 | brelse(fibh.sbh); | 1260 | brelse(fibh.sbh); |
1257 | 1261 | ||
1258 | inode = udf_iget(child->d_inode->i_sb, | 1262 | tloc = lelb_to_cpu(cfi.icb.extLocation); |
1259 | lelb_to_cpu(cfi.icb.extLocation)); | 1263 | inode = udf_iget(child->d_inode->i_sb, &tloc); |
1260 | if (!inode) | 1264 | if (!inode) |
1261 | goto out_unlock; | 1265 | goto out_unlock; |
1262 | unlock_kernel(); | 1266 | unlock_kernel(); |
@@ -1272,14 +1276,14 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, | |||
1272 | u16 partref, __u32 generation) | 1276 | u16 partref, __u32 generation) |
1273 | { | 1277 | { |
1274 | struct inode *inode; | 1278 | struct inode *inode; |
1275 | kernel_lb_addr loc; | 1279 | struct kernel_lb_addr loc; |
1276 | 1280 | ||
1277 | if (block == 0) | 1281 | if (block == 0) |
1278 | return ERR_PTR(-ESTALE); | 1282 | return ERR_PTR(-ESTALE); |
1279 | 1283 | ||
1280 | loc.logicalBlockNum = block; | 1284 | loc.logicalBlockNum = block; |
1281 | loc.partitionReferenceNum = partref; | 1285 | loc.partitionReferenceNum = partref; |
1282 | inode = udf_iget(sb, loc); | 1286 | inode = udf_iget(sb, &loc); |
1283 | 1287 | ||
1284 | if (inode == NULL) | 1288 | if (inode == NULL) |
1285 | return ERR_PTR(-ENOMEM); | 1289 | return ERR_PTR(-ENOMEM); |
@@ -1318,7 +1322,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, | |||
1318 | { | 1322 | { |
1319 | int len = *lenp; | 1323 | int len = *lenp; |
1320 | struct inode *inode = de->d_inode; | 1324 | struct inode *inode = de->d_inode; |
1321 | kernel_lb_addr location = UDF_I(inode)->i_location; | 1325 | struct kernel_lb_addr location = UDF_I(inode)->i_location; |
1322 | struct fid *fid = (struct fid *)fh; | 1326 | struct fid *fid = (struct fid *)fh; |
1323 | int type = FILEID_UDF_WITHOUT_PARENT; | 1327 | int type = FILEID_UDF_WITHOUT_PARENT; |
1324 | 1328 | ||
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h index 65ff47902bd2..fbff74654df2 100644 --- a/fs/udf/osta_udf.h +++ b/fs/udf/osta_udf.h | |||
@@ -85,7 +85,7 @@ struct appIdentSuffix { | |||
85 | /* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */ | 85 | /* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */ |
86 | /* Implementation Use (UDF 2.50 2.2.6.4) */ | 86 | /* Implementation Use (UDF 2.50 2.2.6.4) */ |
87 | struct logicalVolIntegrityDescImpUse { | 87 | struct logicalVolIntegrityDescImpUse { |
88 | regid impIdent; | 88 | struct regid impIdent; |
89 | __le32 numFiles; | 89 | __le32 numFiles; |
90 | __le32 numDirs; | 90 | __le32 numDirs; |
91 | __le16 minUDFReadRev; | 91 | __le16 minUDFReadRev; |
@@ -97,12 +97,12 @@ struct logicalVolIntegrityDescImpUse { | |||
97 | /* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */ | 97 | /* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */ |
98 | /* Implementation Use (UDF 2.50 2.2.7.2) */ | 98 | /* Implementation Use (UDF 2.50 2.2.7.2) */ |
99 | struct impUseVolDescImpUse { | 99 | struct impUseVolDescImpUse { |
100 | charspec LVICharset; | 100 | struct charspec LVICharset; |
101 | dstring logicalVolIdent[128]; | 101 | dstring logicalVolIdent[128]; |
102 | dstring LVInfo1[36]; | 102 | dstring LVInfo1[36]; |
103 | dstring LVInfo2[36]; | 103 | dstring LVInfo2[36]; |
104 | dstring LVInfo3[36]; | 104 | dstring LVInfo3[36]; |
105 | regid impIdent; | 105 | struct regid impIdent; |
106 | uint8_t impUse[128]; | 106 | uint8_t impUse[128]; |
107 | } __attribute__ ((packed)); | 107 | } __attribute__ ((packed)); |
108 | 108 | ||
@@ -110,7 +110,7 @@ struct udfPartitionMap2 { | |||
110 | uint8_t partitionMapType; | 110 | uint8_t partitionMapType; |
111 | uint8_t partitionMapLength; | 111 | uint8_t partitionMapLength; |
112 | uint8_t reserved1[2]; | 112 | uint8_t reserved1[2]; |
113 | regid partIdent; | 113 | struct regid partIdent; |
114 | __le16 volSeqNum; | 114 | __le16 volSeqNum; |
115 | __le16 partitionNum; | 115 | __le16 partitionNum; |
116 | } __attribute__ ((packed)); | 116 | } __attribute__ ((packed)); |
@@ -120,7 +120,7 @@ struct virtualPartitionMap { | |||
120 | uint8_t partitionMapType; | 120 | uint8_t partitionMapType; |
121 | uint8_t partitionMapLength; | 121 | uint8_t partitionMapLength; |
122 | uint8_t reserved1[2]; | 122 | uint8_t reserved1[2]; |
123 | regid partIdent; | 123 | struct regid partIdent; |
124 | __le16 volSeqNum; | 124 | __le16 volSeqNum; |
125 | __le16 partitionNum; | 125 | __le16 partitionNum; |
126 | uint8_t reserved2[24]; | 126 | uint8_t reserved2[24]; |
@@ -131,7 +131,7 @@ struct sparablePartitionMap { | |||
131 | uint8_t partitionMapType; | 131 | uint8_t partitionMapType; |
132 | uint8_t partitionMapLength; | 132 | uint8_t partitionMapLength; |
133 | uint8_t reserved1[2]; | 133 | uint8_t reserved1[2]; |
134 | regid partIdent; | 134 | struct regid partIdent; |
135 | __le16 volSeqNum; | 135 | __le16 volSeqNum; |
136 | __le16 partitionNum; | 136 | __le16 partitionNum; |
137 | __le16 packetLength; | 137 | __le16 packetLength; |
@@ -146,7 +146,7 @@ struct metadataPartitionMap { | |||
146 | uint8_t partitionMapType; | 146 | uint8_t partitionMapType; |
147 | uint8_t partitionMapLength; | 147 | uint8_t partitionMapLength; |
148 | uint8_t reserved1[2]; | 148 | uint8_t reserved1[2]; |
149 | regid partIdent; | 149 | struct regid partIdent; |
150 | __le16 volSeqNum; | 150 | __le16 volSeqNum; |
151 | __le16 partitionNum; | 151 | __le16 partitionNum; |
152 | __le32 metadataFileLoc; | 152 | __le32 metadataFileLoc; |
@@ -161,7 +161,7 @@ struct metadataPartitionMap { | |||
161 | /* Virtual Allocation Table (UDF 1.5 2.2.10) */ | 161 | /* Virtual Allocation Table (UDF 1.5 2.2.10) */ |
162 | struct virtualAllocationTable15 { | 162 | struct virtualAllocationTable15 { |
163 | __le32 VirtualSector[0]; | 163 | __le32 VirtualSector[0]; |
164 | regid vatIdent; | 164 | struct regid vatIdent; |
165 | __le32 previousVATICBLoc; | 165 | __le32 previousVATICBLoc; |
166 | } __attribute__ ((packed)); | 166 | } __attribute__ ((packed)); |
167 | 167 | ||
@@ -192,8 +192,8 @@ struct sparingEntry { | |||
192 | } __attribute__ ((packed)); | 192 | } __attribute__ ((packed)); |
193 | 193 | ||
194 | struct sparingTable { | 194 | struct sparingTable { |
195 | tag descTag; | 195 | struct tag descTag; |
196 | regid sparingIdent; | 196 | struct regid sparingIdent; |
197 | __le16 reallocationTableLen; | 197 | __le16 reallocationTableLen; |
198 | __le16 reserved; | 198 | __le16 reserved; |
199 | __le32 sequenceNum; | 199 | __le32 sequenceNum; |
@@ -206,7 +206,7 @@ struct sparingTable { | |||
206 | #define ICBTAG_FILE_TYPE_MIRROR 0xFB | 206 | #define ICBTAG_FILE_TYPE_MIRROR 0xFB |
207 | #define ICBTAG_FILE_TYPE_BITMAP 0xFC | 207 | #define ICBTAG_FILE_TYPE_BITMAP 0xFC |
208 | 208 | ||
209 | /* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */ | 209 | /* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */ |
210 | struct allocDescImpUse { | 210 | struct allocDescImpUse { |
211 | __le16 flags; | 211 | __le16 flags; |
212 | uint8_t impUse[4]; | 212 | uint8_t impUse[4]; |
diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 96dfd207c3d6..4b540ee632d5 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c | |||
@@ -273,7 +273,7 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, | |||
273 | { | 273 | { |
274 | struct super_block *sb = inode->i_sb; | 274 | struct super_block *sb = inode->i_sb; |
275 | struct udf_part_map *map; | 275 | struct udf_part_map *map; |
276 | kernel_lb_addr eloc; | 276 | struct kernel_lb_addr eloc; |
277 | uint32_t elen; | 277 | uint32_t elen; |
278 | sector_t ext_offset; | 278 | sector_t ext_offset; |
279 | struct extent_position epos = {}; | 279 | struct extent_position epos = {}; |
diff --git a/fs/udf/super.c b/fs/udf/super.c index e25e7010627b..72348cc855a4 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -81,16 +81,13 @@ static char error_buf[1024]; | |||
81 | /* These are the "meat" - everything else is stuffing */ | 81 | /* These are the "meat" - everything else is stuffing */ |
82 | static int udf_fill_super(struct super_block *, void *, int); | 82 | static int udf_fill_super(struct super_block *, void *, int); |
83 | static void udf_put_super(struct super_block *); | 83 | static void udf_put_super(struct super_block *); |
84 | static void udf_write_super(struct super_block *); | 84 | static int udf_sync_fs(struct super_block *, int); |
85 | static int udf_remount_fs(struct super_block *, int *, char *); | 85 | static int udf_remount_fs(struct super_block *, int *, char *); |
86 | static int udf_check_valid(struct super_block *, int, int); | 86 | static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad); |
87 | static int udf_vrs(struct super_block *sb, int silent); | 87 | static int udf_find_fileset(struct super_block *, struct kernel_lb_addr *, |
88 | static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad); | 88 | struct kernel_lb_addr *); |
89 | static void udf_find_anchor(struct super_block *); | ||
90 | static int udf_find_fileset(struct super_block *, kernel_lb_addr *, | ||
91 | kernel_lb_addr *); | ||
92 | static void udf_load_fileset(struct super_block *, struct buffer_head *, | 89 | static void udf_load_fileset(struct super_block *, struct buffer_head *, |
93 | kernel_lb_addr *); | 90 | struct kernel_lb_addr *); |
94 | static void udf_open_lvid(struct super_block *); | 91 | static void udf_open_lvid(struct super_block *); |
95 | static void udf_close_lvid(struct super_block *); | 92 | static void udf_close_lvid(struct super_block *); |
96 | static unsigned int udf_count_free(struct super_block *); | 93 | static unsigned int udf_count_free(struct super_block *); |
@@ -181,7 +178,7 @@ static const struct super_operations udf_sb_ops = { | |||
181 | .delete_inode = udf_delete_inode, | 178 | .delete_inode = udf_delete_inode, |
182 | .clear_inode = udf_clear_inode, | 179 | .clear_inode = udf_clear_inode, |
183 | .put_super = udf_put_super, | 180 | .put_super = udf_put_super, |
184 | .write_super = udf_write_super, | 181 | .sync_fs = udf_sync_fs, |
185 | .statfs = udf_statfs, | 182 | .statfs = udf_statfs, |
186 | .remount_fs = udf_remount_fs, | 183 | .remount_fs = udf_remount_fs, |
187 | .show_options = udf_show_options, | 184 | .show_options = udf_show_options, |
@@ -201,6 +198,8 @@ struct udf_options { | |||
201 | mode_t umask; | 198 | mode_t umask; |
202 | gid_t gid; | 199 | gid_t gid; |
203 | uid_t uid; | 200 | uid_t uid; |
201 | mode_t fmode; | ||
202 | mode_t dmode; | ||
204 | struct nls_table *nls_map; | 203 | struct nls_table *nls_map; |
205 | }; | 204 | }; |
206 | 205 | ||
@@ -258,7 +257,7 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
258 | 257 | ||
259 | if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) | 258 | if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) |
260 | seq_puts(seq, ",nostrict"); | 259 | seq_puts(seq, ",nostrict"); |
261 | if (sb->s_blocksize != UDF_DEFAULT_BLOCKSIZE) | 260 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_BLOCKSIZE_SET)) |
262 | seq_printf(seq, ",bs=%lu", sb->s_blocksize); | 261 | seq_printf(seq, ",bs=%lu", sb->s_blocksize); |
263 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE)) | 262 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE)) |
264 | seq_puts(seq, ",unhide"); | 263 | seq_puts(seq, ",unhide"); |
@@ -282,18 +281,16 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
282 | seq_printf(seq, ",gid=%u", sbi->s_gid); | 281 | seq_printf(seq, ",gid=%u", sbi->s_gid); |
283 | if (sbi->s_umask != 0) | 282 | if (sbi->s_umask != 0) |
284 | seq_printf(seq, ",umask=%o", sbi->s_umask); | 283 | seq_printf(seq, ",umask=%o", sbi->s_umask); |
284 | if (sbi->s_fmode != UDF_INVALID_MODE) | ||
285 | seq_printf(seq, ",mode=%o", sbi->s_fmode); | ||
286 | if (sbi->s_dmode != UDF_INVALID_MODE) | ||
287 | seq_printf(seq, ",dmode=%o", sbi->s_dmode); | ||
285 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) | 288 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) |
286 | seq_printf(seq, ",session=%u", sbi->s_session); | 289 | seq_printf(seq, ",session=%u", sbi->s_session); |
287 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) | 290 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) |
288 | seq_printf(seq, ",lastblock=%u", sbi->s_last_block); | 291 | seq_printf(seq, ",lastblock=%u", sbi->s_last_block); |
289 | /* | 292 | if (sbi->s_anchor != 0) |
290 | * s_anchor[2] could be zeroed out in case there is no anchor | 293 | seq_printf(seq, ",anchor=%u", sbi->s_anchor); |
291 | * in the specified block, but then the "anchor=N" option | ||
292 | * originally given by the user wasn't effective, so it's OK | ||
293 | * if we don't show it. | ||
294 | */ | ||
295 | if (sbi->s_anchor[2] != 0) | ||
296 | seq_printf(seq, ",anchor=%u", sbi->s_anchor[2]); | ||
297 | /* | 294 | /* |
298 | * volume, partition, fileset and rootdir seem to be ignored | 295 | * volume, partition, fileset and rootdir seem to be ignored |
299 | * currently | 296 | * currently |
@@ -317,6 +314,8 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
317 | * | 314 | * |
318 | * gid= Set the default group. | 315 | * gid= Set the default group. |
319 | * umask= Set the default umask. | 316 | * umask= Set the default umask. |
317 | * mode= Set the default file permissions. | ||
318 | * dmode= Set the default directory permissions. | ||
320 | * uid= Set the default user. | 319 | * uid= Set the default user. |
321 | * bs= Set the block size. | 320 | * bs= Set the block size. |
322 | * unhide Show otherwise hidden files. | 321 | * unhide Show otherwise hidden files. |
@@ -366,7 +365,8 @@ enum { | |||
366 | Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock, | 365 | Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock, |
367 | Opt_anchor, Opt_volume, Opt_partition, Opt_fileset, | 366 | Opt_anchor, Opt_volume, Opt_partition, Opt_fileset, |
368 | Opt_rootdir, Opt_utf8, Opt_iocharset, | 367 | Opt_rootdir, Opt_utf8, Opt_iocharset, |
369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore | 368 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore, |
369 | Opt_fmode, Opt_dmode | ||
370 | }; | 370 | }; |
371 | 371 | ||
372 | static const match_table_t tokens = { | 372 | static const match_table_t tokens = { |
@@ -395,6 +395,8 @@ static const match_table_t tokens = { | |||
395 | {Opt_rootdir, "rootdir=%u"}, | 395 | {Opt_rootdir, "rootdir=%u"}, |
396 | {Opt_utf8, "utf8"}, | 396 | {Opt_utf8, "utf8"}, |
397 | {Opt_iocharset, "iocharset=%s"}, | 397 | {Opt_iocharset, "iocharset=%s"}, |
398 | {Opt_fmode, "mode=%o"}, | ||
399 | {Opt_dmode, "dmode=%o"}, | ||
398 | {Opt_err, NULL} | 400 | {Opt_err, NULL} |
399 | }; | 401 | }; |
400 | 402 | ||
@@ -405,7 +407,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
405 | int option; | 407 | int option; |
406 | 408 | ||
407 | uopt->novrs = 0; | 409 | uopt->novrs = 0; |
408 | uopt->blocksize = UDF_DEFAULT_BLOCKSIZE; | ||
409 | uopt->partition = 0xFFFF; | 410 | uopt->partition = 0xFFFF; |
410 | uopt->session = 0xFFFFFFFF; | 411 | uopt->session = 0xFFFFFFFF; |
411 | uopt->lastblock = 0; | 412 | uopt->lastblock = 0; |
@@ -428,10 +429,12 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
428 | switch (token) { | 429 | switch (token) { |
429 | case Opt_novrs: | 430 | case Opt_novrs: |
430 | uopt->novrs = 1; | 431 | uopt->novrs = 1; |
432 | break; | ||
431 | case Opt_bs: | 433 | case Opt_bs: |
432 | if (match_int(&args[0], &option)) | 434 | if (match_int(&args[0], &option)) |
433 | return 0; | 435 | return 0; |
434 | uopt->blocksize = option; | 436 | uopt->blocksize = option; |
437 | uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); | ||
435 | break; | 438 | break; |
436 | case Opt_unhide: | 439 | case Opt_unhide: |
437 | uopt->flags |= (1 << UDF_FLAG_UNHIDE); | 440 | uopt->flags |= (1 << UDF_FLAG_UNHIDE); |
@@ -531,6 +534,16 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
531 | case Opt_gforget: | 534 | case Opt_gforget: |
532 | uopt->flags |= (1 << UDF_FLAG_GID_FORGET); | 535 | uopt->flags |= (1 << UDF_FLAG_GID_FORGET); |
533 | break; | 536 | break; |
537 | case Opt_fmode: | ||
538 | if (match_octal(args, &option)) | ||
539 | return 0; | ||
540 | uopt->fmode = option & 0777; | ||
541 | break; | ||
542 | case Opt_dmode: | ||
543 | if (match_octal(args, &option)) | ||
544 | return 0; | ||
545 | uopt->dmode = option & 0777; | ||
546 | break; | ||
534 | default: | 547 | default: |
535 | printk(KERN_ERR "udf: bad mount option \"%s\" " | 548 | printk(KERN_ERR "udf: bad mount option \"%s\" " |
536 | "or missing value\n", p); | 549 | "or missing value\n", p); |
@@ -540,17 +553,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
540 | return 1; | 553 | return 1; |
541 | } | 554 | } |
542 | 555 | ||
543 | static void udf_write_super(struct super_block *sb) | ||
544 | { | ||
545 | lock_kernel(); | ||
546 | |||
547 | if (!(sb->s_flags & MS_RDONLY)) | ||
548 | udf_open_lvid(sb); | ||
549 | sb->s_dirt = 0; | ||
550 | |||
551 | unlock_kernel(); | ||
552 | } | ||
553 | |||
554 | static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | 556 | static int udf_remount_fs(struct super_block *sb, int *flags, char *options) |
555 | { | 557 | { |
556 | struct udf_options uopt; | 558 | struct udf_options uopt; |
@@ -560,6 +562,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
560 | uopt.uid = sbi->s_uid; | 562 | uopt.uid = sbi->s_uid; |
561 | uopt.gid = sbi->s_gid; | 563 | uopt.gid = sbi->s_gid; |
562 | uopt.umask = sbi->s_umask; | 564 | uopt.umask = sbi->s_umask; |
565 | uopt.fmode = sbi->s_fmode; | ||
566 | uopt.dmode = sbi->s_dmode; | ||
563 | 567 | ||
564 | if (!udf_parse_options(options, &uopt, true)) | 568 | if (!udf_parse_options(options, &uopt, true)) |
565 | return -EINVAL; | 569 | return -EINVAL; |
@@ -568,6 +572,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
568 | sbi->s_uid = uopt.uid; | 572 | sbi->s_uid = uopt.uid; |
569 | sbi->s_gid = uopt.gid; | 573 | sbi->s_gid = uopt.gid; |
570 | sbi->s_umask = uopt.umask; | 574 | sbi->s_umask = uopt.umask; |
575 | sbi->s_fmode = uopt.fmode; | ||
576 | sbi->s_dmode = uopt.dmode; | ||
571 | 577 | ||
572 | if (sbi->s_lvid_bh) { | 578 | if (sbi->s_lvid_bh) { |
573 | int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); | 579 | int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); |
@@ -585,22 +591,19 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
585 | return 0; | 591 | return 0; |
586 | } | 592 | } |
587 | 593 | ||
588 | static int udf_vrs(struct super_block *sb, int silent) | 594 | /* Check Volume Structure Descriptors (ECMA 167 2/9.1) */ |
595 | /* We also check any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ | ||
596 | static loff_t udf_check_vsd(struct super_block *sb) | ||
589 | { | 597 | { |
590 | struct volStructDesc *vsd = NULL; | 598 | struct volStructDesc *vsd = NULL; |
591 | loff_t sector = 32768; | 599 | loff_t sector = 32768; |
592 | int sectorsize; | 600 | int sectorsize; |
593 | struct buffer_head *bh = NULL; | 601 | struct buffer_head *bh = NULL; |
594 | int iso9660 = 0; | ||
595 | int nsr02 = 0; | 602 | int nsr02 = 0; |
596 | int nsr03 = 0; | 603 | int nsr03 = 0; |
597 | struct udf_sb_info *sbi; | 604 | struct udf_sb_info *sbi; |
598 | 605 | ||
599 | /* Block size must be a multiple of 512 */ | ||
600 | if (sb->s_blocksize & 511) | ||
601 | return 0; | ||
602 | sbi = UDF_SB(sb); | 606 | sbi = UDF_SB(sb); |
603 | |||
604 | if (sb->s_blocksize < sizeof(struct volStructDesc)) | 607 | if (sb->s_blocksize < sizeof(struct volStructDesc)) |
605 | sectorsize = sizeof(struct volStructDesc); | 608 | sectorsize = sizeof(struct volStructDesc); |
606 | else | 609 | else |
@@ -627,7 +630,6 @@ static int udf_vrs(struct super_block *sb, int silent) | |||
627 | break; | 630 | break; |
628 | } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, | 631 | } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, |
629 | VSD_STD_ID_LEN)) { | 632 | VSD_STD_ID_LEN)) { |
630 | iso9660 = sector; | ||
631 | switch (vsd->structType) { | 633 | switch (vsd->structType) { |
632 | case 0: | 634 | case 0: |
633 | udf_debug("ISO9660 Boot Record found\n"); | 635 | udf_debug("ISO9660 Boot Record found\n"); |
@@ -679,139 +681,9 @@ static int udf_vrs(struct super_block *sb, int silent) | |||
679 | return 0; | 681 | return 0; |
680 | } | 682 | } |
681 | 683 | ||
682 | /* | ||
683 | * Check whether there is an anchor block in the given block | ||
684 | */ | ||
685 | static int udf_check_anchor_block(struct super_block *sb, sector_t block) | ||
686 | { | ||
687 | struct buffer_head *bh; | ||
688 | uint16_t ident; | ||
689 | |||
690 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && | ||
691 | udf_fixed_to_variable(block) >= | ||
692 | sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) | ||
693 | return 0; | ||
694 | |||
695 | bh = udf_read_tagged(sb, block, block, &ident); | ||
696 | if (!bh) | ||
697 | return 0; | ||
698 | brelse(bh); | ||
699 | |||
700 | return ident == TAG_IDENT_AVDP; | ||
701 | } | ||
702 | |||
703 | /* Search for an anchor volume descriptor pointer */ | ||
704 | static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock) | ||
705 | { | ||
706 | sector_t last[6]; | ||
707 | int i; | ||
708 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
709 | |||
710 | last[0] = lastblock; | ||
711 | last[1] = last[0] - 1; | ||
712 | last[2] = last[0] + 1; | ||
713 | last[3] = last[0] - 2; | ||
714 | last[4] = last[0] - 150; | ||
715 | last[5] = last[0] - 152; | ||
716 | |||
717 | /* according to spec, anchor is in either: | ||
718 | * block 256 | ||
719 | * lastblock-256 | ||
720 | * lastblock | ||
721 | * however, if the disc isn't closed, it could be 512 */ | ||
722 | |||
723 | for (i = 0; i < ARRAY_SIZE(last); i++) { | ||
724 | if (last[i] < 0) | ||
725 | continue; | ||
726 | if (last[i] >= sb->s_bdev->bd_inode->i_size >> | ||
727 | sb->s_blocksize_bits) | ||
728 | continue; | ||
729 | |||
730 | if (udf_check_anchor_block(sb, last[i])) { | ||
731 | sbi->s_anchor[0] = last[i]; | ||
732 | sbi->s_anchor[1] = last[i] - 256; | ||
733 | return last[i]; | ||
734 | } | ||
735 | |||
736 | if (last[i] < 256) | ||
737 | continue; | ||
738 | |||
739 | if (udf_check_anchor_block(sb, last[i] - 256)) { | ||
740 | sbi->s_anchor[1] = last[i] - 256; | ||
741 | return last[i]; | ||
742 | } | ||
743 | } | ||
744 | |||
745 | if (udf_check_anchor_block(sb, sbi->s_session + 256)) { | ||
746 | sbi->s_anchor[0] = sbi->s_session + 256; | ||
747 | return last[0]; | ||
748 | } | ||
749 | if (udf_check_anchor_block(sb, sbi->s_session + 512)) { | ||
750 | sbi->s_anchor[0] = sbi->s_session + 512; | ||
751 | return last[0]; | ||
752 | } | ||
753 | return 0; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * Find an anchor volume descriptor. The function expects sbi->s_lastblock to | ||
758 | * be the last block on the media. | ||
759 | * | ||
760 | * Return 1 if not found, 0 if ok | ||
761 | * | ||
762 | */ | ||
763 | static void udf_find_anchor(struct super_block *sb) | ||
764 | { | ||
765 | sector_t lastblock; | ||
766 | struct buffer_head *bh = NULL; | ||
767 | uint16_t ident; | ||
768 | int i; | ||
769 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
770 | |||
771 | lastblock = udf_scan_anchors(sb, sbi->s_last_block); | ||
772 | if (lastblock) | ||
773 | goto check_anchor; | ||
774 | |||
775 | /* No anchor found? Try VARCONV conversion of block numbers */ | ||
776 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
777 | /* Firstly, we try to not convert number of the last block */ | ||
778 | lastblock = udf_scan_anchors(sb, | ||
779 | udf_variable_to_fixed(sbi->s_last_block)); | ||
780 | if (lastblock) | ||
781 | goto check_anchor; | ||
782 | |||
783 | /* Secondly, we try with converted number of the last block */ | ||
784 | lastblock = udf_scan_anchors(sb, sbi->s_last_block); | ||
785 | if (!lastblock) { | ||
786 | /* VARCONV didn't help. Clear it. */ | ||
787 | UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); | ||
788 | } | ||
789 | |||
790 | check_anchor: | ||
791 | /* | ||
792 | * Check located anchors and the anchor block supplied via | ||
793 | * mount options | ||
794 | */ | ||
795 | for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { | ||
796 | if (!sbi->s_anchor[i]) | ||
797 | continue; | ||
798 | bh = udf_read_tagged(sb, sbi->s_anchor[i], | ||
799 | sbi->s_anchor[i], &ident); | ||
800 | if (!bh) | ||
801 | sbi->s_anchor[i] = 0; | ||
802 | else { | ||
803 | brelse(bh); | ||
804 | if (ident != TAG_IDENT_AVDP) | ||
805 | sbi->s_anchor[i] = 0; | ||
806 | } | ||
807 | } | ||
808 | |||
809 | sbi->s_last_block = lastblock; | ||
810 | } | ||
811 | |||
812 | static int udf_find_fileset(struct super_block *sb, | 684 | static int udf_find_fileset(struct super_block *sb, |
813 | kernel_lb_addr *fileset, | 685 | struct kernel_lb_addr *fileset, |
814 | kernel_lb_addr *root) | 686 | struct kernel_lb_addr *root) |
815 | { | 687 | { |
816 | struct buffer_head *bh = NULL; | 688 | struct buffer_head *bh = NULL; |
817 | long lastblock; | 689 | long lastblock; |
@@ -820,7 +692,7 @@ static int udf_find_fileset(struct super_block *sb, | |||
820 | 692 | ||
821 | if (fileset->logicalBlockNum != 0xFFFFFFFF || | 693 | if (fileset->logicalBlockNum != 0xFFFFFFFF || |
822 | fileset->partitionReferenceNum != 0xFFFF) { | 694 | fileset->partitionReferenceNum != 0xFFFF) { |
823 | bh = udf_read_ptagged(sb, *fileset, 0, &ident); | 695 | bh = udf_read_ptagged(sb, fileset, 0, &ident); |
824 | 696 | ||
825 | if (!bh) { | 697 | if (!bh) { |
826 | return 1; | 698 | return 1; |
@@ -834,7 +706,7 @@ static int udf_find_fileset(struct super_block *sb, | |||
834 | sbi = UDF_SB(sb); | 706 | sbi = UDF_SB(sb); |
835 | if (!bh) { | 707 | if (!bh) { |
836 | /* Search backwards through the partitions */ | 708 | /* Search backwards through the partitions */ |
837 | kernel_lb_addr newfileset; | 709 | struct kernel_lb_addr newfileset; |
838 | 710 | ||
839 | /* --> cvg: FIXME - is it reasonable? */ | 711 | /* --> cvg: FIXME - is it reasonable? */ |
840 | return 1; | 712 | return 1; |
@@ -850,7 +722,7 @@ static int udf_find_fileset(struct super_block *sb, | |||
850 | newfileset.logicalBlockNum = 0; | 722 | newfileset.logicalBlockNum = 0; |
851 | 723 | ||
852 | do { | 724 | do { |
853 | bh = udf_read_ptagged(sb, newfileset, 0, | 725 | bh = udf_read_ptagged(sb, &newfileset, 0, |
854 | &ident); | 726 | &ident); |
855 | if (!bh) { | 727 | if (!bh) { |
856 | newfileset.logicalBlockNum++; | 728 | newfileset.logicalBlockNum++; |
@@ -902,14 +774,23 @@ static int udf_find_fileset(struct super_block *sb, | |||
902 | static int udf_load_pvoldesc(struct super_block *sb, sector_t block) | 774 | static int udf_load_pvoldesc(struct super_block *sb, sector_t block) |
903 | { | 775 | { |
904 | struct primaryVolDesc *pvoldesc; | 776 | struct primaryVolDesc *pvoldesc; |
905 | struct ustr instr; | 777 | struct ustr *instr, *outstr; |
906 | struct ustr outstr; | ||
907 | struct buffer_head *bh; | 778 | struct buffer_head *bh; |
908 | uint16_t ident; | 779 | uint16_t ident; |
780 | int ret = 1; | ||
781 | |||
782 | instr = kmalloc(sizeof(struct ustr), GFP_NOFS); | ||
783 | if (!instr) | ||
784 | return 1; | ||
785 | |||
786 | outstr = kmalloc(sizeof(struct ustr), GFP_NOFS); | ||
787 | if (!outstr) | ||
788 | goto out1; | ||
909 | 789 | ||
910 | bh = udf_read_tagged(sb, block, block, &ident); | 790 | bh = udf_read_tagged(sb, block, block, &ident); |
911 | if (!bh) | 791 | if (!bh) |
912 | return 1; | 792 | goto out2; |
793 | |||
913 | BUG_ON(ident != TAG_IDENT_PVD); | 794 | BUG_ON(ident != TAG_IDENT_PVD); |
914 | 795 | ||
915 | pvoldesc = (struct primaryVolDesc *)bh->b_data; | 796 | pvoldesc = (struct primaryVolDesc *)bh->b_data; |
@@ -917,7 +798,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) | |||
917 | if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, | 798 | if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, |
918 | pvoldesc->recordingDateAndTime)) { | 799 | pvoldesc->recordingDateAndTime)) { |
919 | #ifdef UDFFS_DEBUG | 800 | #ifdef UDFFS_DEBUG |
920 | timestamp *ts = &pvoldesc->recordingDateAndTime; | 801 | struct timestamp *ts = &pvoldesc->recordingDateAndTime; |
921 | udf_debug("recording time %04u/%02u/%02u" | 802 | udf_debug("recording time %04u/%02u/%02u" |
922 | " %02u:%02u (%x)\n", | 803 | " %02u:%02u (%x)\n", |
923 | le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, | 804 | le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, |
@@ -925,20 +806,25 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) | |||
925 | #endif | 806 | #endif |
926 | } | 807 | } |
927 | 808 | ||
928 | if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) | 809 | if (!udf_build_ustr(instr, pvoldesc->volIdent, 32)) |
929 | if (udf_CS0toUTF8(&outstr, &instr)) { | 810 | if (udf_CS0toUTF8(outstr, instr)) { |
930 | strncpy(UDF_SB(sb)->s_volume_ident, outstr.u_name, | 811 | strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name, |
931 | outstr.u_len > 31 ? 31 : outstr.u_len); | 812 | outstr->u_len > 31 ? 31 : outstr->u_len); |
932 | udf_debug("volIdent[] = '%s'\n", | 813 | udf_debug("volIdent[] = '%s'\n", |
933 | UDF_SB(sb)->s_volume_ident); | 814 | UDF_SB(sb)->s_volume_ident); |
934 | } | 815 | } |
935 | 816 | ||
936 | if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) | 817 | if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) |
937 | if (udf_CS0toUTF8(&outstr, &instr)) | 818 | if (udf_CS0toUTF8(outstr, instr)) |
938 | udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); | 819 | udf_debug("volSetIdent[] = '%s'\n", outstr->u_name); |
939 | 820 | ||
940 | brelse(bh); | 821 | brelse(bh); |
941 | return 0; | 822 | ret = 0; |
823 | out2: | ||
824 | kfree(outstr); | ||
825 | out1: | ||
826 | kfree(instr); | ||
827 | return ret; | ||
942 | } | 828 | } |
943 | 829 | ||
944 | static int udf_load_metadata_files(struct super_block *sb, int partition) | 830 | static int udf_load_metadata_files(struct super_block *sb, int partition) |
@@ -946,7 +832,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
946 | struct udf_sb_info *sbi = UDF_SB(sb); | 832 | struct udf_sb_info *sbi = UDF_SB(sb); |
947 | struct udf_part_map *map; | 833 | struct udf_part_map *map; |
948 | struct udf_meta_data *mdata; | 834 | struct udf_meta_data *mdata; |
949 | kernel_lb_addr addr; | 835 | struct kernel_lb_addr addr; |
950 | int fe_error = 0; | 836 | int fe_error = 0; |
951 | 837 | ||
952 | map = &sbi->s_partmaps[partition]; | 838 | map = &sbi->s_partmaps[partition]; |
@@ -959,7 +845,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
959 | udf_debug("Metadata file location: block = %d part = %d\n", | 845 | udf_debug("Metadata file location: block = %d part = %d\n", |
960 | addr.logicalBlockNum, addr.partitionReferenceNum); | 846 | addr.logicalBlockNum, addr.partitionReferenceNum); |
961 | 847 | ||
962 | mdata->s_metadata_fe = udf_iget(sb, addr); | 848 | mdata->s_metadata_fe = udf_iget(sb, &addr); |
963 | 849 | ||
964 | if (mdata->s_metadata_fe == NULL) { | 850 | if (mdata->s_metadata_fe == NULL) { |
965 | udf_warning(sb, __func__, "metadata inode efe not found, " | 851 | udf_warning(sb, __func__, "metadata inode efe not found, " |
@@ -981,7 +867,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
981 | udf_debug("Mirror metadata file location: block = %d part = %d\n", | 867 | udf_debug("Mirror metadata file location: block = %d part = %d\n", |
982 | addr.logicalBlockNum, addr.partitionReferenceNum); | 868 | addr.logicalBlockNum, addr.partitionReferenceNum); |
983 | 869 | ||
984 | mdata->s_mirror_fe = udf_iget(sb, addr); | 870 | mdata->s_mirror_fe = udf_iget(sb, &addr); |
985 | 871 | ||
986 | if (mdata->s_mirror_fe == NULL) { | 872 | if (mdata->s_mirror_fe == NULL) { |
987 | if (fe_error) { | 873 | if (fe_error) { |
@@ -1013,7 +899,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
1013 | udf_debug("Bitmap file location: block = %d part = %d\n", | 899 | udf_debug("Bitmap file location: block = %d part = %d\n", |
1014 | addr.logicalBlockNum, addr.partitionReferenceNum); | 900 | addr.logicalBlockNum, addr.partitionReferenceNum); |
1015 | 901 | ||
1016 | mdata->s_bitmap_fe = udf_iget(sb, addr); | 902 | mdata->s_bitmap_fe = udf_iget(sb, &addr); |
1017 | 903 | ||
1018 | if (mdata->s_bitmap_fe == NULL) { | 904 | if (mdata->s_bitmap_fe == NULL) { |
1019 | if (sb->s_flags & MS_RDONLY) | 905 | if (sb->s_flags & MS_RDONLY) |
@@ -1037,7 +923,7 @@ error_exit: | |||
1037 | } | 923 | } |
1038 | 924 | ||
1039 | static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, | 925 | static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, |
1040 | kernel_lb_addr *root) | 926 | struct kernel_lb_addr *root) |
1041 | { | 927 | { |
1042 | struct fileSetDesc *fset; | 928 | struct fileSetDesc *fset; |
1043 | 929 | ||
@@ -1119,13 +1005,13 @@ static int udf_fill_partdesc_info(struct super_block *sb, | |||
1119 | 1005 | ||
1120 | phd = (struct partitionHeaderDesc *)p->partitionContentsUse; | 1006 | phd = (struct partitionHeaderDesc *)p->partitionContentsUse; |
1121 | if (phd->unallocSpaceTable.extLength) { | 1007 | if (phd->unallocSpaceTable.extLength) { |
1122 | kernel_lb_addr loc = { | 1008 | struct kernel_lb_addr loc = { |
1123 | .logicalBlockNum = le32_to_cpu( | 1009 | .logicalBlockNum = le32_to_cpu( |
1124 | phd->unallocSpaceTable.extPosition), | 1010 | phd->unallocSpaceTable.extPosition), |
1125 | .partitionReferenceNum = p_index, | 1011 | .partitionReferenceNum = p_index, |
1126 | }; | 1012 | }; |
1127 | 1013 | ||
1128 | map->s_uspace.s_table = udf_iget(sb, loc); | 1014 | map->s_uspace.s_table = udf_iget(sb, &loc); |
1129 | if (!map->s_uspace.s_table) { | 1015 | if (!map->s_uspace.s_table) { |
1130 | udf_debug("cannot load unallocSpaceTable (part %d)\n", | 1016 | udf_debug("cannot load unallocSpaceTable (part %d)\n", |
1131 | p_index); | 1017 | p_index); |
@@ -1154,13 +1040,13 @@ static int udf_fill_partdesc_info(struct super_block *sb, | |||
1154 | udf_debug("partitionIntegrityTable (part %d)\n", p_index); | 1040 | udf_debug("partitionIntegrityTable (part %d)\n", p_index); |
1155 | 1041 | ||
1156 | if (phd->freedSpaceTable.extLength) { | 1042 | if (phd->freedSpaceTable.extLength) { |
1157 | kernel_lb_addr loc = { | 1043 | struct kernel_lb_addr loc = { |
1158 | .logicalBlockNum = le32_to_cpu( | 1044 | .logicalBlockNum = le32_to_cpu( |
1159 | phd->freedSpaceTable.extPosition), | 1045 | phd->freedSpaceTable.extPosition), |
1160 | .partitionReferenceNum = p_index, | 1046 | .partitionReferenceNum = p_index, |
1161 | }; | 1047 | }; |
1162 | 1048 | ||
1163 | map->s_fspace.s_table = udf_iget(sb, loc); | 1049 | map->s_fspace.s_table = udf_iget(sb, &loc); |
1164 | if (!map->s_fspace.s_table) { | 1050 | if (!map->s_fspace.s_table) { |
1165 | udf_debug("cannot load freedSpaceTable (part %d)\n", | 1051 | udf_debug("cannot load freedSpaceTable (part %d)\n", |
1166 | p_index); | 1052 | p_index); |
@@ -1192,7 +1078,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) | |||
1192 | { | 1078 | { |
1193 | struct udf_sb_info *sbi = UDF_SB(sb); | 1079 | struct udf_sb_info *sbi = UDF_SB(sb); |
1194 | struct udf_part_map *map = &sbi->s_partmaps[p_index]; | 1080 | struct udf_part_map *map = &sbi->s_partmaps[p_index]; |
1195 | kernel_lb_addr ino; | 1081 | struct kernel_lb_addr ino; |
1196 | struct buffer_head *bh = NULL; | 1082 | struct buffer_head *bh = NULL; |
1197 | struct udf_inode_info *vati; | 1083 | struct udf_inode_info *vati; |
1198 | uint32_t pos; | 1084 | uint32_t pos; |
@@ -1201,7 +1087,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) | |||
1201 | /* VAT file entry is in the last recorded block */ | 1087 | /* VAT file entry is in the last recorded block */ |
1202 | ino.partitionReferenceNum = type1_index; | 1088 | ino.partitionReferenceNum = type1_index; |
1203 | ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; | 1089 | ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; |
1204 | sbi->s_vat_inode = udf_iget(sb, ino); | 1090 | sbi->s_vat_inode = udf_iget(sb, &ino); |
1205 | if (!sbi->s_vat_inode) | 1091 | if (!sbi->s_vat_inode) |
1206 | return 1; | 1092 | return 1; |
1207 | 1093 | ||
@@ -1322,7 +1208,7 @@ out_bh: | |||
1322 | } | 1208 | } |
1323 | 1209 | ||
1324 | static int udf_load_logicalvol(struct super_block *sb, sector_t block, | 1210 | static int udf_load_logicalvol(struct super_block *sb, sector_t block, |
1325 | kernel_lb_addr *fileset) | 1211 | struct kernel_lb_addr *fileset) |
1326 | { | 1212 | { |
1327 | struct logicalVolDesc *lvd; | 1213 | struct logicalVolDesc *lvd; |
1328 | int i, j, offset; | 1214 | int i, j, offset; |
@@ -1471,7 +1357,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, | |||
1471 | } | 1357 | } |
1472 | 1358 | ||
1473 | if (fileset) { | 1359 | if (fileset) { |
1474 | long_ad *la = (long_ad *)&(lvd->logicalVolContentsUse[0]); | 1360 | struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]); |
1475 | 1361 | ||
1476 | *fileset = lelb_to_cpu(la->extLocation); | 1362 | *fileset = lelb_to_cpu(la->extLocation); |
1477 | udf_debug("FileSet found in LogicalVolDesc at block=%d, " | 1363 | udf_debug("FileSet found in LogicalVolDesc at block=%d, " |
@@ -1490,7 +1376,7 @@ out_bh: | |||
1490 | * udf_load_logicalvolint | 1376 | * udf_load_logicalvolint |
1491 | * | 1377 | * |
1492 | */ | 1378 | */ |
1493 | static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) | 1379 | static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc) |
1494 | { | 1380 | { |
1495 | struct buffer_head *bh = NULL; | 1381 | struct buffer_head *bh = NULL; |
1496 | uint16_t ident; | 1382 | uint16_t ident; |
@@ -1533,7 +1419,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) | |||
1533 | * Written, tested, and released. | 1419 | * Written, tested, and released. |
1534 | */ | 1420 | */ |
1535 | static noinline int udf_process_sequence(struct super_block *sb, long block, | 1421 | static noinline int udf_process_sequence(struct super_block *sb, long block, |
1536 | long lastblock, kernel_lb_addr *fileset) | 1422 | long lastblock, struct kernel_lb_addr *fileset) |
1537 | { | 1423 | { |
1538 | struct buffer_head *bh = NULL; | 1424 | struct buffer_head *bh = NULL; |
1539 | struct udf_vds_record vds[VDS_POS_LENGTH]; | 1425 | struct udf_vds_record vds[VDS_POS_LENGTH]; |
@@ -1655,85 +1541,199 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, | |||
1655 | return 0; | 1541 | return 0; |
1656 | } | 1542 | } |
1657 | 1543 | ||
1544 | static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, | ||
1545 | struct kernel_lb_addr *fileset) | ||
1546 | { | ||
1547 | struct anchorVolDescPtr *anchor; | ||
1548 | long main_s, main_e, reserve_s, reserve_e; | ||
1549 | struct udf_sb_info *sbi; | ||
1550 | |||
1551 | sbi = UDF_SB(sb); | ||
1552 | anchor = (struct anchorVolDescPtr *)bh->b_data; | ||
1553 | |||
1554 | /* Locate the main sequence */ | ||
1555 | main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); | ||
1556 | main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); | ||
1557 | main_e = main_e >> sb->s_blocksize_bits; | ||
1558 | main_e += main_s; | ||
1559 | |||
1560 | /* Locate the reserve sequence */ | ||
1561 | reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); | ||
1562 | reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); | ||
1563 | reserve_e = reserve_e >> sb->s_blocksize_bits; | ||
1564 | reserve_e += reserve_s; | ||
1565 | |||
1566 | /* Process the main & reserve sequences */ | ||
1567 | /* responsible for finding the PartitionDesc(s) */ | ||
1568 | if (!udf_process_sequence(sb, main_s, main_e, fileset)) | ||
1569 | return 1; | ||
1570 | return !udf_process_sequence(sb, reserve_s, reserve_e, fileset); | ||
1571 | } | ||
1572 | |||
1658 | /* | 1573 | /* |
1659 | * udf_check_valid() | 1574 | * Check whether there is an anchor block in the given block and |
1575 | * load Volume Descriptor Sequence if so. | ||
1660 | */ | 1576 | */ |
1661 | static int udf_check_valid(struct super_block *sb, int novrs, int silent) | 1577 | static int udf_check_anchor_block(struct super_block *sb, sector_t block, |
1578 | struct kernel_lb_addr *fileset) | ||
1662 | { | 1579 | { |
1663 | long block; | 1580 | struct buffer_head *bh; |
1664 | struct udf_sb_info *sbi = UDF_SB(sb); | 1581 | uint16_t ident; |
1582 | int ret; | ||
1665 | 1583 | ||
1666 | if (novrs) { | 1584 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && |
1667 | udf_debug("Validity check skipped because of novrs option\n"); | 1585 | udf_fixed_to_variable(block) >= |
1586 | sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) | ||
1587 | return 0; | ||
1588 | |||
1589 | bh = udf_read_tagged(sb, block, block, &ident); | ||
1590 | if (!bh) | ||
1591 | return 0; | ||
1592 | if (ident != TAG_IDENT_AVDP) { | ||
1593 | brelse(bh); | ||
1668 | return 0; | 1594 | return 0; |
1669 | } | 1595 | } |
1670 | /* Check that it is NSR02 compliant */ | 1596 | ret = udf_load_sequence(sb, bh, fileset); |
1671 | /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ | 1597 | brelse(bh); |
1672 | block = udf_vrs(sb, silent); | 1598 | return ret; |
1673 | if (block == -1) | ||
1674 | udf_debug("Failed to read byte 32768. Assuming open " | ||
1675 | "disc. Skipping validity check\n"); | ||
1676 | if (block && !sbi->s_last_block) | ||
1677 | sbi->s_last_block = udf_get_last_block(sb); | ||
1678 | return !block; | ||
1679 | } | 1599 | } |
1680 | 1600 | ||
1681 | static int udf_load_sequence(struct super_block *sb, kernel_lb_addr *fileset) | 1601 | /* Search for an anchor volume descriptor pointer */ |
1602 | static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock, | ||
1603 | struct kernel_lb_addr *fileset) | ||
1682 | { | 1604 | { |
1683 | struct anchorVolDescPtr *anchor; | 1605 | sector_t last[6]; |
1684 | uint16_t ident; | ||
1685 | struct buffer_head *bh; | ||
1686 | long main_s, main_e, reserve_s, reserve_e; | ||
1687 | int i; | 1606 | int i; |
1688 | struct udf_sb_info *sbi; | 1607 | struct udf_sb_info *sbi = UDF_SB(sb); |
1689 | 1608 | int last_count = 0; | |
1690 | if (!sb) | ||
1691 | return 1; | ||
1692 | sbi = UDF_SB(sb); | ||
1693 | 1609 | ||
1694 | for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { | 1610 | /* First try user provided anchor */ |
1695 | if (!sbi->s_anchor[i]) | 1611 | if (sbi->s_anchor) { |
1612 | if (udf_check_anchor_block(sb, sbi->s_anchor, fileset)) | ||
1613 | return lastblock; | ||
1614 | } | ||
1615 | /* | ||
1616 | * according to spec, anchor is in either: | ||
1617 | * block 256 | ||
1618 | * lastblock-256 | ||
1619 | * lastblock | ||
1620 | * however, if the disc isn't closed, it could be 512. | ||
1621 | */ | ||
1622 | if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset)) | ||
1623 | return lastblock; | ||
1624 | /* | ||
1625 | * The trouble is which block is the last one. Drives often misreport | ||
1626 | * this so we try various possibilities. | ||
1627 | */ | ||
1628 | last[last_count++] = lastblock; | ||
1629 | if (lastblock >= 1) | ||
1630 | last[last_count++] = lastblock - 1; | ||
1631 | last[last_count++] = lastblock + 1; | ||
1632 | if (lastblock >= 2) | ||
1633 | last[last_count++] = lastblock - 2; | ||
1634 | if (lastblock >= 150) | ||
1635 | last[last_count++] = lastblock - 150; | ||
1636 | if (lastblock >= 152) | ||
1637 | last[last_count++] = lastblock - 152; | ||
1638 | |||
1639 | for (i = 0; i < last_count; i++) { | ||
1640 | if (last[i] >= sb->s_bdev->bd_inode->i_size >> | ||
1641 | sb->s_blocksize_bits) | ||
1696 | continue; | 1642 | continue; |
1697 | 1643 | if (udf_check_anchor_block(sb, last[i], fileset)) | |
1698 | bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i], | 1644 | return last[i]; |
1699 | &ident); | 1645 | if (last[i] < 256) |
1700 | if (!bh) | ||
1701 | continue; | 1646 | continue; |
1647 | if (udf_check_anchor_block(sb, last[i] - 256, fileset)) | ||
1648 | return last[i]; | ||
1649 | } | ||
1702 | 1650 | ||
1703 | anchor = (struct anchorVolDescPtr *)bh->b_data; | 1651 | /* Finally try block 512 in case media is open */ |
1652 | if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset)) | ||
1653 | return last[0]; | ||
1654 | return 0; | ||
1655 | } | ||
1704 | 1656 | ||
1705 | /* Locate the main sequence */ | 1657 | /* |
1706 | main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); | 1658 | * Find an anchor volume descriptor and load Volume Descriptor Sequence from |
1707 | main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); | 1659 | * area specified by it. The function expects sbi->s_lastblock to be the last |
1708 | main_e = main_e >> sb->s_blocksize_bits; | 1660 | * block on the media. |
1709 | main_e += main_s; | 1661 | * |
1662 | * Return 1 if ok, 0 if not found. | ||
1663 | * | ||
1664 | */ | ||
1665 | static int udf_find_anchor(struct super_block *sb, | ||
1666 | struct kernel_lb_addr *fileset) | ||
1667 | { | ||
1668 | sector_t lastblock; | ||
1669 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
1710 | 1670 | ||
1711 | /* Locate the reserve sequence */ | 1671 | lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset); |
1712 | reserve_s = le32_to_cpu( | 1672 | if (lastblock) |
1713 | anchor->reserveVolDescSeqExt.extLocation); | 1673 | goto out; |
1714 | reserve_e = le32_to_cpu( | ||
1715 | anchor->reserveVolDescSeqExt.extLength); | ||
1716 | reserve_e = reserve_e >> sb->s_blocksize_bits; | ||
1717 | reserve_e += reserve_s; | ||
1718 | 1674 | ||
1719 | brelse(bh); | 1675 | /* No anchor found? Try VARCONV conversion of block numbers */ |
1676 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
1677 | /* Firstly, we try to not convert number of the last block */ | ||
1678 | lastblock = udf_scan_anchors(sb, | ||
1679 | udf_variable_to_fixed(sbi->s_last_block), | ||
1680 | fileset); | ||
1681 | if (lastblock) | ||
1682 | goto out; | ||
1720 | 1683 | ||
1721 | /* Process the main & reserve sequences */ | 1684 | /* Secondly, we try with converted number of the last block */ |
1722 | /* responsible for finding the PartitionDesc(s) */ | 1685 | lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset); |
1723 | if (!(udf_process_sequence(sb, main_s, main_e, | 1686 | if (!lastblock) { |
1724 | fileset) && | 1687 | /* VARCONV didn't help. Clear it. */ |
1725 | udf_process_sequence(sb, reserve_s, reserve_e, | 1688 | UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); |
1726 | fileset))) | 1689 | return 0; |
1727 | break; | ||
1728 | } | 1690 | } |
1691 | out: | ||
1692 | sbi->s_last_block = lastblock; | ||
1693 | return 1; | ||
1694 | } | ||
1729 | 1695 | ||
1730 | if (i == ARRAY_SIZE(sbi->s_anchor)) { | 1696 | /* |
1731 | udf_debug("No Anchor block found\n"); | 1697 | * Check Volume Structure Descriptor, find Anchor block and load Volume |
1732 | return 1; | 1698 | * Descriptor Sequence |
1699 | */ | ||
1700 | static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, | ||
1701 | int silent, struct kernel_lb_addr *fileset) | ||
1702 | { | ||
1703 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
1704 | loff_t nsr_off; | ||
1705 | |||
1706 | if (!sb_set_blocksize(sb, uopt->blocksize)) { | ||
1707 | if (!silent) | ||
1708 | printk(KERN_WARNING "UDF-fs: Bad block size\n"); | ||
1709 | return 0; | ||
1710 | } | ||
1711 | sbi->s_last_block = uopt->lastblock; | ||
1712 | if (!uopt->novrs) { | ||
1713 | /* Check that it is NSR02 compliant */ | ||
1714 | nsr_off = udf_check_vsd(sb); | ||
1715 | if (!nsr_off) { | ||
1716 | if (!silent) | ||
1717 | printk(KERN_WARNING "UDF-fs: No VRS found\n"); | ||
1718 | return 0; | ||
1719 | } | ||
1720 | if (nsr_off == -1) | ||
1721 | udf_debug("Failed to read byte 32768. Assuming open " | ||
1722 | "disc. Skipping validity check\n"); | ||
1723 | if (!sbi->s_last_block) | ||
1724 | sbi->s_last_block = udf_get_last_block(sb); | ||
1725 | } else { | ||
1726 | udf_debug("Validity check skipped because of novrs option\n"); | ||
1733 | } | 1727 | } |
1734 | udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]); | ||
1735 | 1728 | ||
1736 | return 0; | 1729 | /* Look for anchor block and load Volume Descriptor Sequence */ |
1730 | sbi->s_anchor = uopt->anchor; | ||
1731 | if (!udf_find_anchor(sb, fileset)) { | ||
1732 | if (!silent) | ||
1733 | printk(KERN_WARNING "UDF-fs: No anchor found\n"); | ||
1734 | return 0; | ||
1735 | } | ||
1736 | return 1; | ||
1737 | } | 1737 | } |
1738 | 1738 | ||
1739 | static void udf_open_lvid(struct super_block *sb) | 1739 | static void udf_open_lvid(struct super_block *sb) |
@@ -1742,9 +1742,9 @@ static void udf_open_lvid(struct super_block *sb) | |||
1742 | struct buffer_head *bh = sbi->s_lvid_bh; | 1742 | struct buffer_head *bh = sbi->s_lvid_bh; |
1743 | struct logicalVolIntegrityDesc *lvid; | 1743 | struct logicalVolIntegrityDesc *lvid; |
1744 | struct logicalVolIntegrityDescImpUse *lvidiu; | 1744 | struct logicalVolIntegrityDescImpUse *lvidiu; |
1745 | |||
1745 | if (!bh) | 1746 | if (!bh) |
1746 | return; | 1747 | return; |
1747 | |||
1748 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; | 1748 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; |
1749 | lvidiu = udf_sb_lvidiu(sbi); | 1749 | lvidiu = udf_sb_lvidiu(sbi); |
1750 | 1750 | ||
@@ -1752,14 +1752,15 @@ static void udf_open_lvid(struct super_block *sb) | |||
1752 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; | 1752 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; |
1753 | udf_time_to_disk_stamp(&lvid->recordingDateAndTime, | 1753 | udf_time_to_disk_stamp(&lvid->recordingDateAndTime, |
1754 | CURRENT_TIME); | 1754 | CURRENT_TIME); |
1755 | lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; | 1755 | lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); |
1756 | 1756 | ||
1757 | lvid->descTag.descCRC = cpu_to_le16( | 1757 | lvid->descTag.descCRC = cpu_to_le16( |
1758 | crc_itu_t(0, (char *)lvid + sizeof(tag), | 1758 | crc_itu_t(0, (char *)lvid + sizeof(struct tag), |
1759 | le16_to_cpu(lvid->descTag.descCRCLength))); | 1759 | le16_to_cpu(lvid->descTag.descCRCLength))); |
1760 | 1760 | ||
1761 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); | 1761 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); |
1762 | mark_buffer_dirty(bh); | 1762 | mark_buffer_dirty(bh); |
1763 | sbi->s_lvid_dirty = 0; | ||
1763 | } | 1764 | } |
1764 | 1765 | ||
1765 | static void udf_close_lvid(struct super_block *sb) | 1766 | static void udf_close_lvid(struct super_block *sb) |
@@ -1773,10 +1774,6 @@ static void udf_close_lvid(struct super_block *sb) | |||
1773 | return; | 1774 | return; |
1774 | 1775 | ||
1775 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; | 1776 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; |
1776 | |||
1777 | if (lvid->integrityType != LVID_INTEGRITY_TYPE_OPEN) | ||
1778 | return; | ||
1779 | |||
1780 | lvidiu = udf_sb_lvidiu(sbi); | 1777 | lvidiu = udf_sb_lvidiu(sbi); |
1781 | lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; | 1778 | lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; |
1782 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; | 1779 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; |
@@ -1790,11 +1787,12 @@ static void udf_close_lvid(struct super_block *sb) | |||
1790 | lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); | 1787 | lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); |
1791 | 1788 | ||
1792 | lvid->descTag.descCRC = cpu_to_le16( | 1789 | lvid->descTag.descCRC = cpu_to_le16( |
1793 | crc_itu_t(0, (char *)lvid + sizeof(tag), | 1790 | crc_itu_t(0, (char *)lvid + sizeof(struct tag), |
1794 | le16_to_cpu(lvid->descTag.descCRCLength))); | 1791 | le16_to_cpu(lvid->descTag.descCRCLength))); |
1795 | 1792 | ||
1796 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); | 1793 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); |
1797 | mark_buffer_dirty(bh); | 1794 | mark_buffer_dirty(bh); |
1795 | sbi->s_lvid_dirty = 0; | ||
1798 | } | 1796 | } |
1799 | 1797 | ||
1800 | static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) | 1798 | static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) |
@@ -1846,15 +1844,18 @@ static void udf_free_partition(struct udf_part_map *map) | |||
1846 | static int udf_fill_super(struct super_block *sb, void *options, int silent) | 1844 | static int udf_fill_super(struct super_block *sb, void *options, int silent) |
1847 | { | 1845 | { |
1848 | int i; | 1846 | int i; |
1847 | int ret; | ||
1849 | struct inode *inode = NULL; | 1848 | struct inode *inode = NULL; |
1850 | struct udf_options uopt; | 1849 | struct udf_options uopt; |
1851 | kernel_lb_addr rootdir, fileset; | 1850 | struct kernel_lb_addr rootdir, fileset; |
1852 | struct udf_sb_info *sbi; | 1851 | struct udf_sb_info *sbi; |
1853 | 1852 | ||
1854 | uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); | 1853 | uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); |
1855 | uopt.uid = -1; | 1854 | uopt.uid = -1; |
1856 | uopt.gid = -1; | 1855 | uopt.gid = -1; |
1857 | uopt.umask = 0; | 1856 | uopt.umask = 0; |
1857 | uopt.fmode = UDF_INVALID_MODE; | ||
1858 | uopt.dmode = UDF_INVALID_MODE; | ||
1858 | 1859 | ||
1859 | sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); | 1860 | sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); |
1860 | if (!sbi) | 1861 | if (!sbi) |
@@ -1892,15 +1893,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1892 | sbi->s_uid = uopt.uid; | 1893 | sbi->s_uid = uopt.uid; |
1893 | sbi->s_gid = uopt.gid; | 1894 | sbi->s_gid = uopt.gid; |
1894 | sbi->s_umask = uopt.umask; | 1895 | sbi->s_umask = uopt.umask; |
1896 | sbi->s_fmode = uopt.fmode; | ||
1897 | sbi->s_dmode = uopt.dmode; | ||
1895 | sbi->s_nls_map = uopt.nls_map; | 1898 | sbi->s_nls_map = uopt.nls_map; |
1896 | 1899 | ||
1897 | /* Set the block size for all transfers */ | ||
1898 | if (!sb_min_blocksize(sb, uopt.blocksize)) { | ||
1899 | udf_debug("Bad block size (%d)\n", uopt.blocksize); | ||
1900 | printk(KERN_ERR "udf: bad block size (%d)\n", uopt.blocksize); | ||
1901 | goto error_out; | ||
1902 | } | ||
1903 | |||
1904 | if (uopt.session == 0xFFFFFFFF) | 1900 | if (uopt.session == 0xFFFFFFFF) |
1905 | sbi->s_session = udf_get_last_session(sb); | 1901 | sbi->s_session = udf_get_last_session(sb); |
1906 | else | 1902 | else |
@@ -1908,18 +1904,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1908 | 1904 | ||
1909 | udf_debug("Multi-session=%d\n", sbi->s_session); | 1905 | udf_debug("Multi-session=%d\n", sbi->s_session); |
1910 | 1906 | ||
1911 | sbi->s_last_block = uopt.lastblock; | ||
1912 | sbi->s_anchor[0] = sbi->s_anchor[1] = 0; | ||
1913 | sbi->s_anchor[2] = uopt.anchor; | ||
1914 | |||
1915 | if (udf_check_valid(sb, uopt.novrs, silent)) { | ||
1916 | /* read volume recognition sequences */ | ||
1917 | printk(KERN_WARNING "UDF-fs: No VRS found\n"); | ||
1918 | goto error_out; | ||
1919 | } | ||
1920 | |||
1921 | udf_find_anchor(sb); | ||
1922 | |||
1923 | /* Fill in the rest of the superblock */ | 1907 | /* Fill in the rest of the superblock */ |
1924 | sb->s_op = &udf_sb_ops; | 1908 | sb->s_op = &udf_sb_ops; |
1925 | sb->s_export_op = &udf_export_ops; | 1909 | sb->s_export_op = &udf_export_ops; |
@@ -1928,7 +1912,21 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1928 | sb->s_magic = UDF_SUPER_MAGIC; | 1912 | sb->s_magic = UDF_SUPER_MAGIC; |
1929 | sb->s_time_gran = 1000; | 1913 | sb->s_time_gran = 1000; |
1930 | 1914 | ||
1931 | if (udf_load_sequence(sb, &fileset)) { | 1915 | if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) { |
1916 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); | ||
1917 | } else { | ||
1918 | uopt.blocksize = bdev_hardsect_size(sb->s_bdev); | ||
1919 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); | ||
1920 | if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { | ||
1921 | if (!silent) | ||
1922 | printk(KERN_NOTICE | ||
1923 | "UDF-fs: Rescanning with blocksize " | ||
1924 | "%d\n", UDF_DEFAULT_BLOCKSIZE); | ||
1925 | uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; | ||
1926 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); | ||
1927 | } | ||
1928 | } | ||
1929 | if (!ret) { | ||
1932 | printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); | 1930 | printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); |
1933 | goto error_out; | 1931 | goto error_out; |
1934 | } | 1932 | } |
@@ -1978,7 +1976,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1978 | } | 1976 | } |
1979 | 1977 | ||
1980 | if (!silent) { | 1978 | if (!silent) { |
1981 | timestamp ts; | 1979 | struct timestamp ts; |
1982 | udf_time_to_disk_stamp(&ts, sbi->s_record_time); | 1980 | udf_time_to_disk_stamp(&ts, sbi->s_record_time); |
1983 | udf_info("UDF: Mounting volume '%s', " | 1981 | udf_info("UDF: Mounting volume '%s', " |
1984 | "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", | 1982 | "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", |
@@ -1991,7 +1989,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1991 | /* Assign the root inode */ | 1989 | /* Assign the root inode */ |
1992 | /* assign inodes by physical block number */ | 1990 | /* assign inodes by physical block number */ |
1993 | /* perhaps it's not extensible enough, but for now ... */ | 1991 | /* perhaps it's not extensible enough, but for now ... */ |
1994 | inode = udf_iget(sb, rootdir); | 1992 | inode = udf_iget(sb, &rootdir); |
1995 | if (!inode) { | 1993 | if (!inode) { |
1996 | printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " | 1994 | printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " |
1997 | "partition=%d\n", | 1995 | "partition=%d\n", |
@@ -2081,11 +2079,31 @@ static void udf_put_super(struct super_block *sb) | |||
2081 | sb->s_fs_info = NULL; | 2079 | sb->s_fs_info = NULL; |
2082 | } | 2080 | } |
2083 | 2081 | ||
2082 | static int udf_sync_fs(struct super_block *sb, int wait) | ||
2083 | { | ||
2084 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
2085 | |||
2086 | mutex_lock(&sbi->s_alloc_mutex); | ||
2087 | if (sbi->s_lvid_dirty) { | ||
2088 | /* | ||
2089 | * Blockdevice will be synced later so we don't have to submit | ||
2090 | * the buffer for IO | ||
2091 | */ | ||
2092 | mark_buffer_dirty(sbi->s_lvid_bh); | ||
2093 | sb->s_dirt = 0; | ||
2094 | sbi->s_lvid_dirty = 0; | ||
2095 | } | ||
2096 | mutex_unlock(&sbi->s_alloc_mutex); | ||
2097 | |||
2098 | return 0; | ||
2099 | } | ||
2100 | |||
2084 | static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) | 2101 | static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) |
2085 | { | 2102 | { |
2086 | struct super_block *sb = dentry->d_sb; | 2103 | struct super_block *sb = dentry->d_sb; |
2087 | struct udf_sb_info *sbi = UDF_SB(sb); | 2104 | struct udf_sb_info *sbi = UDF_SB(sb); |
2088 | struct logicalVolIntegrityDescImpUse *lvidiu; | 2105 | struct logicalVolIntegrityDescImpUse *lvidiu; |
2106 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
2089 | 2107 | ||
2090 | if (sbi->s_lvid_bh != NULL) | 2108 | if (sbi->s_lvid_bh != NULL) |
2091 | lvidiu = udf_sb_lvidiu(sbi); | 2109 | lvidiu = udf_sb_lvidiu(sbi); |
@@ -2101,8 +2119,9 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2101 | le32_to_cpu(lvidiu->numDirs)) : 0) | 2119 | le32_to_cpu(lvidiu->numDirs)) : 0) |
2102 | + buf->f_bfree; | 2120 | + buf->f_bfree; |
2103 | buf->f_ffree = buf->f_bfree; | 2121 | buf->f_ffree = buf->f_bfree; |
2104 | /* __kernel_fsid_t f_fsid */ | ||
2105 | buf->f_namelen = UDF_NAME_LEN - 2; | 2122 | buf->f_namelen = UDF_NAME_LEN - 2; |
2123 | buf->f_fsid.val[0] = (u32)id; | ||
2124 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
2106 | 2125 | ||
2107 | return 0; | 2126 | return 0; |
2108 | } | 2127 | } |
@@ -2114,7 +2133,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, | |||
2114 | unsigned int accum = 0; | 2133 | unsigned int accum = 0; |
2115 | int index; | 2134 | int index; |
2116 | int block = 0, newblock; | 2135 | int block = 0, newblock; |
2117 | kernel_lb_addr loc; | 2136 | struct kernel_lb_addr loc; |
2118 | uint32_t bytes; | 2137 | uint32_t bytes; |
2119 | uint8_t *ptr; | 2138 | uint8_t *ptr; |
2120 | uint16_t ident; | 2139 | uint16_t ident; |
@@ -2124,7 +2143,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, | |||
2124 | 2143 | ||
2125 | loc.logicalBlockNum = bitmap->s_extPosition; | 2144 | loc.logicalBlockNum = bitmap->s_extPosition; |
2126 | loc.partitionReferenceNum = UDF_SB(sb)->s_partition; | 2145 | loc.partitionReferenceNum = UDF_SB(sb)->s_partition; |
2127 | bh = udf_read_ptagged(sb, loc, 0, &ident); | 2146 | bh = udf_read_ptagged(sb, &loc, 0, &ident); |
2128 | 2147 | ||
2129 | if (!bh) { | 2148 | if (!bh) { |
2130 | printk(KERN_ERR "udf: udf_count_free failed\n"); | 2149 | printk(KERN_ERR "udf: udf_count_free failed\n"); |
@@ -2147,7 +2166,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, | |||
2147 | bytes -= cur_bytes; | 2166 | bytes -= cur_bytes; |
2148 | if (bytes) { | 2167 | if (bytes) { |
2149 | brelse(bh); | 2168 | brelse(bh); |
2150 | newblock = udf_get_lb_pblock(sb, loc, ++block); | 2169 | newblock = udf_get_lb_pblock(sb, &loc, ++block); |
2151 | bh = udf_tread(sb, newblock); | 2170 | bh = udf_tread(sb, newblock); |
2152 | if (!bh) { | 2171 | if (!bh) { |
2153 | udf_debug("read failed\n"); | 2172 | udf_debug("read failed\n"); |
@@ -2170,7 +2189,7 @@ static unsigned int udf_count_free_table(struct super_block *sb, | |||
2170 | { | 2189 | { |
2171 | unsigned int accum = 0; | 2190 | unsigned int accum = 0; |
2172 | uint32_t elen; | 2191 | uint32_t elen; |
2173 | kernel_lb_addr eloc; | 2192 | struct kernel_lb_addr eloc; |
2174 | int8_t etype; | 2193 | int8_t etype; |
2175 | struct extent_position epos; | 2194 | struct extent_position epos; |
2176 | 2195 | ||
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 65e19b4f9424..225527cdc885 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c | |||
@@ -28,10 +28,10 @@ | |||
28 | #include "udf_sb.h" | 28 | #include "udf_sb.h" |
29 | 29 | ||
30 | static void extent_trunc(struct inode *inode, struct extent_position *epos, | 30 | static void extent_trunc(struct inode *inode, struct extent_position *epos, |
31 | kernel_lb_addr eloc, int8_t etype, uint32_t elen, | 31 | struct kernel_lb_addr *eloc, int8_t etype, uint32_t elen, |
32 | uint32_t nelen) | 32 | uint32_t nelen) |
33 | { | 33 | { |
34 | kernel_lb_addr neloc = {}; | 34 | struct kernel_lb_addr neloc = {}; |
35 | int last_block = (elen + inode->i_sb->s_blocksize - 1) >> | 35 | int last_block = (elen + inode->i_sb->s_blocksize - 1) >> |
36 | inode->i_sb->s_blocksize_bits; | 36 | inode->i_sb->s_blocksize_bits; |
37 | int first_block = (nelen + inode->i_sb->s_blocksize - 1) >> | 37 | int first_block = (nelen + inode->i_sb->s_blocksize - 1) >> |
@@ -43,12 +43,12 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos, | |||
43 | last_block); | 43 | last_block); |
44 | etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30); | 44 | etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30); |
45 | } else | 45 | } else |
46 | neloc = eloc; | 46 | neloc = *eloc; |
47 | nelen = (etype << 30) | nelen; | 47 | nelen = (etype << 30) | nelen; |
48 | } | 48 | } |
49 | 49 | ||
50 | if (elen != nelen) { | 50 | if (elen != nelen) { |
51 | udf_write_aext(inode, epos, neloc, nelen, 0); | 51 | udf_write_aext(inode, epos, &neloc, nelen, 0); |
52 | if (last_block - first_block > 0) { | 52 | if (last_block - first_block > 0) { |
53 | if (etype == (EXT_RECORDED_ALLOCATED >> 30)) | 53 | if (etype == (EXT_RECORDED_ALLOCATED >> 30)) |
54 | mark_inode_dirty(inode); | 54 | mark_inode_dirty(inode); |
@@ -68,7 +68,7 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos, | |||
68 | void udf_truncate_tail_extent(struct inode *inode) | 68 | void udf_truncate_tail_extent(struct inode *inode) |
69 | { | 69 | { |
70 | struct extent_position epos = {}; | 70 | struct extent_position epos = {}; |
71 | kernel_lb_addr eloc; | 71 | struct kernel_lb_addr eloc; |
72 | uint32_t elen, nelen; | 72 | uint32_t elen, nelen; |
73 | uint64_t lbcount = 0; | 73 | uint64_t lbcount = 0; |
74 | int8_t etype = -1, netype; | 74 | int8_t etype = -1, netype; |
@@ -83,9 +83,9 @@ void udf_truncate_tail_extent(struct inode *inode) | |||
83 | return; | 83 | return; |
84 | 84 | ||
85 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 85 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
86 | adsize = sizeof(short_ad); | 86 | adsize = sizeof(struct short_ad); |
87 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 87 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
88 | adsize = sizeof(long_ad); | 88 | adsize = sizeof(struct long_ad); |
89 | else | 89 | else |
90 | BUG(); | 90 | BUG(); |
91 | 91 | ||
@@ -106,7 +106,7 @@ void udf_truncate_tail_extent(struct inode *inode) | |||
106 | (unsigned)elen); | 106 | (unsigned)elen); |
107 | nelen = elen - (lbcount - inode->i_size); | 107 | nelen = elen - (lbcount - inode->i_size); |
108 | epos.offset -= adsize; | 108 | epos.offset -= adsize; |
109 | extent_trunc(inode, &epos, eloc, etype, elen, nelen); | 109 | extent_trunc(inode, &epos, &eloc, etype, elen, nelen); |
110 | epos.offset += adsize; | 110 | epos.offset += adsize; |
111 | if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) | 111 | if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) |
112 | printk(KERN_ERR "udf_truncate_tail_extent(): " | 112 | printk(KERN_ERR "udf_truncate_tail_extent(): " |
@@ -124,7 +124,7 @@ void udf_truncate_tail_extent(struct inode *inode) | |||
124 | void udf_discard_prealloc(struct inode *inode) | 124 | void udf_discard_prealloc(struct inode *inode) |
125 | { | 125 | { |
126 | struct extent_position epos = { NULL, 0, {0, 0} }; | 126 | struct extent_position epos = { NULL, 0, {0, 0} }; |
127 | kernel_lb_addr eloc; | 127 | struct kernel_lb_addr eloc; |
128 | uint32_t elen; | 128 | uint32_t elen; |
129 | uint64_t lbcount = 0; | 129 | uint64_t lbcount = 0; |
130 | int8_t etype = -1, netype; | 130 | int8_t etype = -1, netype; |
@@ -136,9 +136,9 @@ void udf_discard_prealloc(struct inode *inode) | |||
136 | return; | 136 | return; |
137 | 137 | ||
138 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 138 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
139 | adsize = sizeof(short_ad); | 139 | adsize = sizeof(struct short_ad); |
140 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 140 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
141 | adsize = sizeof(long_ad); | 141 | adsize = sizeof(struct long_ad); |
142 | else | 142 | else |
143 | adsize = 0; | 143 | adsize = 0; |
144 | 144 | ||
@@ -152,7 +152,7 @@ void udf_discard_prealloc(struct inode *inode) | |||
152 | if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { | 152 | if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { |
153 | epos.offset -= adsize; | 153 | epos.offset -= adsize; |
154 | lbcount -= elen; | 154 | lbcount -= elen; |
155 | extent_trunc(inode, &epos, eloc, etype, elen, 0); | 155 | extent_trunc(inode, &epos, &eloc, etype, elen, 0); |
156 | if (!epos.bh) { | 156 | if (!epos.bh) { |
157 | iinfo->i_lenAlloc = | 157 | iinfo->i_lenAlloc = |
158 | epos.offset - | 158 | epos.offset - |
@@ -200,7 +200,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode, | |||
200 | void udf_truncate_extents(struct inode *inode) | 200 | void udf_truncate_extents(struct inode *inode) |
201 | { | 201 | { |
202 | struct extent_position epos; | 202 | struct extent_position epos; |
203 | kernel_lb_addr eloc, neloc = {}; | 203 | struct kernel_lb_addr eloc, neloc = {}; |
204 | uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; | 204 | uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; |
205 | int8_t etype; | 205 | int8_t etype; |
206 | struct super_block *sb = inode->i_sb; | 206 | struct super_block *sb = inode->i_sb; |
@@ -210,9 +210,9 @@ void udf_truncate_extents(struct inode *inode) | |||
210 | struct udf_inode_info *iinfo = UDF_I(inode); | 210 | struct udf_inode_info *iinfo = UDF_I(inode); |
211 | 211 | ||
212 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 212 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
213 | adsize = sizeof(short_ad); | 213 | adsize = sizeof(struct short_ad); |
214 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) | 214 | else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) |
215 | adsize = sizeof(long_ad); | 215 | adsize = sizeof(struct long_ad); |
216 | else | 216 | else |
217 | BUG(); | 217 | BUG(); |
218 | 218 | ||
@@ -221,7 +221,7 @@ void udf_truncate_extents(struct inode *inode) | |||
221 | (inode->i_size & (sb->s_blocksize - 1)); | 221 | (inode->i_size & (sb->s_blocksize - 1)); |
222 | if (etype != -1) { | 222 | if (etype != -1) { |
223 | epos.offset -= adsize; | 223 | epos.offset -= adsize; |
224 | extent_trunc(inode, &epos, eloc, etype, elen, byte_offset); | 224 | extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset); |
225 | epos.offset += adsize; | 225 | epos.offset += adsize; |
226 | if (byte_offset) | 226 | if (byte_offset) |
227 | lenalloc = epos.offset; | 227 | lenalloc = epos.offset; |
@@ -236,12 +236,12 @@ void udf_truncate_extents(struct inode *inode) | |||
236 | while ((etype = udf_current_aext(inode, &epos, &eloc, | 236 | while ((etype = udf_current_aext(inode, &epos, &eloc, |
237 | &elen, 0)) != -1) { | 237 | &elen, 0)) != -1) { |
238 | if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { | 238 | if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { |
239 | udf_write_aext(inode, &epos, neloc, nelen, 0); | 239 | udf_write_aext(inode, &epos, &neloc, nelen, 0); |
240 | if (indirect_ext_len) { | 240 | if (indirect_ext_len) { |
241 | /* We managed to free all extents in the | 241 | /* We managed to free all extents in the |
242 | * indirect extent - free it too */ | 242 | * indirect extent - free it too */ |
243 | BUG_ON(!epos.bh); | 243 | BUG_ON(!epos.bh); |
244 | udf_free_blocks(sb, inode, epos.block, | 244 | udf_free_blocks(sb, inode, &epos.block, |
245 | 0, indirect_ext_len); | 245 | 0, indirect_ext_len); |
246 | } else if (!epos.bh) { | 246 | } else if (!epos.bh) { |
247 | iinfo->i_lenAlloc = lenalloc; | 247 | iinfo->i_lenAlloc = lenalloc; |
@@ -253,7 +253,7 @@ void udf_truncate_extents(struct inode *inode) | |||
253 | epos.offset = sizeof(struct allocExtDesc); | 253 | epos.offset = sizeof(struct allocExtDesc); |
254 | epos.block = eloc; | 254 | epos.block = eloc; |
255 | epos.bh = udf_tread(sb, | 255 | epos.bh = udf_tread(sb, |
256 | udf_get_lb_pblock(sb, eloc, 0)); | 256 | udf_get_lb_pblock(sb, &eloc, 0)); |
257 | if (elen) | 257 | if (elen) |
258 | indirect_ext_len = | 258 | indirect_ext_len = |
259 | (elen + sb->s_blocksize - 1) >> | 259 | (elen + sb->s_blocksize - 1) >> |
@@ -261,7 +261,7 @@ void udf_truncate_extents(struct inode *inode) | |||
261 | else | 261 | else |
262 | indirect_ext_len = 1; | 262 | indirect_ext_len = 1; |
263 | } else { | 263 | } else { |
264 | extent_trunc(inode, &epos, eloc, etype, | 264 | extent_trunc(inode, &epos, &eloc, etype, |
265 | elen, 0); | 265 | elen, 0); |
266 | epos.offset += adsize; | 266 | epos.offset += adsize; |
267 | } | 267 | } |
@@ -269,7 +269,7 @@ void udf_truncate_extents(struct inode *inode) | |||
269 | 269 | ||
270 | if (indirect_ext_len) { | 270 | if (indirect_ext_len) { |
271 | BUG_ON(!epos.bh); | 271 | BUG_ON(!epos.bh); |
272 | udf_free_blocks(sb, inode, epos.block, 0, | 272 | udf_free_blocks(sb, inode, &epos.block, 0, |
273 | indirect_ext_len); | 273 | indirect_ext_len); |
274 | } else if (!epos.bh) { | 274 | } else if (!epos.bh) { |
275 | iinfo->i_lenAlloc = lenalloc; | 275 | iinfo->i_lenAlloc = lenalloc; |
@@ -278,7 +278,7 @@ void udf_truncate_extents(struct inode *inode) | |||
278 | udf_update_alloc_ext_desc(inode, &epos, lenalloc); | 278 | udf_update_alloc_ext_desc(inode, &epos, lenalloc); |
279 | } else if (inode->i_size) { | 279 | } else if (inode->i_size) { |
280 | if (byte_offset) { | 280 | if (byte_offset) { |
281 | kernel_long_ad extent; | 281 | struct kernel_long_ad extent; |
282 | 282 | ||
283 | /* | 283 | /* |
284 | * OK, there is not extent covering inode->i_size and | 284 | * OK, there is not extent covering inode->i_size and |
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index 4f86b1d98a5d..e58d1de41073 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h | |||
@@ -4,7 +4,7 @@ | |||
4 | struct udf_inode_info { | 4 | struct udf_inode_info { |
5 | struct timespec i_crtime; | 5 | struct timespec i_crtime; |
6 | /* Physical address of inode */ | 6 | /* Physical address of inode */ |
7 | kernel_lb_addr i_location; | 7 | struct kernel_lb_addr i_location; |
8 | __u64 i_unique; | 8 | __u64 i_unique; |
9 | __u32 i_lenEAttr; | 9 | __u32 i_lenEAttr; |
10 | __u32 i_lenAlloc; | 10 | __u32 i_lenAlloc; |
@@ -17,8 +17,8 @@ struct udf_inode_info { | |||
17 | unsigned i_strat4096 : 1; | 17 | unsigned i_strat4096 : 1; |
18 | unsigned reserved : 26; | 18 | unsigned reserved : 26; |
19 | union { | 19 | union { |
20 | short_ad *i_sad; | 20 | struct short_ad *i_sad; |
21 | long_ad *i_lad; | 21 | struct long_ad *i_lad; |
22 | __u8 *i_data; | 22 | __u8 *i_data; |
23 | } i_ext; | 23 | } i_ext; |
24 | struct inode vfs_inode; | 24 | struct inode vfs_inode; |
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 1c1c514a9725..d113b72c2768 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #define UDF_FLAG_GID_SET 16 | 30 | #define UDF_FLAG_GID_SET 16 |
31 | #define UDF_FLAG_SESSION_SET 17 | 31 | #define UDF_FLAG_SESSION_SET 17 |
32 | #define UDF_FLAG_LASTBLOCK_SET 18 | 32 | #define UDF_FLAG_LASTBLOCK_SET 18 |
33 | #define UDF_FLAG_BLOCKSIZE_SET 19 | ||
33 | 34 | ||
34 | #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 | 35 | #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 |
35 | #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 | 36 | #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 |
@@ -48,6 +49,8 @@ | |||
48 | #define UDF_SPARABLE_MAP15 0x1522U | 49 | #define UDF_SPARABLE_MAP15 0x1522U |
49 | #define UDF_METADATA_MAP25 0x2511U | 50 | #define UDF_METADATA_MAP25 0x2511U |
50 | 51 | ||
52 | #define UDF_INVALID_MODE ((mode_t)-1) | ||
53 | |||
51 | #pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ | 54 | #pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ |
52 | 55 | ||
53 | struct udf_meta_data { | 56 | struct udf_meta_data { |
@@ -114,7 +117,7 @@ struct udf_sb_info { | |||
114 | 117 | ||
115 | /* Sector headers */ | 118 | /* Sector headers */ |
116 | __s32 s_session; | 119 | __s32 s_session; |
117 | __u32 s_anchor[3]; | 120 | __u32 s_anchor; |
118 | __u32 s_last_block; | 121 | __u32 s_last_block; |
119 | 122 | ||
120 | struct buffer_head *s_lvid_bh; | 123 | struct buffer_head *s_lvid_bh; |
@@ -123,6 +126,8 @@ struct udf_sb_info { | |||
123 | mode_t s_umask; | 126 | mode_t s_umask; |
124 | gid_t s_gid; | 127 | gid_t s_gid; |
125 | uid_t s_uid; | 128 | uid_t s_uid; |
129 | mode_t s_fmode; | ||
130 | mode_t s_dmode; | ||
126 | 131 | ||
127 | /* Root Info */ | 132 | /* Root Info */ |
128 | struct timespec s_record_time; | 133 | struct timespec s_record_time; |
@@ -143,6 +148,8 @@ struct udf_sb_info { | |||
143 | struct inode *s_vat_inode; | 148 | struct inode *s_vat_inode; |
144 | 149 | ||
145 | struct mutex s_alloc_mutex; | 150 | struct mutex s_alloc_mutex; |
151 | /* Protected by s_alloc_mutex */ | ||
152 | unsigned int s_lvid_dirty; | ||
146 | }; | 153 | }; |
147 | 154 | ||
148 | static inline struct udf_sb_info *UDF_SB(struct super_block *sb) | 155 | static inline struct udf_sb_info *UDF_SB(struct super_block *sb) |
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8ec865de5f13..cac51b77a5d1 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -62,10 +62,8 @@ static inline size_t udf_ext0_offset(struct inode *inode) | |||
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
64 | 64 | ||
65 | #define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset)) | ||
66 | |||
67 | /* computes tag checksum */ | 65 | /* computes tag checksum */ |
68 | u8 udf_tag_checksum(const tag *t); | 66 | u8 udf_tag_checksum(const struct tag *t); |
69 | 67 | ||
70 | struct dentry; | 68 | struct dentry; |
71 | struct inode; | 69 | struct inode; |
@@ -95,7 +93,7 @@ struct udf_vds_record { | |||
95 | }; | 93 | }; |
96 | 94 | ||
97 | struct generic_desc { | 95 | struct generic_desc { |
98 | tag descTag; | 96 | struct tag descTag; |
99 | __le32 volDescSeqNum; | 97 | __le32 volDescSeqNum; |
100 | }; | 98 | }; |
101 | 99 | ||
@@ -108,11 +106,22 @@ struct ustr { | |||
108 | struct extent_position { | 106 | struct extent_position { |
109 | struct buffer_head *bh; | 107 | struct buffer_head *bh; |
110 | uint32_t offset; | 108 | uint32_t offset; |
111 | kernel_lb_addr block; | 109 | struct kernel_lb_addr block; |
112 | }; | 110 | }; |
113 | 111 | ||
114 | /* super.c */ | 112 | /* super.c */ |
115 | extern void udf_warning(struct super_block *, const char *, const char *, ...); | 113 | extern void udf_warning(struct super_block *, const char *, const char *, ...); |
114 | static inline void udf_updated_lvid(struct super_block *sb) | ||
115 | { | ||
116 | struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh; | ||
117 | |||
118 | BUG_ON(!bh); | ||
119 | WARN_ON_ONCE(((struct logicalVolIntegrityDesc *) | ||
120 | bh->b_data)->integrityType != | ||
121 | cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN)); | ||
122 | sb->s_dirt = 1; | ||
123 | UDF_SB(sb)->s_lvid_dirty = 1; | ||
124 | } | ||
116 | 125 | ||
117 | /* namei.c */ | 126 | /* namei.c */ |
118 | extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, | 127 | extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, |
@@ -124,7 +133,7 @@ extern int udf_ioctl(struct inode *, struct file *, unsigned int, | |||
124 | unsigned long); | 133 | unsigned long); |
125 | 134 | ||
126 | /* inode.c */ | 135 | /* inode.c */ |
127 | extern struct inode *udf_iget(struct super_block *, kernel_lb_addr); | 136 | extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); |
128 | extern int udf_sync_inode(struct inode *); | 137 | extern int udf_sync_inode(struct inode *); |
129 | extern void udf_expand_file_adinicb(struct inode *, int, int *); | 138 | extern void udf_expand_file_adinicb(struct inode *, int, int *); |
130 | extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); | 139 | extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); |
@@ -136,19 +145,19 @@ extern void udf_clear_inode(struct inode *); | |||
136 | extern int udf_write_inode(struct inode *, int); | 145 | extern int udf_write_inode(struct inode *, int); |
137 | extern long udf_block_map(struct inode *, sector_t); | 146 | extern long udf_block_map(struct inode *, sector_t); |
138 | extern int udf_extend_file(struct inode *, struct extent_position *, | 147 | extern int udf_extend_file(struct inode *, struct extent_position *, |
139 | kernel_long_ad *, sector_t); | 148 | struct kernel_long_ad *, sector_t); |
140 | extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, | 149 | extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, |
141 | kernel_lb_addr *, uint32_t *, sector_t *); | 150 | struct kernel_lb_addr *, uint32_t *, sector_t *); |
142 | extern int8_t udf_add_aext(struct inode *, struct extent_position *, | 151 | extern int8_t udf_add_aext(struct inode *, struct extent_position *, |
143 | kernel_lb_addr, uint32_t, int); | 152 | struct kernel_lb_addr *, uint32_t, int); |
144 | extern int8_t udf_write_aext(struct inode *, struct extent_position *, | 153 | extern int8_t udf_write_aext(struct inode *, struct extent_position *, |
145 | kernel_lb_addr, uint32_t, int); | 154 | struct kernel_lb_addr *, uint32_t, int); |
146 | extern int8_t udf_delete_aext(struct inode *, struct extent_position, | 155 | extern int8_t udf_delete_aext(struct inode *, struct extent_position, |
147 | kernel_lb_addr, uint32_t); | 156 | struct kernel_lb_addr, uint32_t); |
148 | extern int8_t udf_next_aext(struct inode *, struct extent_position *, | 157 | extern int8_t udf_next_aext(struct inode *, struct extent_position *, |
149 | kernel_lb_addr *, uint32_t *, int); | 158 | struct kernel_lb_addr *, uint32_t *, int); |
150 | extern int8_t udf_current_aext(struct inode *, struct extent_position *, | 159 | extern int8_t udf_current_aext(struct inode *, struct extent_position *, |
151 | kernel_lb_addr *, uint32_t *, int); | 160 | struct kernel_lb_addr *, uint32_t *, int); |
152 | 161 | ||
153 | /* misc.c */ | 162 | /* misc.c */ |
154 | extern struct buffer_head *udf_tgetblk(struct super_block *, int); | 163 | extern struct buffer_head *udf_tgetblk(struct super_block *, int); |
@@ -160,7 +169,7 @@ extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t, | |||
160 | extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, | 169 | extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, |
161 | uint32_t, uint16_t *); | 170 | uint32_t, uint16_t *); |
162 | extern struct buffer_head *udf_read_ptagged(struct super_block *, | 171 | extern struct buffer_head *udf_read_ptagged(struct super_block *, |
163 | kernel_lb_addr, uint32_t, | 172 | struct kernel_lb_addr *, uint32_t, |
164 | uint16_t *); | 173 | uint16_t *); |
165 | extern void udf_update_tag(char *, int); | 174 | extern void udf_update_tag(char *, int); |
166 | extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int); | 175 | extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int); |
@@ -182,6 +191,14 @@ extern uint32_t udf_get_pblock_meta25(struct super_block *, uint32_t, uint16_t, | |||
182 | uint32_t); | 191 | uint32_t); |
183 | extern int udf_relocate_blocks(struct super_block *, long, long *); | 192 | extern int udf_relocate_blocks(struct super_block *, long, long *); |
184 | 193 | ||
194 | static inline uint32_t | ||
195 | udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc, | ||
196 | uint32_t offset) | ||
197 | { | ||
198 | return udf_get_pblock(sb, loc->logicalBlockNum, | ||
199 | loc->partitionReferenceNum, offset); | ||
200 | } | ||
201 | |||
185 | /* unicode.c */ | 202 | /* unicode.c */ |
186 | extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); | 203 | extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); |
187 | extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, | 204 | extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, |
@@ -200,7 +217,7 @@ extern void udf_truncate_extents(struct inode *); | |||
200 | 217 | ||
201 | /* balloc.c */ | 218 | /* balloc.c */ |
202 | extern void udf_free_blocks(struct super_block *, struct inode *, | 219 | extern void udf_free_blocks(struct super_block *, struct inode *, |
203 | kernel_lb_addr, uint32_t, uint32_t); | 220 | struct kernel_lb_addr *, uint32_t, uint32_t); |
204 | extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, | 221 | extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, |
205 | uint32_t, uint32_t); | 222 | uint32_t, uint32_t); |
206 | extern int udf_new_block(struct super_block *, struct inode *, uint16_t, | 223 | extern int udf_new_block(struct super_block *, struct inode *, uint16_t, |
@@ -214,16 +231,16 @@ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, | |||
214 | struct udf_fileident_bh *, | 231 | struct udf_fileident_bh *, |
215 | struct fileIdentDesc *, | 232 | struct fileIdentDesc *, |
216 | struct extent_position *, | 233 | struct extent_position *, |
217 | kernel_lb_addr *, uint32_t *, | 234 | struct kernel_lb_addr *, uint32_t *, |
218 | sector_t *); | 235 | sector_t *); |
219 | extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, | 236 | extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, |
220 | int *offset); | 237 | int *offset); |
221 | extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); | 238 | extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); |
222 | extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); | 239 | extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); |
223 | 240 | ||
224 | /* udftime.c */ | 241 | /* udftime.c */ |
225 | extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest, | 242 | extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest, |
226 | timestamp src); | 243 | struct timestamp src); |
227 | extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src); | 244 | extern struct timestamp *udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src); |
228 | 245 | ||
229 | #endif /* __UDF_DECL_H */ | 246 | #endif /* __UDF_DECL_H */ |
diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h index 489f52fb428c..6a9f3a9cc428 100644 --- a/fs/udf/udfend.h +++ b/fs/udf/udfend.h | |||
@@ -4,9 +4,9 @@ | |||
4 | #include <asm/byteorder.h> | 4 | #include <asm/byteorder.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | 6 | ||
7 | static inline kernel_lb_addr lelb_to_cpu(lb_addr in) | 7 | static inline struct kernel_lb_addr lelb_to_cpu(struct lb_addr in) |
8 | { | 8 | { |
9 | kernel_lb_addr out; | 9 | struct kernel_lb_addr out; |
10 | 10 | ||
11 | out.logicalBlockNum = le32_to_cpu(in.logicalBlockNum); | 11 | out.logicalBlockNum = le32_to_cpu(in.logicalBlockNum); |
12 | out.partitionReferenceNum = le16_to_cpu(in.partitionReferenceNum); | 12 | out.partitionReferenceNum = le16_to_cpu(in.partitionReferenceNum); |
@@ -14,9 +14,9 @@ static inline kernel_lb_addr lelb_to_cpu(lb_addr in) | |||
14 | return out; | 14 | return out; |
15 | } | 15 | } |
16 | 16 | ||
17 | static inline lb_addr cpu_to_lelb(kernel_lb_addr in) | 17 | static inline struct lb_addr cpu_to_lelb(struct kernel_lb_addr in) |
18 | { | 18 | { |
19 | lb_addr out; | 19 | struct lb_addr out; |
20 | 20 | ||
21 | out.logicalBlockNum = cpu_to_le32(in.logicalBlockNum); | 21 | out.logicalBlockNum = cpu_to_le32(in.logicalBlockNum); |
22 | out.partitionReferenceNum = cpu_to_le16(in.partitionReferenceNum); | 22 | out.partitionReferenceNum = cpu_to_le16(in.partitionReferenceNum); |
@@ -24,9 +24,9 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in) | |||
24 | return out; | 24 | return out; |
25 | } | 25 | } |
26 | 26 | ||
27 | static inline short_ad lesa_to_cpu(short_ad in) | 27 | static inline struct short_ad lesa_to_cpu(struct short_ad in) |
28 | { | 28 | { |
29 | short_ad out; | 29 | struct short_ad out; |
30 | 30 | ||
31 | out.extLength = le32_to_cpu(in.extLength); | 31 | out.extLength = le32_to_cpu(in.extLength); |
32 | out.extPosition = le32_to_cpu(in.extPosition); | 32 | out.extPosition = le32_to_cpu(in.extPosition); |
@@ -34,9 +34,9 @@ static inline short_ad lesa_to_cpu(short_ad in) | |||
34 | return out; | 34 | return out; |
35 | } | 35 | } |
36 | 36 | ||
37 | static inline short_ad cpu_to_lesa(short_ad in) | 37 | static inline struct short_ad cpu_to_lesa(struct short_ad in) |
38 | { | 38 | { |
39 | short_ad out; | 39 | struct short_ad out; |
40 | 40 | ||
41 | out.extLength = cpu_to_le32(in.extLength); | 41 | out.extLength = cpu_to_le32(in.extLength); |
42 | out.extPosition = cpu_to_le32(in.extPosition); | 42 | out.extPosition = cpu_to_le32(in.extPosition); |
@@ -44,9 +44,9 @@ static inline short_ad cpu_to_lesa(short_ad in) | |||
44 | return out; | 44 | return out; |
45 | } | 45 | } |
46 | 46 | ||
47 | static inline kernel_long_ad lela_to_cpu(long_ad in) | 47 | static inline struct kernel_long_ad lela_to_cpu(struct long_ad in) |
48 | { | 48 | { |
49 | kernel_long_ad out; | 49 | struct kernel_long_ad out; |
50 | 50 | ||
51 | out.extLength = le32_to_cpu(in.extLength); | 51 | out.extLength = le32_to_cpu(in.extLength); |
52 | out.extLocation = lelb_to_cpu(in.extLocation); | 52 | out.extLocation = lelb_to_cpu(in.extLocation); |
@@ -54,9 +54,9 @@ static inline kernel_long_ad lela_to_cpu(long_ad in) | |||
54 | return out; | 54 | return out; |
55 | } | 55 | } |
56 | 56 | ||
57 | static inline long_ad cpu_to_lela(kernel_long_ad in) | 57 | static inline struct long_ad cpu_to_lela(struct kernel_long_ad in) |
58 | { | 58 | { |
59 | long_ad out; | 59 | struct long_ad out; |
60 | 60 | ||
61 | out.extLength = cpu_to_le32(in.extLength); | 61 | out.extLength = cpu_to_le32(in.extLength); |
62 | out.extLocation = cpu_to_lelb(in.extLocation); | 62 | out.extLocation = cpu_to_lelb(in.extLocation); |
@@ -64,9 +64,9 @@ static inline long_ad cpu_to_lela(kernel_long_ad in) | |||
64 | return out; | 64 | return out; |
65 | } | 65 | } |
66 | 66 | ||
67 | static inline kernel_extent_ad leea_to_cpu(extent_ad in) | 67 | static inline struct kernel_extent_ad leea_to_cpu(struct extent_ad in) |
68 | { | 68 | { |
69 | kernel_extent_ad out; | 69 | struct kernel_extent_ad out; |
70 | 70 | ||
71 | out.extLength = le32_to_cpu(in.extLength); | 71 | out.extLength = le32_to_cpu(in.extLength); |
72 | out.extLocation = le32_to_cpu(in.extLocation); | 72 | out.extLocation = le32_to_cpu(in.extLocation); |
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 5f811655c9b5..b8c828c4d200 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c | |||
@@ -85,7 +85,8 @@ extern struct timezone sys_tz; | |||
85 | #define SECS_PER_HOUR (60 * 60) | 85 | #define SECS_PER_HOUR (60 * 60) |
86 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) | 86 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) |
87 | 87 | ||
88 | struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src) | 88 | struct timespec * |
89 | udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src) | ||
89 | { | 90 | { |
90 | int yday; | 91 | int yday; |
91 | u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); | 92 | u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); |
@@ -116,7 +117,8 @@ struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src) | |||
116 | return dest; | 117 | return dest; |
117 | } | 118 | } |
118 | 119 | ||
119 | timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts) | 120 | struct timestamp * |
121 | udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts) | ||
120 | { | 122 | { |
121 | long int days, rem, y; | 123 | long int days, rem, y; |
122 | const unsigned short int *ip; | 124 | const unsigned short int *ip; |
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 9fdf8c93c58e..cefa8c8913e6 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c | |||
@@ -254,7 +254,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, | |||
254 | { | 254 | { |
255 | const uint8_t *ocu; | 255 | const uint8_t *ocu; |
256 | uint8_t cmp_id, ocu_len; | 256 | uint8_t cmp_id, ocu_len; |
257 | int i; | 257 | int i, len; |
258 | 258 | ||
259 | 259 | ||
260 | ocu_len = ocu_i->u_len; | 260 | ocu_len = ocu_i->u_len; |
@@ -279,8 +279,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, | |||
279 | if (cmp_id == 16) | 279 | if (cmp_id == 16) |
280 | c = (c << 8) | ocu[i++]; | 280 | c = (c << 8) | ocu[i++]; |
281 | 281 | ||
282 | utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len], | 282 | len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], |
283 | UDF_NAME_LEN - utf_o->u_len); | 283 | UDF_NAME_LEN - utf_o->u_len); |
284 | /* Valid character? */ | ||
285 | if (len >= 0) | ||
286 | utf_o->u_len += len; | ||
287 | else | ||
288 | utf_o->u_name[utf_o->u_len++] = '?'; | ||
284 | } | 289 | } |
285 | utf_o->u_cmpID = 8; | 290 | utf_o->u_cmpID = 8; |
286 | 291 | ||
@@ -290,7 +295,8 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, | |||
290 | static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, | 295 | static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, |
291 | int length) | 296 | int length) |
292 | { | 297 | { |
293 | unsigned len, i, max_val; | 298 | int len; |
299 | unsigned i, max_val; | ||
294 | uint16_t uni_char; | 300 | uint16_t uni_char; |
295 | int u_len; | 301 | int u_len; |
296 | 302 | ||
@@ -302,8 +308,13 @@ try_again: | |||
302 | u_len = 0U; | 308 | u_len = 0U; |
303 | for (i = 0U; i < uni->u_len; i++) { | 309 | for (i = 0U; i < uni->u_len; i++) { |
304 | len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); | 310 | len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); |
305 | if (len <= 0) | 311 | if (!len) |
306 | continue; | 312 | continue; |
313 | /* Invalid character, deal with it */ | ||
314 | if (len < 0) { | ||
315 | len = 1; | ||
316 | uni_char = '?'; | ||
317 | } | ||
307 | 318 | ||
308 | if (uni_char > max_val) { | 319 | if (uni_char > max_val) { |
309 | max_val = 0xffffU; | 320 | max_val = 0xffffU; |
@@ -324,34 +335,43 @@ try_again: | |||
324 | int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, | 335 | int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, |
325 | int flen) | 336 | int flen) |
326 | { | 337 | { |
327 | struct ustr filename, unifilename; | 338 | struct ustr *filename, *unifilename; |
328 | int len; | 339 | int len = 0; |
329 | 340 | ||
330 | if (udf_build_ustr_exact(&unifilename, sname, flen)) | 341 | filename = kmalloc(sizeof(struct ustr), GFP_NOFS); |
342 | if (!filename) | ||
331 | return 0; | 343 | return 0; |
332 | 344 | ||
345 | unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS); | ||
346 | if (!unifilename) | ||
347 | goto out1; | ||
348 | |||
349 | if (udf_build_ustr_exact(unifilename, sname, flen)) | ||
350 | goto out2; | ||
351 | |||
333 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { | 352 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { |
334 | if (!udf_CS0toUTF8(&filename, &unifilename)) { | 353 | if (!udf_CS0toUTF8(filename, unifilename)) { |
335 | udf_debug("Failed in udf_get_filename: sname = %s\n", | 354 | udf_debug("Failed in udf_get_filename: sname = %s\n", |
336 | sname); | 355 | sname); |
337 | return 0; | 356 | goto out2; |
338 | } | 357 | } |
339 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { | 358 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { |
340 | if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, | 359 | if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename, |
341 | &unifilename)) { | 360 | unifilename)) { |
342 | udf_debug("Failed in udf_get_filename: sname = %s\n", | 361 | udf_debug("Failed in udf_get_filename: sname = %s\n", |
343 | sname); | 362 | sname); |
344 | return 0; | 363 | goto out2; |
345 | } | 364 | } |
346 | } else | 365 | } else |
347 | return 0; | 366 | goto out2; |
348 | 367 | ||
349 | len = udf_translate_to_linux(dname, filename.u_name, filename.u_len, | 368 | len = udf_translate_to_linux(dname, filename->u_name, filename->u_len, |
350 | unifilename.u_name, unifilename.u_len); | 369 | unifilename->u_name, unifilename->u_len); |
351 | if (len) | 370 | out2: |
352 | return len; | 371 | kfree(unifilename); |
353 | 372 | out1: | |
354 | return 0; | 373 | kfree(filename); |
374 | return len; | ||
355 | } | 375 | } |
356 | 376 | ||
357 | int udf_put_filename(struct super_block *sb, const uint8_t *sname, | 377 | int udf_put_filename(struct super_block *sb, const uint8_t *sname, |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index e1c1fc5ee239..60359291761f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -1268,6 +1268,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1268 | struct ufs_super_block_first *usb1; | 1268 | struct ufs_super_block_first *usb1; |
1269 | struct ufs_super_block_second *usb2; | 1269 | struct ufs_super_block_second *usb2; |
1270 | struct ufs_super_block_third *usb3; | 1270 | struct ufs_super_block_third *usb3; |
1271 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
1271 | 1272 | ||
1272 | lock_kernel(); | 1273 | lock_kernel(); |
1273 | 1274 | ||
@@ -1290,6 +1291,8 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1290 | ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0; | 1291 | ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0; |
1291 | buf->f_files = uspi->s_ncg * uspi->s_ipg; | 1292 | buf->f_files = uspi->s_ncg * uspi->s_ipg; |
1292 | buf->f_namelen = UFS_MAXNAMLEN; | 1293 | buf->f_namelen = UFS_MAXNAMLEN; |
1294 | buf->f_fsid.val[0] = (u32)id; | ||
1295 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
1293 | 1296 | ||
1294 | unlock_kernel(); | 1297 | unlock_kernel(); |
1295 | 1298 | ||
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c3dc491fff89..60f107e47fe9 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -33,6 +33,7 @@ xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ | |||
33 | xfs_qm_syscalls.o \ | 33 | xfs_qm_syscalls.o \ |
34 | xfs_qm_bhv.o \ | 34 | xfs_qm_bhv.o \ |
35 | xfs_qm.o) | 35 | xfs_qm.o) |
36 | xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o | ||
36 | 37 | ||
37 | ifeq ($(CONFIG_XFS_QUOTA),y) | 38 | ifeq ($(CONFIG_XFS_QUOTA),y) |
38 | xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o | 39 | xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o |
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h deleted file mode 100644 index 2a88d56c4dc2..000000000000 --- a/fs/xfs/linux-2.6/mutex.h +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_MUTEX_H__ | ||
19 | #define __XFS_SUPPORT_MUTEX_H__ | ||
20 | |||
21 | #include <linux/mutex.h> | ||
22 | |||
23 | typedef struct mutex mutex_t; | ||
24 | |||
25 | #endif /* __XFS_SUPPORT_MUTEX_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index de3a198f771e..c13f67300fe7 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1623,4 +1623,5 @@ const struct address_space_operations xfs_address_space_operations = { | |||
1623 | .bmap = xfs_vm_bmap, | 1623 | .bmap = xfs_vm_bmap, |
1624 | .direct_IO = xfs_vm_direct_IO, | 1624 | .direct_IO = xfs_vm_direct_IO, |
1625 | .migratepage = buffer_migrate_page, | 1625 | .migratepage = buffer_migrate_page, |
1626 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1626 | }; | 1627 | }; |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 4bd112313f33..d0b499418a7d 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "xfs_dir2_sf.h" | 34 | #include "xfs_dir2_sf.h" |
35 | #include "xfs_dinode.h" | 35 | #include "xfs_dinode.h" |
36 | #include "xfs_inode.h" | 36 | #include "xfs_inode.h" |
37 | #include "xfs_ioctl.h" | ||
37 | #include "xfs_btree.h" | 38 | #include "xfs_btree.h" |
38 | #include "xfs_ialloc.h" | 39 | #include "xfs_ialloc.h" |
39 | #include "xfs_rtalloc.h" | 40 | #include "xfs_rtalloc.h" |
@@ -78,92 +79,74 @@ xfs_find_handle( | |||
78 | int hsize; | 79 | int hsize; |
79 | xfs_handle_t handle; | 80 | xfs_handle_t handle; |
80 | struct inode *inode; | 81 | struct inode *inode; |
82 | struct file *file = NULL; | ||
83 | struct path path; | ||
84 | int error; | ||
85 | struct xfs_inode *ip; | ||
81 | 86 | ||
82 | memset((char *)&handle, 0, sizeof(handle)); | 87 | if (cmd == XFS_IOC_FD_TO_HANDLE) { |
83 | 88 | file = fget(hreq->fd); | |
84 | switch (cmd) { | 89 | if (!file) |
85 | case XFS_IOC_PATH_TO_FSHANDLE: | 90 | return -EBADF; |
86 | case XFS_IOC_PATH_TO_HANDLE: { | 91 | inode = file->f_path.dentry->d_inode; |
87 | struct path path; | 92 | } else { |
88 | int error = user_lpath((const char __user *)hreq->path, &path); | 93 | error = user_lpath((const char __user *)hreq->path, &path); |
89 | if (error) | 94 | if (error) |
90 | return error; | 95 | return error; |
91 | 96 | inode = path.dentry->d_inode; | |
92 | ASSERT(path.dentry); | ||
93 | ASSERT(path.dentry->d_inode); | ||
94 | inode = igrab(path.dentry->d_inode); | ||
95 | path_put(&path); | ||
96 | break; | ||
97 | } | 97 | } |
98 | ip = XFS_I(inode); | ||
98 | 99 | ||
99 | case XFS_IOC_FD_TO_HANDLE: { | 100 | /* |
100 | struct file *file; | 101 | * We can only generate handles for inodes residing on a XFS filesystem, |
101 | 102 | * and only for regular files, directories or symbolic links. | |
102 | file = fget(hreq->fd); | 103 | */ |
103 | if (!file) | 104 | error = -EINVAL; |
104 | return -EBADF; | 105 | if (inode->i_sb->s_magic != XFS_SB_MAGIC) |
106 | goto out_put; | ||
105 | 107 | ||
106 | ASSERT(file->f_path.dentry); | 108 | error = -EBADF; |
107 | ASSERT(file->f_path.dentry->d_inode); | 109 | if (!S_ISREG(inode->i_mode) && |
108 | inode = igrab(file->f_path.dentry->d_inode); | 110 | !S_ISDIR(inode->i_mode) && |
109 | fput(file); | 111 | !S_ISLNK(inode->i_mode)) |
110 | break; | 112 | goto out_put; |
111 | } | ||
112 | 113 | ||
113 | default: | ||
114 | ASSERT(0); | ||
115 | return -XFS_ERROR(EINVAL); | ||
116 | } | ||
117 | 114 | ||
118 | if (inode->i_sb->s_magic != XFS_SB_MAGIC) { | 115 | memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); |
119 | /* we're not in XFS anymore, Toto */ | ||
120 | iput(inode); | ||
121 | return -XFS_ERROR(EINVAL); | ||
122 | } | ||
123 | 116 | ||
124 | switch (inode->i_mode & S_IFMT) { | 117 | if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { |
125 | case S_IFREG: | 118 | /* |
126 | case S_IFDIR: | 119 | * This handle only contains an fsid, zero the rest. |
127 | case S_IFLNK: | 120 | */ |
128 | break; | 121 | memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); |
129 | default: | 122 | hsize = sizeof(xfs_fsid_t); |
130 | iput(inode); | 123 | } else { |
131 | return -XFS_ERROR(EBADF); | ||
132 | } | ||
133 | |||
134 | /* now we can grab the fsid */ | ||
135 | memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid, | ||
136 | sizeof(xfs_fsid_t)); | ||
137 | hsize = sizeof(xfs_fsid_t); | ||
138 | |||
139 | if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { | ||
140 | xfs_inode_t *ip = XFS_I(inode); | ||
141 | int lock_mode; | 124 | int lock_mode; |
142 | 125 | ||
143 | /* need to get access to the xfs_inode to read the generation */ | ||
144 | lock_mode = xfs_ilock_map_shared(ip); | 126 | lock_mode = xfs_ilock_map_shared(ip); |
145 | |||
146 | /* fill in fid section of handle from inode */ | ||
147 | handle.ha_fid.fid_len = sizeof(xfs_fid_t) - | 127 | handle.ha_fid.fid_len = sizeof(xfs_fid_t) - |
148 | sizeof(handle.ha_fid.fid_len); | 128 | sizeof(handle.ha_fid.fid_len); |
149 | handle.ha_fid.fid_pad = 0; | 129 | handle.ha_fid.fid_pad = 0; |
150 | handle.ha_fid.fid_gen = ip->i_d.di_gen; | 130 | handle.ha_fid.fid_gen = ip->i_d.di_gen; |
151 | handle.ha_fid.fid_ino = ip->i_ino; | 131 | handle.ha_fid.fid_ino = ip->i_ino; |
152 | |||
153 | xfs_iunlock_map_shared(ip, lock_mode); | 132 | xfs_iunlock_map_shared(ip, lock_mode); |
154 | 133 | ||
155 | hsize = XFS_HSIZE(handle); | 134 | hsize = XFS_HSIZE(handle); |
156 | } | 135 | } |
157 | 136 | ||
158 | /* now copy our handle into the user buffer & write out the size */ | 137 | error = -EFAULT; |
159 | if (copy_to_user(hreq->ohandle, &handle, hsize) || | 138 | if (copy_to_user(hreq->ohandle, &handle, hsize) || |
160 | copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) { | 139 | copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) |
161 | iput(inode); | 140 | goto out_put; |
162 | return -XFS_ERROR(EFAULT); | ||
163 | } | ||
164 | 141 | ||
165 | iput(inode); | 142 | error = 0; |
166 | return 0; | 143 | |
144 | out_put: | ||
145 | if (cmd == XFS_IOC_FD_TO_HANDLE) | ||
146 | fput(file); | ||
147 | else | ||
148 | path_put(&path); | ||
149 | return error; | ||
167 | } | 150 | } |
168 | 151 | ||
169 | /* | 152 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 7aa53fefc67f..6075382336d7 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -211,8 +211,13 @@ xfs_vn_mknod( | |||
211 | * Irix uses Missed'em'V split, but doesn't want to see | 211 | * Irix uses Missed'em'V split, but doesn't want to see |
212 | * the upper 5 bits of (14bit) major. | 212 | * the upper 5 bits of (14bit) major. |
213 | */ | 213 | */ |
214 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) | 214 | if (S_ISCHR(mode) || S_ISBLK(mode)) { |
215 | return -EINVAL; | 215 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) |
216 | return -EINVAL; | ||
217 | rdev = sysv_encode_dev(rdev); | ||
218 | } else { | ||
219 | rdev = 0; | ||
220 | } | ||
216 | 221 | ||
217 | if (test_default_acl && test_default_acl(dir)) { | 222 | if (test_default_acl && test_default_acl(dir)) { |
218 | if (!_ACL_ALLOC(default_acl)) { | 223 | if (!_ACL_ALLOC(default_acl)) { |
@@ -224,28 +229,11 @@ xfs_vn_mknod( | |||
224 | } | 229 | } |
225 | } | 230 | } |
226 | 231 | ||
227 | xfs_dentry_to_name(&name, dentry); | ||
228 | |||
229 | if (IS_POSIXACL(dir) && !default_acl) | 232 | if (IS_POSIXACL(dir) && !default_acl) |
230 | mode &= ~current->fs->umask; | 233 | mode &= ~current_umask(); |
231 | |||
232 | switch (mode & S_IFMT) { | ||
233 | case S_IFCHR: | ||
234 | case S_IFBLK: | ||
235 | case S_IFIFO: | ||
236 | case S_IFSOCK: | ||
237 | rdev = sysv_encode_dev(rdev); | ||
238 | case S_IFREG: | ||
239 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); | ||
240 | break; | ||
241 | case S_IFDIR: | ||
242 | error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL); | ||
243 | break; | ||
244 | default: | ||
245 | error = EINVAL; | ||
246 | break; | ||
247 | } | ||
248 | 234 | ||
235 | xfs_dentry_to_name(&name, dentry); | ||
236 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); | ||
249 | if (unlikely(error)) | 237 | if (unlikely(error)) |
250 | goto out_free_acl; | 238 | goto out_free_acl; |
251 | 239 | ||
@@ -416,7 +404,7 @@ xfs_vn_symlink( | |||
416 | mode_t mode; | 404 | mode_t mode; |
417 | 405 | ||
418 | mode = S_IFLNK | | 406 | mode = S_IFLNK | |
419 | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); | 407 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); |
420 | xfs_dentry_to_name(&name, dentry); | 408 | xfs_dentry_to_name(&name, dentry); |
421 | 409 | ||
422 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); | 410 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); |
@@ -553,9 +541,6 @@ xfs_vn_getattr( | |||
553 | stat->uid = ip->i_d.di_uid; | 541 | stat->uid = ip->i_d.di_uid; |
554 | stat->gid = ip->i_d.di_gid; | 542 | stat->gid = ip->i_d.di_gid; |
555 | stat->ino = ip->i_ino; | 543 | stat->ino = ip->i_ino; |
556 | #if XFS_BIG_INUMS | ||
557 | stat->ino += mp->m_inoadd; | ||
558 | #endif | ||
559 | stat->atime = inode->i_atime; | 544 | stat->atime = inode->i_atime; |
560 | stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; | 545 | stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; |
561 | stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; | 546 | stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 507492d6dccd..f65a53f8752f 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <kmem.h> | 38 | #include <kmem.h> |
39 | #include <mrlock.h> | 39 | #include <mrlock.h> |
40 | #include <sv.h> | 40 | #include <sv.h> |
41 | #include <mutex.h> | ||
42 | #include <time.h> | 41 | #include <time.h> |
43 | 42 | ||
44 | #include <support/ktrace.h> | 43 | #include <support/ktrace.h> |
@@ -51,6 +50,7 @@ | |||
51 | #include <linux/blkdev.h> | 50 | #include <linux/blkdev.h> |
52 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
53 | #include <linux/module.h> | 52 | #include <linux/module.h> |
53 | #include <linux/mutex.h> | ||
54 | #include <linux/file.h> | 54 | #include <linux/file.h> |
55 | #include <linux/swap.h> | 55 | #include <linux/swap.h> |
56 | #include <linux/errno.h> | 56 | #include <linux/errno.h> |
@@ -147,17 +147,6 @@ | |||
147 | #define SYNCHRONIZE() barrier() | 147 | #define SYNCHRONIZE() barrier() |
148 | #define __return_address __builtin_return_address(0) | 148 | #define __return_address __builtin_return_address(0) |
149 | 149 | ||
150 | /* | ||
151 | * IRIX (BSD) quotactl makes use of separate commands for user/group, | ||
152 | * whereas on Linux the syscall encodes this information into the cmd | ||
153 | * field (see the QCMD macro in quota.h). These macros help keep the | ||
154 | * code portable - they are not visible from the syscall interface. | ||
155 | */ | ||
156 | #define Q_XSETGQLIM XQM_CMD(8) /* set groups disk limits */ | ||
157 | #define Q_XGETGQUOTA XQM_CMD(9) /* get groups disk limits */ | ||
158 | #define Q_XSETPQLIM XQM_CMD(10) /* set projects disk limits */ | ||
159 | #define Q_XGETPQUOTA XQM_CMD(11) /* get projects disk limits */ | ||
160 | |||
161 | #define dfltprid 0 | 150 | #define dfltprid 0 |
162 | #define MAXPATHLEN 1024 | 151 | #define MAXPATHLEN 1024 |
163 | 152 | ||
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c new file mode 100644 index 000000000000..94d9a633d3d9 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_quotaops.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008, Christoph Hellwig | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_dmapi.h" | ||
20 | #include "xfs_sb.h" | ||
21 | #include "xfs_inum.h" | ||
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_mount.h" | ||
24 | #include "xfs_quota.h" | ||
25 | #include "xfs_log.h" | ||
26 | #include "xfs_trans.h" | ||
27 | #include "xfs_bmap_btree.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "quota/xfs_qm.h" | ||
30 | #include <linux/quota.h> | ||
31 | |||
32 | |||
33 | STATIC int | ||
34 | xfs_quota_type(int type) | ||
35 | { | ||
36 | switch (type) { | ||
37 | case USRQUOTA: | ||
38 | return XFS_DQ_USER; | ||
39 | case GRPQUOTA: | ||
40 | return XFS_DQ_GROUP; | ||
41 | default: | ||
42 | return XFS_DQ_PROJ; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | STATIC int | ||
47 | xfs_fs_quota_sync( | ||
48 | struct super_block *sb, | ||
49 | int type) | ||
50 | { | ||
51 | struct xfs_mount *mp = XFS_M(sb); | ||
52 | |||
53 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
54 | return -ENOSYS; | ||
55 | return -xfs_sync_inodes(mp, SYNC_DELWRI); | ||
56 | } | ||
57 | |||
58 | STATIC int | ||
59 | xfs_fs_get_xstate( | ||
60 | struct super_block *sb, | ||
61 | struct fs_quota_stat *fqs) | ||
62 | { | ||
63 | struct xfs_mount *mp = XFS_M(sb); | ||
64 | |||
65 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
66 | return -ENOSYS; | ||
67 | return -xfs_qm_scall_getqstat(mp, fqs); | ||
68 | } | ||
69 | |||
70 | STATIC int | ||
71 | xfs_fs_set_xstate( | ||
72 | struct super_block *sb, | ||
73 | unsigned int uflags, | ||
74 | int op) | ||
75 | { | ||
76 | struct xfs_mount *mp = XFS_M(sb); | ||
77 | unsigned int flags = 0; | ||
78 | |||
79 | if (sb->s_flags & MS_RDONLY) | ||
80 | return -EROFS; | ||
81 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
82 | return -ENOSYS; | ||
83 | if (!capable(CAP_SYS_ADMIN)) | ||
84 | return -EPERM; | ||
85 | |||
86 | if (uflags & XFS_QUOTA_UDQ_ACCT) | ||
87 | flags |= XFS_UQUOTA_ACCT; | ||
88 | if (uflags & XFS_QUOTA_PDQ_ACCT) | ||
89 | flags |= XFS_PQUOTA_ACCT; | ||
90 | if (uflags & XFS_QUOTA_GDQ_ACCT) | ||
91 | flags |= XFS_GQUOTA_ACCT; | ||
92 | if (uflags & XFS_QUOTA_UDQ_ENFD) | ||
93 | flags |= XFS_UQUOTA_ENFD; | ||
94 | if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) | ||
95 | flags |= XFS_OQUOTA_ENFD; | ||
96 | |||
97 | switch (op) { | ||
98 | case Q_XQUOTAON: | ||
99 | return -xfs_qm_scall_quotaon(mp, flags); | ||
100 | case Q_XQUOTAOFF: | ||
101 | if (!XFS_IS_QUOTA_ON(mp)) | ||
102 | return -EINVAL; | ||
103 | return -xfs_qm_scall_quotaoff(mp, flags); | ||
104 | case Q_XQUOTARM: | ||
105 | if (XFS_IS_QUOTA_ON(mp)) | ||
106 | return -EINVAL; | ||
107 | return -xfs_qm_scall_trunc_qfiles(mp, flags); | ||
108 | } | ||
109 | |||
110 | return -EINVAL; | ||
111 | } | ||
112 | |||
113 | STATIC int | ||
114 | xfs_fs_get_xquota( | ||
115 | struct super_block *sb, | ||
116 | int type, | ||
117 | qid_t id, | ||
118 | struct fs_disk_quota *fdq) | ||
119 | { | ||
120 | struct xfs_mount *mp = XFS_M(sb); | ||
121 | |||
122 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
123 | return -ENOSYS; | ||
124 | if (!XFS_IS_QUOTA_ON(mp)) | ||
125 | return -ESRCH; | ||
126 | |||
127 | return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); | ||
128 | } | ||
129 | |||
130 | STATIC int | ||
131 | xfs_fs_set_xquota( | ||
132 | struct super_block *sb, | ||
133 | int type, | ||
134 | qid_t id, | ||
135 | struct fs_disk_quota *fdq) | ||
136 | { | ||
137 | struct xfs_mount *mp = XFS_M(sb); | ||
138 | |||
139 | if (sb->s_flags & MS_RDONLY) | ||
140 | return -EROFS; | ||
141 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
142 | return -ENOSYS; | ||
143 | if (!XFS_IS_QUOTA_ON(mp)) | ||
144 | return -ESRCH; | ||
145 | if (!capable(CAP_SYS_ADMIN)) | ||
146 | return -EPERM; | ||
147 | |||
148 | return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); | ||
149 | } | ||
150 | |||
151 | struct quotactl_ops xfs_quotactl_operations = { | ||
152 | .quota_sync = xfs_fs_quota_sync, | ||
153 | .get_xstate = xfs_fs_get_xstate, | ||
154 | .set_xstate = xfs_fs_set_xstate, | ||
155 | .get_xquota = xfs_fs_get_xquota, | ||
156 | .set_xquota = xfs_fs_set_xquota, | ||
157 | }; | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 32ae5028e96b..bb685269f832 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -68,7 +68,6 @@ | |||
68 | #include <linux/freezer.h> | 68 | #include <linux/freezer.h> |
69 | #include <linux/parser.h> | 69 | #include <linux/parser.h> |
70 | 70 | ||
71 | static struct quotactl_ops xfs_quotactl_operations; | ||
72 | static struct super_operations xfs_super_operations; | 71 | static struct super_operations xfs_super_operations; |
73 | static kmem_zone_t *xfs_ioend_zone; | 72 | static kmem_zone_t *xfs_ioend_zone; |
74 | mempool_t *xfs_ioend_pool; | 73 | mempool_t *xfs_ioend_pool; |
@@ -79,7 +78,6 @@ mempool_t *xfs_ioend_pool; | |||
79 | #define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ | 78 | #define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ |
80 | #define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ | 79 | #define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ |
81 | #define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ | 80 | #define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ |
82 | #define MNTOPT_INO64 "ino64" /* force inodes into 64-bit range */ | ||
83 | #define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ | 81 | #define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ |
84 | #define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ | 82 | #define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ |
85 | #define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ | 83 | #define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ |
@@ -180,7 +178,7 @@ xfs_parseargs( | |||
180 | int dswidth = 0; | 178 | int dswidth = 0; |
181 | int iosize = 0; | 179 | int iosize = 0; |
182 | int dmapi_implies_ikeep = 1; | 180 | int dmapi_implies_ikeep = 1; |
183 | uchar_t iosizelog = 0; | 181 | __uint8_t iosizelog = 0; |
184 | 182 | ||
185 | /* | 183 | /* |
186 | * Copy binary VFS mount flags we are interested in. | 184 | * Copy binary VFS mount flags we are interested in. |
@@ -291,16 +289,6 @@ xfs_parseargs( | |||
291 | mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; | 289 | mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; |
292 | } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { | 290 | } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { |
293 | mp->m_flags |= XFS_MOUNT_NORECOVERY; | 291 | mp->m_flags |= XFS_MOUNT_NORECOVERY; |
294 | } else if (!strcmp(this_char, MNTOPT_INO64)) { | ||
295 | #if XFS_BIG_INUMS | ||
296 | mp->m_flags |= XFS_MOUNT_INO64; | ||
297 | mp->m_inoadd = XFS_INO64_OFFSET; | ||
298 | #else | ||
299 | cmn_err(CE_WARN, | ||
300 | "XFS: %s option not allowed on this system", | ||
301 | this_char); | ||
302 | return EINVAL; | ||
303 | #endif | ||
304 | } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { | 292 | } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { |
305 | mp->m_flags |= XFS_MOUNT_NOALIGN; | 293 | mp->m_flags |= XFS_MOUNT_NOALIGN; |
306 | } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { | 294 | } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { |
@@ -529,7 +517,6 @@ xfs_showargs( | |||
529 | /* the few simple ones we can get from the mount struct */ | 517 | /* the few simple ones we can get from the mount struct */ |
530 | { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, | 518 | { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, |
531 | { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, | 519 | { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, |
532 | { XFS_MOUNT_INO64, "," MNTOPT_INO64 }, | ||
533 | { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, | 520 | { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, |
534 | { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, | 521 | { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, |
535 | { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, | 522 | { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, |
@@ -634,7 +621,7 @@ xfs_max_file_offset( | |||
634 | return (((__uint64_t)pagefactor) << bitshift) - 1; | 621 | return (((__uint64_t)pagefactor) << bitshift) - 1; |
635 | } | 622 | } |
636 | 623 | ||
637 | int | 624 | STATIC int |
638 | xfs_blkdev_get( | 625 | xfs_blkdev_get( |
639 | xfs_mount_t *mp, | 626 | xfs_mount_t *mp, |
640 | const char *name, | 627 | const char *name, |
@@ -651,7 +638,7 @@ xfs_blkdev_get( | |||
651 | return -error; | 638 | return -error; |
652 | } | 639 | } |
653 | 640 | ||
654 | void | 641 | STATIC void |
655 | xfs_blkdev_put( | 642 | xfs_blkdev_put( |
656 | struct block_device *bdev) | 643 | struct block_device *bdev) |
657 | { | 644 | { |
@@ -872,7 +859,7 @@ xfsaild_wakeup( | |||
872 | wake_up_process(ailp->xa_task); | 859 | wake_up_process(ailp->xa_task); |
873 | } | 860 | } |
874 | 861 | ||
875 | int | 862 | STATIC int |
876 | xfsaild( | 863 | xfsaild( |
877 | void *data) | 864 | void *data) |
878 | { | 865 | { |
@@ -990,26 +977,57 @@ xfs_fs_write_inode( | |||
990 | int sync) | 977 | int sync) |
991 | { | 978 | { |
992 | struct xfs_inode *ip = XFS_I(inode); | 979 | struct xfs_inode *ip = XFS_I(inode); |
980 | struct xfs_mount *mp = ip->i_mount; | ||
993 | int error = 0; | 981 | int error = 0; |
994 | int flags = 0; | ||
995 | 982 | ||
996 | xfs_itrace_entry(ip); | 983 | xfs_itrace_entry(ip); |
984 | |||
985 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
986 | return XFS_ERROR(EIO); | ||
987 | |||
997 | if (sync) { | 988 | if (sync) { |
998 | error = xfs_wait_on_pages(ip, 0, -1); | 989 | error = xfs_wait_on_pages(ip, 0, -1); |
999 | if (error) | 990 | if (error) |
1000 | goto out_error; | 991 | goto out; |
1001 | flags |= FLUSH_SYNC; | ||
1002 | } | 992 | } |
1003 | error = xfs_inode_flush(ip, flags); | ||
1004 | 993 | ||
1005 | out_error: | 994 | /* |
995 | * Bypass inodes which have already been cleaned by | ||
996 | * the inode flush clustering code inside xfs_iflush | ||
997 | */ | ||
998 | if (xfs_inode_clean(ip)) | ||
999 | goto out; | ||
1000 | |||
1001 | /* | ||
1002 | * We make this non-blocking if the inode is contended, return | ||
1003 | * EAGAIN to indicate to the caller that they did not succeed. | ||
1004 | * This prevents the flush path from blocking on inodes inside | ||
1005 | * another operation right now, they get caught later by xfs_sync. | ||
1006 | */ | ||
1007 | if (sync) { | ||
1008 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
1009 | xfs_iflock(ip); | ||
1010 | |||
1011 | error = xfs_iflush(ip, XFS_IFLUSH_SYNC); | ||
1012 | } else { | ||
1013 | error = EAGAIN; | ||
1014 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) | ||
1015 | goto out; | ||
1016 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) | ||
1017 | goto out_unlock; | ||
1018 | |||
1019 | error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); | ||
1020 | } | ||
1021 | |||
1022 | out_unlock: | ||
1023 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
1024 | out: | ||
1006 | /* | 1025 | /* |
1007 | * if we failed to write out the inode then mark | 1026 | * if we failed to write out the inode then mark |
1008 | * it dirty again so we'll try again later. | 1027 | * it dirty again so we'll try again later. |
1009 | */ | 1028 | */ |
1010 | if (error) | 1029 | if (error) |
1011 | xfs_mark_inode_dirty_sync(ip); | 1030 | xfs_mark_inode_dirty_sync(ip); |
1012 | |||
1013 | return -error; | 1031 | return -error; |
1014 | } | 1032 | } |
1015 | 1033 | ||
@@ -1169,18 +1187,12 @@ xfs_fs_statfs( | |||
1169 | statp->f_bfree = statp->f_bavail = | 1187 | statp->f_bfree = statp->f_bavail = |
1170 | sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | 1188 | sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); |
1171 | fakeinos = statp->f_bfree << sbp->sb_inopblog; | 1189 | fakeinos = statp->f_bfree << sbp->sb_inopblog; |
1172 | #if XFS_BIG_INUMS | ||
1173 | fakeinos += mp->m_inoadd; | ||
1174 | #endif | ||
1175 | statp->f_files = | 1190 | statp->f_files = |
1176 | MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); | 1191 | MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); |
1177 | if (mp->m_maxicount) | 1192 | if (mp->m_maxicount) |
1178 | #if XFS_BIG_INUMS | 1193 | statp->f_files = min_t(typeof(statp->f_files), |
1179 | if (!mp->m_inoadd) | 1194 | statp->f_files, |
1180 | #endif | 1195 | mp->m_maxicount); |
1181 | statp->f_files = min_t(typeof(statp->f_files), | ||
1182 | statp->f_files, | ||
1183 | mp->m_maxicount); | ||
1184 | statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | 1196 | statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); |
1185 | spin_unlock(&mp->m_sb_lock); | 1197 | spin_unlock(&mp->m_sb_lock); |
1186 | 1198 | ||
@@ -1302,57 +1314,6 @@ xfs_fs_show_options( | |||
1302 | return -xfs_showargs(XFS_M(mnt->mnt_sb), m); | 1314 | return -xfs_showargs(XFS_M(mnt->mnt_sb), m); |
1303 | } | 1315 | } |
1304 | 1316 | ||
1305 | STATIC int | ||
1306 | xfs_fs_quotasync( | ||
1307 | struct super_block *sb, | ||
1308 | int type) | ||
1309 | { | ||
1310 | return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XQUOTASYNC, 0, NULL); | ||
1311 | } | ||
1312 | |||
1313 | STATIC int | ||
1314 | xfs_fs_getxstate( | ||
1315 | struct super_block *sb, | ||
1316 | struct fs_quota_stat *fqs) | ||
1317 | { | ||
1318 | return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XGETQSTAT, 0, (caddr_t)fqs); | ||
1319 | } | ||
1320 | |||
1321 | STATIC int | ||
1322 | xfs_fs_setxstate( | ||
1323 | struct super_block *sb, | ||
1324 | unsigned int flags, | ||
1325 | int op) | ||
1326 | { | ||
1327 | return -XFS_QM_QUOTACTL(XFS_M(sb), op, 0, (caddr_t)&flags); | ||
1328 | } | ||
1329 | |||
1330 | STATIC int | ||
1331 | xfs_fs_getxquota( | ||
1332 | struct super_block *sb, | ||
1333 | int type, | ||
1334 | qid_t id, | ||
1335 | struct fs_disk_quota *fdq) | ||
1336 | { | ||
1337 | return -XFS_QM_QUOTACTL(XFS_M(sb), | ||
1338 | (type == USRQUOTA) ? Q_XGETQUOTA : | ||
1339 | ((type == GRPQUOTA) ? Q_XGETGQUOTA : | ||
1340 | Q_XGETPQUOTA), id, (caddr_t)fdq); | ||
1341 | } | ||
1342 | |||
1343 | STATIC int | ||
1344 | xfs_fs_setxquota( | ||
1345 | struct super_block *sb, | ||
1346 | int type, | ||
1347 | qid_t id, | ||
1348 | struct fs_disk_quota *fdq) | ||
1349 | { | ||
1350 | return -XFS_QM_QUOTACTL(XFS_M(sb), | ||
1351 | (type == USRQUOTA) ? Q_XSETQLIM : | ||
1352 | ((type == GRPQUOTA) ? Q_XSETGQLIM : | ||
1353 | Q_XSETPQLIM), id, (caddr_t)fdq); | ||
1354 | } | ||
1355 | |||
1356 | /* | 1317 | /* |
1357 | * This function fills in xfs_mount_t fields based on mount args. | 1318 | * This function fills in xfs_mount_t fields based on mount args. |
1358 | * Note: the superblock _has_ now been read in. | 1319 | * Note: the superblock _has_ now been read in. |
@@ -1435,7 +1396,9 @@ xfs_fs_fill_super( | |||
1435 | sb_min_blocksize(sb, BBSIZE); | 1396 | sb_min_blocksize(sb, BBSIZE); |
1436 | sb->s_xattr = xfs_xattr_handlers; | 1397 | sb->s_xattr = xfs_xattr_handlers; |
1437 | sb->s_export_op = &xfs_export_operations; | 1398 | sb->s_export_op = &xfs_export_operations; |
1399 | #ifdef CONFIG_XFS_QUOTA | ||
1438 | sb->s_qcop = &xfs_quotactl_operations; | 1400 | sb->s_qcop = &xfs_quotactl_operations; |
1401 | #endif | ||
1439 | sb->s_op = &xfs_super_operations; | 1402 | sb->s_op = &xfs_super_operations; |
1440 | 1403 | ||
1441 | error = xfs_dmops_get(mp); | 1404 | error = xfs_dmops_get(mp); |
@@ -1578,14 +1541,6 @@ static struct super_operations xfs_super_operations = { | |||
1578 | .show_options = xfs_fs_show_options, | 1541 | .show_options = xfs_fs_show_options, |
1579 | }; | 1542 | }; |
1580 | 1543 | ||
1581 | static struct quotactl_ops xfs_quotactl_operations = { | ||
1582 | .quota_sync = xfs_fs_quotasync, | ||
1583 | .get_xstate = xfs_fs_getxstate, | ||
1584 | .set_xstate = xfs_fs_setxstate, | ||
1585 | .get_xquota = xfs_fs_getxquota, | ||
1586 | .set_xquota = xfs_fs_setxquota, | ||
1587 | }; | ||
1588 | |||
1589 | static struct file_system_type xfs_fs_type = { | 1544 | static struct file_system_type xfs_fs_type = { |
1590 | .owner = THIS_MODULE, | 1545 | .owner = THIS_MODULE, |
1591 | .name = "xfs", | 1546 | .name = "xfs", |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index d5d776d4cd67..5a2ea3a21781 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -93,6 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | |||
93 | 93 | ||
94 | extern const struct export_operations xfs_export_operations; | 94 | extern const struct export_operations xfs_export_operations; |
95 | extern struct xattr_handler *xfs_xattr_handlers[]; | 95 | extern struct xattr_handler *xfs_xattr_handlers[]; |
96 | extern struct quotactl_ops xfs_quotactl_operations; | ||
96 | 97 | ||
97 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 98 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
98 | 99 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 5f6de1efe1f6..04f058c848ae 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define XFS_SYNC_H 1 | 19 | #define XFS_SYNC_H 1 |
20 | 20 | ||
21 | struct xfs_mount; | 21 | struct xfs_mount; |
22 | struct xfs_perag; | ||
22 | 23 | ||
23 | typedef struct bhv_vfs_sync_work { | 24 | typedef struct bhv_vfs_sync_work { |
24 | struct list_head w_list; | 25 | struct list_head w_list; |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index f65983a230d3..ad7fbead4c97 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
@@ -41,11 +41,6 @@ struct attrlist_cursor_kern; | |||
41 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ | 41 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Flags for xfs_inode_flush | ||
45 | */ | ||
46 | #define FLUSH_SYNC 1 /* wait for flush to complete */ | ||
47 | |||
48 | /* | ||
49 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. | 44 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. |
50 | */ | 45 | */ |
51 | #define FI_NONE 0 /* none */ | 46 | #define FI_NONE 0 /* none */ |
@@ -55,33 +50,6 @@ struct attrlist_cursor_kern; | |||
55 | the operation completes. */ | 50 | the operation completes. */ |
56 | 51 | ||
57 | /* | 52 | /* |
58 | * Dealing with bad inodes | ||
59 | */ | ||
60 | static inline int VN_BAD(struct inode *vp) | ||
61 | { | ||
62 | return is_bad_inode(vp); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Extracting atime values in various formats | ||
67 | */ | ||
68 | static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime) | ||
69 | { | ||
70 | bs_atime->tv_sec = vp->i_atime.tv_sec; | ||
71 | bs_atime->tv_nsec = vp->i_atime.tv_nsec; | ||
72 | } | ||
73 | |||
74 | static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts) | ||
75 | { | ||
76 | *ts = vp->i_atime; | ||
77 | } | ||
78 | |||
79 | static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) | ||
80 | { | ||
81 | *tt = vp->i_atime.tv_sec; | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * Some useful predicates. | 53 | * Some useful predicates. |
86 | */ | 54 | */ |
87 | #define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) | 55 | #define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 6543c0b29753..e4babcc63423 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -804,7 +804,7 @@ xfs_qm_dqlookup( | |||
804 | uint flist_locked; | 804 | uint flist_locked; |
805 | xfs_dquot_t *d; | 805 | xfs_dquot_t *d; |
806 | 806 | ||
807 | ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); | 807 | ASSERT(mutex_is_locked(&qh->qh_lock)); |
808 | 808 | ||
809 | flist_locked = B_FALSE; | 809 | flist_locked = B_FALSE; |
810 | 810 | ||
@@ -877,7 +877,7 @@ xfs_qm_dqlookup( | |||
877 | /* | 877 | /* |
878 | * move the dquot to the front of the hashchain | 878 | * move the dquot to the front of the hashchain |
879 | */ | 879 | */ |
880 | ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); | 880 | ASSERT(mutex_is_locked(&qh->qh_lock)); |
881 | if (dqp->HL_PREVP != &qh->qh_next) { | 881 | if (dqp->HL_PREVP != &qh->qh_next) { |
882 | xfs_dqtrace_entry(dqp, | 882 | xfs_dqtrace_entry(dqp, |
883 | "DQLOOKUP: HASH MOVETOFRONT"); | 883 | "DQLOOKUP: HASH MOVETOFRONT"); |
@@ -892,13 +892,13 @@ xfs_qm_dqlookup( | |||
892 | } | 892 | } |
893 | xfs_dqtrace_entry(dqp, "LOOKUP END"); | 893 | xfs_dqtrace_entry(dqp, "LOOKUP END"); |
894 | *O_dqpp = dqp; | 894 | *O_dqpp = dqp; |
895 | ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); | 895 | ASSERT(mutex_is_locked(&qh->qh_lock)); |
896 | return (0); | 896 | return (0); |
897 | } | 897 | } |
898 | } | 898 | } |
899 | 899 | ||
900 | *O_dqpp = NULL; | 900 | *O_dqpp = NULL; |
901 | ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); | 901 | ASSERT(mutex_is_locked(&qh->qh_lock)); |
902 | return (1); | 902 | return (1); |
903 | } | 903 | } |
904 | 904 | ||
@@ -956,7 +956,7 @@ xfs_qm_dqget( | |||
956 | ASSERT(ip->i_gdquot == NULL); | 956 | ASSERT(ip->i_gdquot == NULL); |
957 | } | 957 | } |
958 | #endif | 958 | #endif |
959 | XFS_DQ_HASH_LOCK(h); | 959 | mutex_lock(&h->qh_lock); |
960 | 960 | ||
961 | /* | 961 | /* |
962 | * Look in the cache (hashtable). | 962 | * Look in the cache (hashtable). |
@@ -971,7 +971,7 @@ xfs_qm_dqget( | |||
971 | */ | 971 | */ |
972 | ASSERT(*O_dqpp); | 972 | ASSERT(*O_dqpp); |
973 | ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); | 973 | ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); |
974 | XFS_DQ_HASH_UNLOCK(h); | 974 | mutex_unlock(&h->qh_lock); |
975 | xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); | 975 | xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); |
976 | return (0); /* success */ | 976 | return (0); /* success */ |
977 | } | 977 | } |
@@ -991,7 +991,7 @@ xfs_qm_dqget( | |||
991 | * we don't keep the lock across a disk read | 991 | * we don't keep the lock across a disk read |
992 | */ | 992 | */ |
993 | version = h->qh_version; | 993 | version = h->qh_version; |
994 | XFS_DQ_HASH_UNLOCK(h); | 994 | mutex_unlock(&h->qh_lock); |
995 | 995 | ||
996 | /* | 996 | /* |
997 | * Allocate the dquot on the kernel heap, and read the ondisk | 997 | * Allocate the dquot on the kernel heap, and read the ondisk |
@@ -1056,7 +1056,7 @@ xfs_qm_dqget( | |||
1056 | /* | 1056 | /* |
1057 | * Hashlock comes after ilock in lock order | 1057 | * Hashlock comes after ilock in lock order |
1058 | */ | 1058 | */ |
1059 | XFS_DQ_HASH_LOCK(h); | 1059 | mutex_lock(&h->qh_lock); |
1060 | if (version != h->qh_version) { | 1060 | if (version != h->qh_version) { |
1061 | xfs_dquot_t *tmpdqp; | 1061 | xfs_dquot_t *tmpdqp; |
1062 | /* | 1062 | /* |
@@ -1072,7 +1072,7 @@ xfs_qm_dqget( | |||
1072 | * and start over. | 1072 | * and start over. |
1073 | */ | 1073 | */ |
1074 | xfs_qm_dqput(tmpdqp); | 1074 | xfs_qm_dqput(tmpdqp); |
1075 | XFS_DQ_HASH_UNLOCK(h); | 1075 | mutex_unlock(&h->qh_lock); |
1076 | xfs_qm_dqdestroy(dqp); | 1076 | xfs_qm_dqdestroy(dqp); |
1077 | XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); | 1077 | XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); |
1078 | goto again; | 1078 | goto again; |
@@ -1083,7 +1083,7 @@ xfs_qm_dqget( | |||
1083 | * Put the dquot at the beginning of the hash-chain and mp's list | 1083 | * Put the dquot at the beginning of the hash-chain and mp's list |
1084 | * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. | 1084 | * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. |
1085 | */ | 1085 | */ |
1086 | ASSERT(XFS_DQ_IS_HASH_LOCKED(h)); | 1086 | ASSERT(mutex_is_locked(&h->qh_lock)); |
1087 | dqp->q_hash = h; | 1087 | dqp->q_hash = h; |
1088 | XQM_HASHLIST_INSERT(h, dqp); | 1088 | XQM_HASHLIST_INSERT(h, dqp); |
1089 | 1089 | ||
@@ -1102,7 +1102,7 @@ xfs_qm_dqget( | |||
1102 | XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); | 1102 | XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); |
1103 | 1103 | ||
1104 | xfs_qm_mplist_unlock(mp); | 1104 | xfs_qm_mplist_unlock(mp); |
1105 | XFS_DQ_HASH_UNLOCK(h); | 1105 | mutex_unlock(&h->qh_lock); |
1106 | dqret: | 1106 | dqret: |
1107 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 1107 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
1108 | xfs_dqtrace_entry(dqp, "DQGET DONE"); | 1108 | xfs_dqtrace_entry(dqp, "DQGET DONE"); |
@@ -1440,7 +1440,7 @@ xfs_qm_dqpurge( | |||
1440 | xfs_mount_t *mp = dqp->q_mount; | 1440 | xfs_mount_t *mp = dqp->q_mount; |
1441 | 1441 | ||
1442 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); | 1442 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); |
1443 | ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); | 1443 | ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); |
1444 | 1444 | ||
1445 | xfs_dqlock(dqp); | 1445 | xfs_dqlock(dqp); |
1446 | /* | 1446 | /* |
@@ -1453,7 +1453,7 @@ xfs_qm_dqpurge( | |||
1453 | */ | 1453 | */ |
1454 | if (dqp->q_nrefs != 0) { | 1454 | if (dqp->q_nrefs != 0) { |
1455 | xfs_dqunlock(dqp); | 1455 | xfs_dqunlock(dqp); |
1456 | XFS_DQ_HASH_UNLOCK(dqp->q_hash); | 1456 | mutex_unlock(&dqp->q_hash->qh_lock); |
1457 | return (1); | 1457 | return (1); |
1458 | } | 1458 | } |
1459 | 1459 | ||
@@ -1517,7 +1517,7 @@ xfs_qm_dqpurge( | |||
1517 | memset(&dqp->q_core, 0, sizeof(dqp->q_core)); | 1517 | memset(&dqp->q_core, 0, sizeof(dqp->q_core)); |
1518 | xfs_dqfunlock(dqp); | 1518 | xfs_dqfunlock(dqp); |
1519 | xfs_dqunlock(dqp); | 1519 | xfs_dqunlock(dqp); |
1520 | XFS_DQ_HASH_UNLOCK(thishash); | 1520 | mutex_unlock(&thishash->qh_lock); |
1521 | return (0); | 1521 | return (0); |
1522 | } | 1522 | } |
1523 | 1523 | ||
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index d443e93b4331..de0f402ddb4c 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h | |||
@@ -34,7 +34,7 @@ | |||
34 | */ | 34 | */ |
35 | typedef struct xfs_dqhash { | 35 | typedef struct xfs_dqhash { |
36 | struct xfs_dquot *qh_next; | 36 | struct xfs_dquot *qh_next; |
37 | mutex_t qh_lock; | 37 | struct mutex qh_lock; |
38 | uint qh_version; /* ever increasing version */ | 38 | uint qh_version; /* ever increasing version */ |
39 | uint qh_nelems; /* number of dquots on the list */ | 39 | uint qh_nelems; /* number of dquots on the list */ |
40 | } xfs_dqhash_t; | 40 | } xfs_dqhash_t; |
@@ -81,7 +81,7 @@ typedef struct xfs_dquot { | |||
81 | xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ | 81 | xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ |
82 | xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ | 82 | xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ |
83 | xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ | 83 | xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ |
84 | mutex_t q_qlock; /* quota lock */ | 84 | struct mutex q_qlock; /* quota lock */ |
85 | struct completion q_flush; /* flush completion queue */ | 85 | struct completion q_flush; /* flush completion queue */ |
86 | atomic_t q_pincount; /* dquot pin count */ | 86 | atomic_t q_pincount; /* dquot pin count */ |
87 | wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ | 87 | wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ |
@@ -109,19 +109,6 @@ enum { | |||
109 | 109 | ||
110 | #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) | 110 | #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) |
111 | 111 | ||
112 | #ifdef DEBUG | ||
113 | static inline int | ||
114 | XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) | ||
115 | { | ||
116 | if (mutex_trylock(&dqp->q_qlock)) { | ||
117 | mutex_unlock(&dqp->q_qlock); | ||
118 | return 0; | ||
119 | } | ||
120 | return 1; | ||
121 | } | ||
122 | #endif | ||
123 | |||
124 | |||
125 | /* | 112 | /* |
126 | * Manage the q_flush completion queue embedded in the dquot. This completion | 113 | * Manage the q_flush completion queue embedded in the dquot. This completion |
127 | * queue synchronizes processes attempting to flush the in-core dquot back to | 114 | * queue synchronizes processes attempting to flush the in-core dquot back to |
@@ -142,6 +129,7 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp) | |||
142 | complete(&dqp->q_flush); | 129 | complete(&dqp->q_flush); |
143 | } | 130 | } |
144 | 131 | ||
132 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) | ||
145 | #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) | 133 | #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) |
146 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) | 134 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) |
147 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 135 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 7a2beb64314f..5b6695049e00 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -55,7 +55,7 @@ | |||
55 | * quota functionality, including maintaining the freelist and hash | 55 | * quota functionality, including maintaining the freelist and hash |
56 | * tables of dquots. | 56 | * tables of dquots. |
57 | */ | 57 | */ |
58 | mutex_t xfs_Gqm_lock; | 58 | struct mutex xfs_Gqm_lock; |
59 | struct xfs_qm *xfs_Gqm; | 59 | struct xfs_qm *xfs_Gqm; |
60 | uint ndquot; | 60 | uint ndquot; |
61 | 61 | ||
@@ -69,8 +69,6 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | |||
69 | 69 | ||
70 | STATIC void xfs_qm_freelist_init(xfs_frlist_t *); | 70 | STATIC void xfs_qm_freelist_init(xfs_frlist_t *); |
71 | STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); | 71 | STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); |
72 | STATIC int xfs_qm_mplist_nowait(xfs_mount_t *); | ||
73 | STATIC int xfs_qm_dqhashlock_nowait(xfs_dquot_t *); | ||
74 | 72 | ||
75 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 73 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
76 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 74 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
@@ -82,7 +80,7 @@ static struct shrinker xfs_qm_shaker = { | |||
82 | }; | 80 | }; |
83 | 81 | ||
84 | #ifdef DEBUG | 82 | #ifdef DEBUG |
85 | extern mutex_t qcheck_lock; | 83 | extern struct mutex qcheck_lock; |
86 | #endif | 84 | #endif |
87 | 85 | ||
88 | #ifdef QUOTADEBUG | 86 | #ifdef QUOTADEBUG |
@@ -219,7 +217,7 @@ xfs_qm_hold_quotafs_ref( | |||
219 | * the structure could disappear between the entry to this routine and | 217 | * the structure could disappear between the entry to this routine and |
220 | * a HOLD operation if not locked. | 218 | * a HOLD operation if not locked. |
221 | */ | 219 | */ |
222 | XFS_QM_LOCK(xfs_Gqm); | 220 | mutex_lock(&xfs_Gqm_lock); |
223 | 221 | ||
224 | if (xfs_Gqm == NULL) | 222 | if (xfs_Gqm == NULL) |
225 | xfs_Gqm = xfs_Gqm_init(); | 223 | xfs_Gqm = xfs_Gqm_init(); |
@@ -228,8 +226,8 @@ xfs_qm_hold_quotafs_ref( | |||
228 | * debugging and statistical purposes, but ... | 226 | * debugging and statistical purposes, but ... |
229 | * Just take a reference and get out. | 227 | * Just take a reference and get out. |
230 | */ | 228 | */ |
231 | XFS_QM_HOLD(xfs_Gqm); | 229 | xfs_Gqm->qm_nrefs++; |
232 | XFS_QM_UNLOCK(xfs_Gqm); | 230 | mutex_unlock(&xfs_Gqm_lock); |
233 | 231 | ||
234 | return 0; | 232 | return 0; |
235 | } | 233 | } |
@@ -277,13 +275,12 @@ xfs_qm_rele_quotafs_ref( | |||
277 | * Destroy the entire XQM. If somebody mounts with quotaon, this'll | 275 | * Destroy the entire XQM. If somebody mounts with quotaon, this'll |
278 | * be restarted. | 276 | * be restarted. |
279 | */ | 277 | */ |
280 | XFS_QM_LOCK(xfs_Gqm); | 278 | mutex_lock(&xfs_Gqm_lock); |
281 | XFS_QM_RELE(xfs_Gqm); | 279 | if (--xfs_Gqm->qm_nrefs == 0) { |
282 | if (xfs_Gqm->qm_nrefs == 0) { | ||
283 | xfs_qm_destroy(xfs_Gqm); | 280 | xfs_qm_destroy(xfs_Gqm); |
284 | xfs_Gqm = NULL; | 281 | xfs_Gqm = NULL; |
285 | } | 282 | } |
286 | XFS_QM_UNLOCK(xfs_Gqm); | 283 | mutex_unlock(&xfs_Gqm_lock); |
287 | } | 284 | } |
288 | 285 | ||
289 | /* | 286 | /* |
@@ -577,10 +574,10 @@ xfs_qm_dqpurge_int( | |||
577 | continue; | 574 | continue; |
578 | } | 575 | } |
579 | 576 | ||
580 | if (! xfs_qm_dqhashlock_nowait(dqp)) { | 577 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) { |
581 | nrecl = XFS_QI_MPLRECLAIMS(mp); | 578 | nrecl = XFS_QI_MPLRECLAIMS(mp); |
582 | xfs_qm_mplist_unlock(mp); | 579 | xfs_qm_mplist_unlock(mp); |
583 | XFS_DQ_HASH_LOCK(dqp->q_hash); | 580 | mutex_lock(&dqp->q_hash->qh_lock); |
584 | xfs_qm_mplist_lock(mp); | 581 | xfs_qm_mplist_lock(mp); |
585 | 582 | ||
586 | /* | 583 | /* |
@@ -590,7 +587,7 @@ xfs_qm_dqpurge_int( | |||
590 | * this point, but somebody might be taking things off. | 587 | * this point, but somebody might be taking things off. |
591 | */ | 588 | */ |
592 | if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { | 589 | if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { |
593 | XFS_DQ_HASH_UNLOCK(dqp->q_hash); | 590 | mutex_unlock(&dqp->q_hash->qh_lock); |
594 | goto again; | 591 | goto again; |
595 | } | 592 | } |
596 | } | 593 | } |
@@ -632,7 +629,6 @@ xfs_qm_dqattach_one( | |||
632 | xfs_dqid_t id, | 629 | xfs_dqid_t id, |
633 | uint type, | 630 | uint type, |
634 | uint doalloc, | 631 | uint doalloc, |
635 | uint dolock, | ||
636 | xfs_dquot_t *udqhint, /* hint */ | 632 | xfs_dquot_t *udqhint, /* hint */ |
637 | xfs_dquot_t **IO_idqpp) | 633 | xfs_dquot_t **IO_idqpp) |
638 | { | 634 | { |
@@ -641,16 +637,16 @@ xfs_qm_dqattach_one( | |||
641 | 637 | ||
642 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 638 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
643 | error = 0; | 639 | error = 0; |
640 | |||
644 | /* | 641 | /* |
645 | * See if we already have it in the inode itself. IO_idqpp is | 642 | * See if we already have it in the inode itself. IO_idqpp is |
646 | * &i_udquot or &i_gdquot. This made the code look weird, but | 643 | * &i_udquot or &i_gdquot. This made the code look weird, but |
647 | * made the logic a lot simpler. | 644 | * made the logic a lot simpler. |
648 | */ | 645 | */ |
649 | if ((dqp = *IO_idqpp)) { | 646 | dqp = *IO_idqpp; |
650 | if (dolock) | 647 | if (dqp) { |
651 | xfs_dqlock(dqp); | ||
652 | xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); | 648 | xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); |
653 | goto done; | 649 | return 0; |
654 | } | 650 | } |
655 | 651 | ||
656 | /* | 652 | /* |
@@ -659,38 +655,38 @@ xfs_qm_dqattach_one( | |||
659 | * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside | 655 | * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside |
660 | * the user dquot. | 656 | * the user dquot. |
661 | */ | 657 | */ |
662 | ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); | 658 | if (udqhint) { |
663 | if (udqhint && !dolock) | 659 | ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); |
664 | xfs_dqlock(udqhint); | 660 | xfs_dqlock(udqhint); |
665 | 661 | ||
666 | /* | 662 | /* |
667 | * No need to take dqlock to look at the id. | 663 | * No need to take dqlock to look at the id. |
668 | * The ID can't change until it gets reclaimed, and it won't | 664 | * |
669 | * be reclaimed as long as we have a ref from inode and we hold | 665 | * The ID can't change until it gets reclaimed, and it won't |
670 | * the ilock. | 666 | * be reclaimed as long as we have a ref from inode and we |
671 | */ | 667 | * hold the ilock. |
672 | if (udqhint && | 668 | */ |
673 | (dqp = udqhint->q_gdquot) && | 669 | dqp = udqhint->q_gdquot; |
674 | (be32_to_cpu(dqp->q_core.d_id) == id)) { | 670 | if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { |
675 | ASSERT(XFS_DQ_IS_LOCKED(udqhint)); | 671 | xfs_dqlock(dqp); |
676 | xfs_dqlock(dqp); | 672 | XFS_DQHOLD(dqp); |
677 | XFS_DQHOLD(dqp); | 673 | ASSERT(*IO_idqpp == NULL); |
678 | ASSERT(*IO_idqpp == NULL); | 674 | *IO_idqpp = dqp; |
679 | *IO_idqpp = dqp; | 675 | |
680 | if (!dolock) { | ||
681 | xfs_dqunlock(dqp); | 676 | xfs_dqunlock(dqp); |
682 | xfs_dqunlock(udqhint); | 677 | xfs_dqunlock(udqhint); |
678 | return 0; | ||
683 | } | 679 | } |
684 | goto done; | 680 | |
685 | } | 681 | /* |
686 | /* | 682 | * We can't hold a dquot lock when we call the dqget code. |
687 | * We can't hold a dquot lock when we call the dqget code. | 683 | * We'll deadlock in no time, because of (not conforming to) |
688 | * We'll deadlock in no time, because of (not conforming to) | 684 | * lock ordering - the inodelock comes before any dquot lock, |
689 | * lock ordering - the inodelock comes before any dquot lock, | 685 | * and we may drop and reacquire the ilock in xfs_qm_dqget(). |
690 | * and we may drop and reacquire the ilock in xfs_qm_dqget(). | 686 | */ |
691 | */ | ||
692 | if (udqhint) | ||
693 | xfs_dqunlock(udqhint); | 687 | xfs_dqunlock(udqhint); |
688 | } | ||
689 | |||
694 | /* | 690 | /* |
695 | * Find the dquot from somewhere. This bumps the | 691 | * Find the dquot from somewhere. This bumps the |
696 | * reference count of dquot and returns it locked. | 692 | * reference count of dquot and returns it locked. |
@@ -698,48 +694,19 @@ xfs_qm_dqattach_one( | |||
698 | * disk and we didn't ask it to allocate; | 694 | * disk and we didn't ask it to allocate; |
699 | * ESRCH if quotas got turned off suddenly. | 695 | * ESRCH if quotas got turned off suddenly. |
700 | */ | 696 | */ |
701 | if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type, | 697 | error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); |
702 | doalloc|XFS_QMOPT_DOWARN, &dqp))) { | 698 | if (error) |
703 | if (udqhint && dolock) | 699 | return error; |
704 | xfs_dqlock(udqhint); | ||
705 | goto done; | ||
706 | } | ||
707 | 700 | ||
708 | xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); | 701 | xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); |
702 | |||
709 | /* | 703 | /* |
710 | * dqget may have dropped and re-acquired the ilock, but it guarantees | 704 | * dqget may have dropped and re-acquired the ilock, but it guarantees |
711 | * that the dquot returned is the one that should go in the inode. | 705 | * that the dquot returned is the one that should go in the inode. |
712 | */ | 706 | */ |
713 | *IO_idqpp = dqp; | 707 | *IO_idqpp = dqp; |
714 | ASSERT(dqp); | 708 | xfs_dqunlock(dqp); |
715 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 709 | return 0; |
716 | if (! dolock) { | ||
717 | xfs_dqunlock(dqp); | ||
718 | goto done; | ||
719 | } | ||
720 | if (! udqhint) | ||
721 | goto done; | ||
722 | |||
723 | ASSERT(udqhint); | ||
724 | ASSERT(dolock); | ||
725 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
726 | if (! xfs_qm_dqlock_nowait(udqhint)) { | ||
727 | xfs_dqunlock(dqp); | ||
728 | xfs_dqlock(udqhint); | ||
729 | xfs_dqlock(dqp); | ||
730 | } | ||
731 | done: | ||
732 | #ifdef QUOTADEBUG | ||
733 | if (udqhint) { | ||
734 | if (dolock) | ||
735 | ASSERT(XFS_DQ_IS_LOCKED(udqhint)); | ||
736 | } | ||
737 | if (! error) { | ||
738 | if (dolock) | ||
739 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
740 | } | ||
741 | #endif | ||
742 | return error; | ||
743 | } | 710 | } |
744 | 711 | ||
745 | 712 | ||
@@ -754,24 +721,15 @@ xfs_qm_dqattach_one( | |||
754 | STATIC void | 721 | STATIC void |
755 | xfs_qm_dqattach_grouphint( | 722 | xfs_qm_dqattach_grouphint( |
756 | xfs_dquot_t *udq, | 723 | xfs_dquot_t *udq, |
757 | xfs_dquot_t *gdq, | 724 | xfs_dquot_t *gdq) |
758 | uint locked) | ||
759 | { | 725 | { |
760 | xfs_dquot_t *tmp; | 726 | xfs_dquot_t *tmp; |
761 | 727 | ||
762 | #ifdef QUOTADEBUG | 728 | xfs_dqlock(udq); |
763 | if (locked) { | ||
764 | ASSERT(XFS_DQ_IS_LOCKED(udq)); | ||
765 | ASSERT(XFS_DQ_IS_LOCKED(gdq)); | ||
766 | } | ||
767 | #endif | ||
768 | if (! locked) | ||
769 | xfs_dqlock(udq); | ||
770 | 729 | ||
771 | if ((tmp = udq->q_gdquot)) { | 730 | if ((tmp = udq->q_gdquot)) { |
772 | if (tmp == gdq) { | 731 | if (tmp == gdq) { |
773 | if (! locked) | 732 | xfs_dqunlock(udq); |
774 | xfs_dqunlock(udq); | ||
775 | return; | 733 | return; |
776 | } | 734 | } |
777 | 735 | ||
@@ -781,8 +739,6 @@ xfs_qm_dqattach_grouphint( | |||
781 | * because the freelist lock comes before dqlocks. | 739 | * because the freelist lock comes before dqlocks. |
782 | */ | 740 | */ |
783 | xfs_dqunlock(udq); | 741 | xfs_dqunlock(udq); |
784 | if (locked) | ||
785 | xfs_dqunlock(gdq); | ||
786 | /* | 742 | /* |
787 | * we took a hard reference once upon a time in dqget, | 743 | * we took a hard reference once upon a time in dqget, |
788 | * so give it back when the udquot no longer points at it | 744 | * so give it back when the udquot no longer points at it |
@@ -795,9 +751,7 @@ xfs_qm_dqattach_grouphint( | |||
795 | 751 | ||
796 | } else { | 752 | } else { |
797 | ASSERT(XFS_DQ_IS_LOCKED(udq)); | 753 | ASSERT(XFS_DQ_IS_LOCKED(udq)); |
798 | if (! locked) { | 754 | xfs_dqlock(gdq); |
799 | xfs_dqlock(gdq); | ||
800 | } | ||
801 | } | 755 | } |
802 | 756 | ||
803 | ASSERT(XFS_DQ_IS_LOCKED(udq)); | 757 | ASSERT(XFS_DQ_IS_LOCKED(udq)); |
@@ -810,10 +764,9 @@ xfs_qm_dqattach_grouphint( | |||
810 | XFS_DQHOLD(gdq); | 764 | XFS_DQHOLD(gdq); |
811 | udq->q_gdquot = gdq; | 765 | udq->q_gdquot = gdq; |
812 | } | 766 | } |
813 | if (! locked) { | 767 | |
814 | xfs_dqunlock(gdq); | 768 | xfs_dqunlock(gdq); |
815 | xfs_dqunlock(udq); | 769 | xfs_dqunlock(udq); |
816 | } | ||
817 | } | 770 | } |
818 | 771 | ||
819 | 772 | ||
@@ -821,8 +774,6 @@ xfs_qm_dqattach_grouphint( | |||
821 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON | 774 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON |
822 | * into account. | 775 | * into account. |
823 | * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. | 776 | * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. |
824 | * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty | ||
825 | * much made this code a complete mess, but it has been pretty useful. | ||
826 | * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. | 777 | * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. |
827 | * Inode may get unlocked and relocked in here, and the caller must deal with | 778 | * Inode may get unlocked and relocked in here, and the caller must deal with |
828 | * the consequences. | 779 | * the consequences. |
@@ -851,7 +802,6 @@ xfs_qm_dqattach( | |||
851 | if (XFS_IS_UQUOTA_ON(mp)) { | 802 | if (XFS_IS_UQUOTA_ON(mp)) { |
852 | error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, | 803 | error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, |
853 | flags & XFS_QMOPT_DQALLOC, | 804 | flags & XFS_QMOPT_DQALLOC, |
854 | flags & XFS_QMOPT_DQLOCK, | ||
855 | NULL, &ip->i_udquot); | 805 | NULL, &ip->i_udquot); |
856 | if (error) | 806 | if (error) |
857 | goto done; | 807 | goto done; |
@@ -863,11 +813,9 @@ xfs_qm_dqattach( | |||
863 | error = XFS_IS_GQUOTA_ON(mp) ? | 813 | error = XFS_IS_GQUOTA_ON(mp) ? |
864 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, | 814 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, |
865 | flags & XFS_QMOPT_DQALLOC, | 815 | flags & XFS_QMOPT_DQALLOC, |
866 | flags & XFS_QMOPT_DQLOCK, | ||
867 | ip->i_udquot, &ip->i_gdquot) : | 816 | ip->i_udquot, &ip->i_gdquot) : |
868 | xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, | 817 | xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, |
869 | flags & XFS_QMOPT_DQALLOC, | 818 | flags & XFS_QMOPT_DQALLOC, |
870 | flags & XFS_QMOPT_DQLOCK, | ||
871 | ip->i_udquot, &ip->i_gdquot); | 819 | ip->i_udquot, &ip->i_gdquot); |
872 | /* | 820 | /* |
873 | * Don't worry about the udquot that we may have | 821 | * Don't worry about the udquot that we may have |
@@ -898,22 +846,13 @@ xfs_qm_dqattach( | |||
898 | /* | 846 | /* |
899 | * Attach i_gdquot to the gdquot hint inside the i_udquot. | 847 | * Attach i_gdquot to the gdquot hint inside the i_udquot. |
900 | */ | 848 | */ |
901 | xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot, | 849 | xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); |
902 | flags & XFS_QMOPT_DQLOCK); | ||
903 | } | 850 | } |
904 | 851 | ||
905 | done: | 852 | done: |
906 | 853 | ||
907 | #ifdef QUOTADEBUG | 854 | #ifdef QUOTADEBUG |
908 | if (! error) { | 855 | if (! error) { |
909 | if (ip->i_udquot) { | ||
910 | if (flags & XFS_QMOPT_DQLOCK) | ||
911 | ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot)); | ||
912 | } | ||
913 | if (ip->i_gdquot) { | ||
914 | if (flags & XFS_QMOPT_DQLOCK) | ||
915 | ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot)); | ||
916 | } | ||
917 | if (XFS_IS_UQUOTA_ON(mp)) | 856 | if (XFS_IS_UQUOTA_ON(mp)) |
918 | ASSERT(ip->i_udquot); | 857 | ASSERT(ip->i_udquot); |
919 | if (XFS_IS_OQUOTA_ON(mp)) | 858 | if (XFS_IS_OQUOTA_ON(mp)) |
@@ -2086,7 +2025,7 @@ xfs_qm_shake_freelist( | |||
2086 | * a dqlookup process that holds the hashlock that is | 2025 | * a dqlookup process that holds the hashlock that is |
2087 | * waiting for the freelist lock. | 2026 | * waiting for the freelist lock. |
2088 | */ | 2027 | */ |
2089 | if (! xfs_qm_dqhashlock_nowait(dqp)) { | 2028 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) { |
2090 | xfs_dqfunlock(dqp); | 2029 | xfs_dqfunlock(dqp); |
2091 | xfs_dqunlock(dqp); | 2030 | xfs_dqunlock(dqp); |
2092 | dqp = dqp->dq_flnext; | 2031 | dqp = dqp->dq_flnext; |
@@ -2103,7 +2042,7 @@ xfs_qm_shake_freelist( | |||
2103 | /* XXX put a sentinel so that we can come back here */ | 2042 | /* XXX put a sentinel so that we can come back here */ |
2104 | xfs_dqfunlock(dqp); | 2043 | xfs_dqfunlock(dqp); |
2105 | xfs_dqunlock(dqp); | 2044 | xfs_dqunlock(dqp); |
2106 | XFS_DQ_HASH_UNLOCK(hash); | 2045 | mutex_unlock(&hash->qh_lock); |
2107 | xfs_qm_freelist_unlock(xfs_Gqm); | 2046 | xfs_qm_freelist_unlock(xfs_Gqm); |
2108 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | 2047 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) |
2109 | return nreclaimed; | 2048 | return nreclaimed; |
@@ -2120,7 +2059,7 @@ xfs_qm_shake_freelist( | |||
2120 | XQM_HASHLIST_REMOVE(hash, dqp); | 2059 | XQM_HASHLIST_REMOVE(hash, dqp); |
2121 | xfs_dqfunlock(dqp); | 2060 | xfs_dqfunlock(dqp); |
2122 | xfs_qm_mplist_unlock(dqp->q_mount); | 2061 | xfs_qm_mplist_unlock(dqp->q_mount); |
2123 | XFS_DQ_HASH_UNLOCK(hash); | 2062 | mutex_unlock(&hash->qh_lock); |
2124 | 2063 | ||
2125 | off_freelist: | 2064 | off_freelist: |
2126 | XQM_FREELIST_REMOVE(dqp); | 2065 | XQM_FREELIST_REMOVE(dqp); |
@@ -2262,7 +2201,7 @@ xfs_qm_dqreclaim_one(void) | |||
2262 | continue; | 2201 | continue; |
2263 | } | 2202 | } |
2264 | 2203 | ||
2265 | if (! xfs_qm_dqhashlock_nowait(dqp)) | 2204 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) |
2266 | goto mplistunlock; | 2205 | goto mplistunlock; |
2267 | 2206 | ||
2268 | ASSERT(dqp->q_nrefs == 0); | 2207 | ASSERT(dqp->q_nrefs == 0); |
@@ -2271,7 +2210,7 @@ xfs_qm_dqreclaim_one(void) | |||
2271 | XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); | 2210 | XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); |
2272 | XQM_FREELIST_REMOVE(dqp); | 2211 | XQM_FREELIST_REMOVE(dqp); |
2273 | dqpout = dqp; | 2212 | dqpout = dqp; |
2274 | XFS_DQ_HASH_UNLOCK(dqp->q_hash); | 2213 | mutex_unlock(&dqp->q_hash->qh_lock); |
2275 | mplistunlock: | 2214 | mplistunlock: |
2276 | xfs_qm_mplist_unlock(dqp->q_mount); | 2215 | xfs_qm_mplist_unlock(dqp->q_mount); |
2277 | xfs_dqfunlock(dqp); | 2216 | xfs_dqfunlock(dqp); |
@@ -2774,34 +2713,3 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq) | |||
2774 | { | 2713 | { |
2775 | xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); | 2714 | xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); |
2776 | } | 2715 | } |
2777 | |||
2778 | STATIC int | ||
2779 | xfs_qm_dqhashlock_nowait( | ||
2780 | xfs_dquot_t *dqp) | ||
2781 | { | ||
2782 | int locked; | ||
2783 | |||
2784 | locked = mutex_trylock(&((dqp)->q_hash->qh_lock)); | ||
2785 | return locked; | ||
2786 | } | ||
2787 | |||
2788 | int | ||
2789 | xfs_qm_freelist_lock_nowait( | ||
2790 | xfs_qm_t *xqm) | ||
2791 | { | ||
2792 | int locked; | ||
2793 | |||
2794 | locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock)); | ||
2795 | return locked; | ||
2796 | } | ||
2797 | |||
2798 | STATIC int | ||
2799 | xfs_qm_mplist_nowait( | ||
2800 | xfs_mount_t *mp) | ||
2801 | { | ||
2802 | int locked; | ||
2803 | |||
2804 | ASSERT(mp->m_quotainfo); | ||
2805 | locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp))); | ||
2806 | return locked; | ||
2807 | } | ||
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index ddf09166387c..a371954cae1b 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -27,7 +27,7 @@ struct xfs_qm; | |||
27 | struct xfs_inode; | 27 | struct xfs_inode; |
28 | 28 | ||
29 | extern uint ndquot; | 29 | extern uint ndquot; |
30 | extern mutex_t xfs_Gqm_lock; | 30 | extern struct mutex xfs_Gqm_lock; |
31 | extern struct xfs_qm *xfs_Gqm; | 31 | extern struct xfs_qm *xfs_Gqm; |
32 | extern kmem_zone_t *qm_dqzone; | 32 | extern kmem_zone_t *qm_dqzone; |
33 | extern kmem_zone_t *qm_dqtrxzone; | 33 | extern kmem_zone_t *qm_dqtrxzone; |
@@ -79,7 +79,7 @@ typedef xfs_dqhash_t xfs_dqlist_t; | |||
79 | typedef struct xfs_frlist { | 79 | typedef struct xfs_frlist { |
80 | struct xfs_dquot *qh_next; | 80 | struct xfs_dquot *qh_next; |
81 | struct xfs_dquot *qh_prev; | 81 | struct xfs_dquot *qh_prev; |
82 | mutex_t qh_lock; | 82 | struct mutex qh_lock; |
83 | uint qh_version; | 83 | uint qh_version; |
84 | uint qh_nelems; | 84 | uint qh_nelems; |
85 | } xfs_frlist_t; | 85 | } xfs_frlist_t; |
@@ -115,7 +115,7 @@ typedef struct xfs_quotainfo { | |||
115 | xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ | 115 | xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ |
116 | xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ | 116 | xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ |
117 | xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ | 117 | xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ |
118 | mutex_t qi_quotaofflock;/* to serialize quotaoff */ | 118 | struct mutex qi_quotaofflock;/* to serialize quotaoff */ |
119 | xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ | 119 | xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ |
120 | uint qi_dqperchunk; /* # ondisk dqs in above chunk */ | 120 | uint qi_dqperchunk; /* # ondisk dqs in above chunk */ |
121 | xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ | 121 | xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ |
@@ -158,11 +158,6 @@ typedef struct xfs_dquot_acct { | |||
158 | #define XFS_QM_IWARNLIMIT 5 | 158 | #define XFS_QM_IWARNLIMIT 5 |
159 | #define XFS_QM_RTBWARNLIMIT 5 | 159 | #define XFS_QM_RTBWARNLIMIT 5 |
160 | 160 | ||
161 | #define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock)) | ||
162 | #define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock)) | ||
163 | #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) | ||
164 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) | ||
165 | |||
166 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 161 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); |
167 | extern void xfs_qm_mount_quotas(xfs_mount_t *); | 162 | extern void xfs_qm_mount_quotas(xfs_mount_t *); |
168 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 163 | extern int xfs_qm_quotacheck(xfs_mount_t *); |
@@ -178,6 +173,16 @@ extern void xfs_qm_dqdetach(xfs_inode_t *); | |||
178 | extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); | 173 | extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); |
179 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); | 174 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); |
180 | 175 | ||
176 | /* quota ops */ | ||
177 | extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); | ||
178 | extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, | ||
179 | fs_disk_quota_t *); | ||
180 | extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, | ||
181 | fs_disk_quota_t *); | ||
182 | extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); | ||
183 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); | ||
184 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); | ||
185 | |||
181 | /* vop stuff */ | 186 | /* vop stuff */ |
182 | extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, | 187 | extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, |
183 | uid_t, gid_t, prid_t, uint, | 188 | uid_t, gid_t, prid_t, uint, |
@@ -194,11 +199,6 @@ extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, | |||
194 | /* list stuff */ | 199 | /* list stuff */ |
195 | extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); | 200 | extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); |
196 | extern void xfs_qm_freelist_unlink(xfs_dquot_t *); | 201 | extern void xfs_qm_freelist_unlink(xfs_dquot_t *); |
197 | extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *); | ||
198 | |||
199 | /* system call interface */ | ||
200 | extern int xfs_qm_quotactl(struct xfs_mount *, int, int, | ||
201 | xfs_caddr_t); | ||
202 | 202 | ||
203 | #ifdef DEBUG | 203 | #ifdef DEBUG |
204 | extern int xfs_qm_internalqcheck(xfs_mount_t *); | 204 | extern int xfs_qm_internalqcheck(xfs_mount_t *); |
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index bc6c5cca3e12..63037c689a4b 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -235,7 +235,6 @@ struct xfs_qmops xfs_qmcore_xfs = { | |||
235 | .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, | 235 | .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, |
236 | .xfs_dqstatvfs = xfs_qm_statvfs, | 236 | .xfs_dqstatvfs = xfs_qm_statvfs, |
237 | .xfs_dqsync = xfs_qm_sync, | 237 | .xfs_dqsync = xfs_qm_sync, |
238 | .xfs_quotactl = xfs_qm_quotactl, | ||
239 | .xfs_dqtrxops = &xfs_trans_dquot_ops, | 238 | .xfs_dqtrxops = &xfs_trans_dquot_ops, |
240 | }; | 239 | }; |
241 | EXPORT_SYMBOL(xfs_qmcore_xfs); | 240 | EXPORT_SYMBOL(xfs_qmcore_xfs); |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 68139b38aede..c7b66f6506ce 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -57,135 +57,16 @@ | |||
57 | # define qdprintk(s, args...) do { } while (0) | 57 | # define qdprintk(s, args...) do { } while (0) |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | STATIC int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); | ||
61 | STATIC int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, | ||
62 | fs_disk_quota_t *); | ||
63 | STATIC int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); | ||
64 | STATIC int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, | ||
65 | fs_disk_quota_t *); | ||
66 | STATIC int xfs_qm_scall_quotaon(xfs_mount_t *, uint); | ||
67 | STATIC int xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t); | ||
68 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); | 60 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); |
69 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, | 61 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, |
70 | uint); | 62 | uint); |
71 | STATIC uint xfs_qm_import_flags(uint); | ||
72 | STATIC uint xfs_qm_export_flags(uint); | 63 | STATIC uint xfs_qm_export_flags(uint); |
73 | STATIC uint xfs_qm_import_qtype_flags(uint); | ||
74 | STATIC uint xfs_qm_export_qtype_flags(uint); | 64 | STATIC uint xfs_qm_export_qtype_flags(uint); |
75 | STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, | 65 | STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, |
76 | fs_disk_quota_t *); | 66 | fs_disk_quota_t *); |
77 | 67 | ||
78 | 68 | ||
79 | /* | 69 | /* |
80 | * The main distribution switch of all XFS quotactl system calls. | ||
81 | */ | ||
82 | int | ||
83 | xfs_qm_quotactl( | ||
84 | xfs_mount_t *mp, | ||
85 | int cmd, | ||
86 | int id, | ||
87 | xfs_caddr_t addr) | ||
88 | { | ||
89 | int error; | ||
90 | |||
91 | ASSERT(addr != NULL || cmd == Q_XQUOTASYNC); | ||
92 | |||
93 | /* | ||
94 | * The following commands are valid even when quotaoff. | ||
95 | */ | ||
96 | switch (cmd) { | ||
97 | case Q_XQUOTARM: | ||
98 | /* | ||
99 | * Truncate quota files. quota must be off. | ||
100 | */ | ||
101 | if (XFS_IS_QUOTA_ON(mp)) | ||
102 | return XFS_ERROR(EINVAL); | ||
103 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
104 | return XFS_ERROR(EROFS); | ||
105 | return (xfs_qm_scall_trunc_qfiles(mp, | ||
106 | xfs_qm_import_qtype_flags(*(uint *)addr))); | ||
107 | |||
108 | case Q_XGETQSTAT: | ||
109 | /* | ||
110 | * Get quota status information. | ||
111 | */ | ||
112 | return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr)); | ||
113 | |||
114 | case Q_XQUOTAON: | ||
115 | /* | ||
116 | * QUOTAON - enabling quota enforcement. | ||
117 | * Quota accounting must be turned on at mount time. | ||
118 | */ | ||
119 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
120 | return XFS_ERROR(EROFS); | ||
121 | return (xfs_qm_scall_quotaon(mp, | ||
122 | xfs_qm_import_flags(*(uint *)addr))); | ||
123 | |||
124 | case Q_XQUOTAOFF: | ||
125 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
126 | return XFS_ERROR(EROFS); | ||
127 | break; | ||
128 | |||
129 | case Q_XQUOTASYNC: | ||
130 | return xfs_sync_inodes(mp, SYNC_DELWRI); | ||
131 | |||
132 | default: | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | if (! XFS_IS_QUOTA_ON(mp)) | ||
137 | return XFS_ERROR(ESRCH); | ||
138 | |||
139 | switch (cmd) { | ||
140 | case Q_XQUOTAOFF: | ||
141 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
142 | return XFS_ERROR(EROFS); | ||
143 | error = xfs_qm_scall_quotaoff(mp, | ||
144 | xfs_qm_import_flags(*(uint *)addr), | ||
145 | B_FALSE); | ||
146 | break; | ||
147 | |||
148 | case Q_XGETQUOTA: | ||
149 | error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER, | ||
150 | (fs_disk_quota_t *)addr); | ||
151 | break; | ||
152 | case Q_XGETGQUOTA: | ||
153 | error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, | ||
154 | (fs_disk_quota_t *)addr); | ||
155 | break; | ||
156 | case Q_XGETPQUOTA: | ||
157 | error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, | ||
158 | (fs_disk_quota_t *)addr); | ||
159 | break; | ||
160 | |||
161 | case Q_XSETQLIM: | ||
162 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
163 | return XFS_ERROR(EROFS); | ||
164 | error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER, | ||
165 | (fs_disk_quota_t *)addr); | ||
166 | break; | ||
167 | case Q_XSETGQLIM: | ||
168 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
169 | return XFS_ERROR(EROFS); | ||
170 | error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, | ||
171 | (fs_disk_quota_t *)addr); | ||
172 | break; | ||
173 | case Q_XSETPQLIM: | ||
174 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
175 | return XFS_ERROR(EROFS); | ||
176 | error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, | ||
177 | (fs_disk_quota_t *)addr); | ||
178 | break; | ||
179 | |||
180 | default: | ||
181 | error = XFS_ERROR(EINVAL); | ||
182 | break; | ||
183 | } | ||
184 | |||
185 | return (error); | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Turn off quota accounting and/or enforcement for all udquots and/or | 70 | * Turn off quota accounting and/or enforcement for all udquots and/or |
190 | * gdquots. Called only at unmount time. | 71 | * gdquots. Called only at unmount time. |
191 | * | 72 | * |
@@ -193,11 +74,10 @@ xfs_qm_quotactl( | |||
193 | * incore, and modifies the ondisk dquot directly. Therefore, for example, | 74 | * incore, and modifies the ondisk dquot directly. Therefore, for example, |
194 | * it is an error to call this twice, without purging the cache. | 75 | * it is an error to call this twice, without purging the cache. |
195 | */ | 76 | */ |
196 | STATIC int | 77 | int |
197 | xfs_qm_scall_quotaoff( | 78 | xfs_qm_scall_quotaoff( |
198 | xfs_mount_t *mp, | 79 | xfs_mount_t *mp, |
199 | uint flags, | 80 | uint flags) |
200 | boolean_t force) | ||
201 | { | 81 | { |
202 | uint dqtype; | 82 | uint dqtype; |
203 | int error; | 83 | int error; |
@@ -205,8 +85,6 @@ xfs_qm_scall_quotaoff( | |||
205 | xfs_qoff_logitem_t *qoffstart; | 85 | xfs_qoff_logitem_t *qoffstart; |
206 | int nculprits; | 86 | int nculprits; |
207 | 87 | ||
208 | if (!force && !capable(CAP_SYS_ADMIN)) | ||
209 | return XFS_ERROR(EPERM); | ||
210 | /* | 88 | /* |
211 | * No file system can have quotas enabled on disk but not in core. | 89 | * No file system can have quotas enabled on disk but not in core. |
212 | * Note that quota utilities (like quotaoff) _expect_ | 90 | * Note that quota utilities (like quotaoff) _expect_ |
@@ -375,7 +253,7 @@ out_error: | |||
375 | return (error); | 253 | return (error); |
376 | } | 254 | } |
377 | 255 | ||
378 | STATIC int | 256 | int |
379 | xfs_qm_scall_trunc_qfiles( | 257 | xfs_qm_scall_trunc_qfiles( |
380 | xfs_mount_t *mp, | 258 | xfs_mount_t *mp, |
381 | uint flags) | 259 | uint flags) |
@@ -383,8 +261,6 @@ xfs_qm_scall_trunc_qfiles( | |||
383 | int error = 0, error2 = 0; | 261 | int error = 0, error2 = 0; |
384 | xfs_inode_t *qip; | 262 | xfs_inode_t *qip; |
385 | 263 | ||
386 | if (!capable(CAP_SYS_ADMIN)) | ||
387 | return XFS_ERROR(EPERM); | ||
388 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { | 264 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { |
389 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); | 265 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); |
390 | return XFS_ERROR(EINVAL); | 266 | return XFS_ERROR(EINVAL); |
@@ -416,7 +292,7 @@ xfs_qm_scall_trunc_qfiles( | |||
416 | * effect immediately. | 292 | * effect immediately. |
417 | * (Switching on quota accounting must be done at mount time.) | 293 | * (Switching on quota accounting must be done at mount time.) |
418 | */ | 294 | */ |
419 | STATIC int | 295 | int |
420 | xfs_qm_scall_quotaon( | 296 | xfs_qm_scall_quotaon( |
421 | xfs_mount_t *mp, | 297 | xfs_mount_t *mp, |
422 | uint flags) | 298 | uint flags) |
@@ -426,9 +302,6 @@ xfs_qm_scall_quotaon( | |||
426 | uint accflags; | 302 | uint accflags; |
427 | __int64_t sbflags; | 303 | __int64_t sbflags; |
428 | 304 | ||
429 | if (!capable(CAP_SYS_ADMIN)) | ||
430 | return XFS_ERROR(EPERM); | ||
431 | |||
432 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); | 305 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); |
433 | /* | 306 | /* |
434 | * Switching on quota accounting must be done at mount time. | 307 | * Switching on quota accounting must be done at mount time. |
@@ -517,7 +390,7 @@ xfs_qm_scall_quotaon( | |||
517 | /* | 390 | /* |
518 | * Return quota status information, such as uquota-off, enforcements, etc. | 391 | * Return quota status information, such as uquota-off, enforcements, etc. |
519 | */ | 392 | */ |
520 | STATIC int | 393 | int |
521 | xfs_qm_scall_getqstat( | 394 | xfs_qm_scall_getqstat( |
522 | xfs_mount_t *mp, | 395 | xfs_mount_t *mp, |
523 | fs_quota_stat_t *out) | 396 | fs_quota_stat_t *out) |
@@ -582,7 +455,7 @@ xfs_qm_scall_getqstat( | |||
582 | /* | 455 | /* |
583 | * Adjust quota limits, and start/stop timers accordingly. | 456 | * Adjust quota limits, and start/stop timers accordingly. |
584 | */ | 457 | */ |
585 | STATIC int | 458 | int |
586 | xfs_qm_scall_setqlim( | 459 | xfs_qm_scall_setqlim( |
587 | xfs_mount_t *mp, | 460 | xfs_mount_t *mp, |
588 | xfs_dqid_t id, | 461 | xfs_dqid_t id, |
@@ -595,9 +468,6 @@ xfs_qm_scall_setqlim( | |||
595 | int error; | 468 | int error; |
596 | xfs_qcnt_t hard, soft; | 469 | xfs_qcnt_t hard, soft; |
597 | 470 | ||
598 | if (!capable(CAP_SYS_ADMIN)) | ||
599 | return XFS_ERROR(EPERM); | ||
600 | |||
601 | if ((newlim->d_fieldmask & | 471 | if ((newlim->d_fieldmask & |
602 | (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) | 472 | (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) |
603 | return (0); | 473 | return (0); |
@@ -742,7 +612,7 @@ xfs_qm_scall_setqlim( | |||
742 | return error; | 612 | return error; |
743 | } | 613 | } |
744 | 614 | ||
745 | STATIC int | 615 | int |
746 | xfs_qm_scall_getquota( | 616 | xfs_qm_scall_getquota( |
747 | xfs_mount_t *mp, | 617 | xfs_mount_t *mp, |
748 | xfs_dqid_t id, | 618 | xfs_dqid_t id, |
@@ -935,30 +805,6 @@ xfs_qm_export_dquot( | |||
935 | } | 805 | } |
936 | 806 | ||
937 | STATIC uint | 807 | STATIC uint |
938 | xfs_qm_import_qtype_flags( | ||
939 | uint uflags) | ||
940 | { | ||
941 | uint oflags = 0; | ||
942 | |||
943 | /* | ||
944 | * Can't be more than one, or none. | ||
945 | */ | ||
946 | if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == | ||
947 | (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) || | ||
948 | ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) == | ||
949 | (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) || | ||
950 | ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) == | ||
951 | (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) || | ||
952 | ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0)) | ||
953 | return (0); | ||
954 | |||
955 | oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0; | ||
956 | oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0; | ||
957 | oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0; | ||
958 | return oflags; | ||
959 | } | ||
960 | |||
961 | STATIC uint | ||
962 | xfs_qm_export_qtype_flags( | 808 | xfs_qm_export_qtype_flags( |
963 | uint flags) | 809 | uint flags) |
964 | { | 810 | { |
@@ -979,26 +825,6 @@ xfs_qm_export_qtype_flags( | |||
979 | } | 825 | } |
980 | 826 | ||
981 | STATIC uint | 827 | STATIC uint |
982 | xfs_qm_import_flags( | ||
983 | uint uflags) | ||
984 | { | ||
985 | uint flags = 0; | ||
986 | |||
987 | if (uflags & XFS_QUOTA_UDQ_ACCT) | ||
988 | flags |= XFS_UQUOTA_ACCT; | ||
989 | if (uflags & XFS_QUOTA_PDQ_ACCT) | ||
990 | flags |= XFS_PQUOTA_ACCT; | ||
991 | if (uflags & XFS_QUOTA_GDQ_ACCT) | ||
992 | flags |= XFS_GQUOTA_ACCT; | ||
993 | if (uflags & XFS_QUOTA_UDQ_ENFD) | ||
994 | flags |= XFS_UQUOTA_ENFD; | ||
995 | if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) | ||
996 | flags |= XFS_OQUOTA_ENFD; | ||
997 | return (flags); | ||
998 | } | ||
999 | |||
1000 | |||
1001 | STATIC uint | ||
1002 | xfs_qm_export_flags( | 828 | xfs_qm_export_flags( |
1003 | uint flags) | 829 | uint flags) |
1004 | { | 830 | { |
@@ -1134,7 +960,7 @@ xfs_dqhash_t *qmtest_udqtab; | |||
1134 | xfs_dqhash_t *qmtest_gdqtab; | 960 | xfs_dqhash_t *qmtest_gdqtab; |
1135 | int qmtest_hashmask; | 961 | int qmtest_hashmask; |
1136 | int qmtest_nfails; | 962 | int qmtest_nfails; |
1137 | mutex_t qcheck_lock; | 963 | struct mutex qcheck_lock; |
1138 | 964 | ||
1139 | #define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ | 965 | #define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ |
1140 | (__psunsigned_t)(id)) & \ | 966 | (__psunsigned_t)(id)) & \ |
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index c4fcea600bc2..8286b2842b6b 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h | |||
@@ -42,34 +42,24 @@ | |||
42 | #define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) | 42 | #define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) |
43 | 43 | ||
44 | #define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist) | 44 | #define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist) |
45 | #define XFS_QI_MPLLOCK(mp) ((mp)->m_quotainfo->qi_dqlist.qh_lock) | ||
46 | #define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) | 45 | #define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) |
47 | #define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) | 46 | #define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) |
48 | 47 | ||
49 | #define XQMLCK(h) (mutex_lock(&((h)->qh_lock))) | 48 | #define xfs_qm_mplist_lock(mp) \ |
50 | #define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock))) | 49 | mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock)) |
51 | #ifdef DEBUG | 50 | #define xfs_qm_mplist_nowait(mp) \ |
52 | struct xfs_dqhash; | 51 | mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock)) |
53 | static inline int XQMISLCKD(struct xfs_dqhash *h) | 52 | #define xfs_qm_mplist_unlock(mp) \ |
54 | { | 53 | mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock)) |
55 | if (mutex_trylock(&h->qh_lock)) { | 54 | #define XFS_QM_IS_MPLIST_LOCKED(mp) \ |
56 | mutex_unlock(&h->qh_lock); | 55 | mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock)) |
57 | return 0; | 56 | |
58 | } | 57 | #define xfs_qm_freelist_lock(qm) \ |
59 | return 1; | 58 | mutex_lock(&((qm)->qm_dqfreelist.qh_lock)) |
60 | } | 59 | #define xfs_qm_freelist_lock_nowait(qm) \ |
61 | #endif | 60 | mutex_trylock(&((qm)->qm_dqfreelist.qh_lock)) |
62 | 61 | #define xfs_qm_freelist_unlock(qm) \ | |
63 | #define XFS_DQ_HASH_LOCK(h) XQMLCK(h) | 62 | mutex_unlock(&((qm)->qm_dqfreelist.qh_lock)) |
64 | #define XFS_DQ_HASH_UNLOCK(h) XQMUNLCK(h) | ||
65 | #define XFS_DQ_IS_HASH_LOCKED(h) XQMISLCKD(h) | ||
66 | |||
67 | #define xfs_qm_mplist_lock(mp) XQMLCK(&(XFS_QI_MPL_LIST(mp))) | ||
68 | #define xfs_qm_mplist_unlock(mp) XQMUNLCK(&(XFS_QI_MPL_LIST(mp))) | ||
69 | #define XFS_QM_IS_MPLIST_LOCKED(mp) XQMISLCKD(&(XFS_QI_MPL_LIST(mp))) | ||
70 | |||
71 | #define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist)) | ||
72 | #define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist)) | ||
73 | 63 | ||
74 | /* | 64 | /* |
75 | * Hash into a bucket in the dquot hash table, based on <mp, id>. | 65 | * Hash into a bucket in the dquot hash table, based on <mp, id>. |
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 99611381e740..447173bcf96d 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c | |||
@@ -624,10 +624,9 @@ xfs_trans_dqresv( | |||
624 | xfs_qcnt_t *resbcountp; | 624 | xfs_qcnt_t *resbcountp; |
625 | xfs_quotainfo_t *q = mp->m_quotainfo; | 625 | xfs_quotainfo_t *q = mp->m_quotainfo; |
626 | 626 | ||
627 | if (! (flags & XFS_QMOPT_DQLOCK)) { | 627 | |
628 | xfs_dqlock(dqp); | 628 | xfs_dqlock(dqp); |
629 | } | 629 | |
630 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
631 | if (flags & XFS_TRANS_DQ_RES_BLKS) { | 630 | if (flags & XFS_TRANS_DQ_RES_BLKS) { |
632 | hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); | 631 | hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); |
633 | if (!hardlimit) | 632 | if (!hardlimit) |
@@ -740,10 +739,8 @@ xfs_trans_dqresv( | |||
740 | ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); | 739 | ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); |
741 | 740 | ||
742 | error_return: | 741 | error_return: |
743 | if (! (flags & XFS_QMOPT_DQLOCK)) { | 742 | xfs_dqunlock(dqp); |
744 | xfs_dqunlock(dqp); | 743 | return error; |
745 | } | ||
746 | return (error); | ||
747 | } | 744 | } |
748 | 745 | ||
749 | 746 | ||
@@ -753,8 +750,7 @@ error_return: | |||
753 | * grp/prj quotas is important, because this follows a both-or-nothing | 750 | * grp/prj quotas is important, because this follows a both-or-nothing |
754 | * approach. | 751 | * approach. |
755 | * | 752 | * |
756 | * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked. | 753 | * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. |
757 | * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. | ||
758 | * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. | 754 | * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. |
759 | * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks | 755 | * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks |
760 | * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks | 756 | * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks |
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index ae5482965424..3f3610a7ee05 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
25 | #include "xfs_dmapi.h" | 25 | #include "xfs_dmapi.h" |
26 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
27 | #include "xfs_error.h" | ||
27 | 28 | ||
28 | static char message[1024]; /* keep it off the stack */ | 29 | static char message[1024]; /* keep it off the stack */ |
29 | static DEFINE_SPINLOCK(xfs_err_lock); | 30 | static DEFINE_SPINLOCK(xfs_err_lock); |
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c index 5830c040ea7e..b83f76b6d410 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/support/uuid.c | |||
@@ -17,10 +17,6 @@ | |||
17 | */ | 17 | */ |
18 | #include <xfs.h> | 18 | #include <xfs.h> |
19 | 19 | ||
20 | static DEFINE_MUTEX(uuid_monitor); | ||
21 | static int uuid_table_size; | ||
22 | static uuid_t *uuid_table; | ||
23 | |||
24 | /* IRIX interpretation of an uuid_t */ | 20 | /* IRIX interpretation of an uuid_t */ |
25 | typedef struct { | 21 | typedef struct { |
26 | __be32 uu_timelow; | 22 | __be32 uu_timelow; |
@@ -46,12 +42,6 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) | |||
46 | fsid[1] = be32_to_cpu(uup->uu_timelow); | 42 | fsid[1] = be32_to_cpu(uup->uu_timelow); |
47 | } | 43 | } |
48 | 44 | ||
49 | void | ||
50 | uuid_create_nil(uuid_t *uuid) | ||
51 | { | ||
52 | memset(uuid, 0, sizeof(*uuid)); | ||
53 | } | ||
54 | |||
55 | int | 45 | int |
56 | uuid_is_nil(uuid_t *uuid) | 46 | uuid_is_nil(uuid_t *uuid) |
57 | { | 47 | { |
@@ -71,64 +61,3 @@ uuid_equal(uuid_t *uuid1, uuid_t *uuid2) | |||
71 | { | 61 | { |
72 | return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; | 62 | return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; |
73 | } | 63 | } |
74 | |||
75 | /* | ||
76 | * Given a 128-bit uuid, return a 64-bit value by adding the top and bottom | ||
77 | * 64-bit words. NOTE: This function can not be changed EVER. Although | ||
78 | * brain-dead, some applications depend on this 64-bit value remaining | ||
79 | * persistent. Specifically, DMI vendors store the value as a persistent | ||
80 | * filehandle. | ||
81 | */ | ||
82 | __uint64_t | ||
83 | uuid_hash64(uuid_t *uuid) | ||
84 | { | ||
85 | __uint64_t *sp = (__uint64_t *)uuid; | ||
86 | |||
87 | return sp[0] + sp[1]; | ||
88 | } | ||
89 | |||
90 | int | ||
91 | uuid_table_insert(uuid_t *uuid) | ||
92 | { | ||
93 | int i, hole; | ||
94 | |||
95 | mutex_lock(&uuid_monitor); | ||
96 | for (i = 0, hole = -1; i < uuid_table_size; i++) { | ||
97 | if (uuid_is_nil(&uuid_table[i])) { | ||
98 | hole = i; | ||
99 | continue; | ||
100 | } | ||
101 | if (uuid_equal(uuid, &uuid_table[i])) { | ||
102 | mutex_unlock(&uuid_monitor); | ||
103 | return 0; | ||
104 | } | ||
105 | } | ||
106 | if (hole < 0) { | ||
107 | uuid_table = kmem_realloc(uuid_table, | ||
108 | (uuid_table_size + 1) * sizeof(*uuid_table), | ||
109 | uuid_table_size * sizeof(*uuid_table), | ||
110 | KM_SLEEP); | ||
111 | hole = uuid_table_size++; | ||
112 | } | ||
113 | uuid_table[hole] = *uuid; | ||
114 | mutex_unlock(&uuid_monitor); | ||
115 | return 1; | ||
116 | } | ||
117 | |||
118 | void | ||
119 | uuid_table_remove(uuid_t *uuid) | ||
120 | { | ||
121 | int i; | ||
122 | |||
123 | mutex_lock(&uuid_monitor); | ||
124 | for (i = 0; i < uuid_table_size; i++) { | ||
125 | if (uuid_is_nil(&uuid_table[i])) | ||
126 | continue; | ||
127 | if (!uuid_equal(uuid, &uuid_table[i])) | ||
128 | continue; | ||
129 | uuid_create_nil(&uuid_table[i]); | ||
130 | break; | ||
131 | } | ||
132 | ASSERT(i < uuid_table_size); | ||
133 | mutex_unlock(&uuid_monitor); | ||
134 | } | ||
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h index cff5b607d445..4732d71262cc 100644 --- a/fs/xfs/support/uuid.h +++ b/fs/xfs/support/uuid.h | |||
@@ -22,12 +22,8 @@ typedef struct { | |||
22 | unsigned char __u_bits[16]; | 22 | unsigned char __u_bits[16]; |
23 | } uuid_t; | 23 | } uuid_t; |
24 | 24 | ||
25 | extern void uuid_create_nil(uuid_t *uuid); | ||
26 | extern int uuid_is_nil(uuid_t *uuid); | 25 | extern int uuid_is_nil(uuid_t *uuid); |
27 | extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); | 26 | extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); |
28 | extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); | 27 | extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); |
29 | extern __uint64_t uuid_hash64(uuid_t *uuid); | ||
30 | extern int uuid_table_insert(uuid_t *uuid); | ||
31 | extern void uuid_table_remove(uuid_t *uuid); | ||
32 | 28 | ||
33 | #endif /* __XFS_SUPPORT_UUID_H__ */ | 29 | #endif /* __XFS_SUPPORT_UUID_H__ */ |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 143d63ecb20a..c8641f713caa 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -223,8 +223,8 @@ typedef struct xfs_perag | |||
223 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp)) | 223 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp)) |
224 | #define XFS_MIN_FREELIST_PAG(pag,mp) \ | 224 | #define XFS_MIN_FREELIST_PAG(pag,mp) \ |
225 | (XFS_MIN_FREELIST_RAW( \ | 225 | (XFS_MIN_FREELIST_RAW( \ |
226 | (uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ | 226 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ |
227 | (uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)) | 227 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)) |
228 | 228 | ||
229 | #define XFS_AGB_TO_FSB(mp,agno,agbno) \ | 229 | #define XFS_AGB_TO_FSB(mp,agno,agbno) \ |
230 | (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) | 230 | (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 028e44e58ea9..2cf944eb796d 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -1872,6 +1872,25 @@ xfs_alloc_compute_maxlevels( | |||
1872 | } | 1872 | } |
1873 | 1873 | ||
1874 | /* | 1874 | /* |
1875 | * Find the length of the longest extent in an AG. | ||
1876 | */ | ||
1877 | xfs_extlen_t | ||
1878 | xfs_alloc_longest_free_extent( | ||
1879 | struct xfs_mount *mp, | ||
1880 | struct xfs_perag *pag) | ||
1881 | { | ||
1882 | xfs_extlen_t need, delta = 0; | ||
1883 | |||
1884 | need = XFS_MIN_FREELIST_PAG(pag, mp); | ||
1885 | if (need > pag->pagf_flcount) | ||
1886 | delta = need - pag->pagf_flcount; | ||
1887 | |||
1888 | if (pag->pagf_longest > delta) | ||
1889 | return pag->pagf_longest - delta; | ||
1890 | return pag->pagf_flcount > 0 || pag->pagf_longest > 0; | ||
1891 | } | ||
1892 | |||
1893 | /* | ||
1875 | * Decide whether to use this allocation group for this allocation. | 1894 | * Decide whether to use this allocation group for this allocation. |
1876 | * If so, fix up the btree freelist's size. | 1895 | * If so, fix up the btree freelist's size. |
1877 | */ | 1896 | */ |
@@ -1923,15 +1942,12 @@ xfs_alloc_fix_freelist( | |||
1923 | } | 1942 | } |
1924 | 1943 | ||
1925 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { | 1944 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { |
1926 | need = XFS_MIN_FREELIST_PAG(pag, mp); | ||
1927 | delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; | ||
1928 | /* | 1945 | /* |
1929 | * If it looks like there isn't a long enough extent, or enough | 1946 | * If it looks like there isn't a long enough extent, or enough |
1930 | * total blocks, reject it. | 1947 | * total blocks, reject it. |
1931 | */ | 1948 | */ |
1932 | longest = (pag->pagf_longest > delta) ? | 1949 | need = XFS_MIN_FREELIST_PAG(pag, mp); |
1933 | (pag->pagf_longest - delta) : | 1950 | longest = xfs_alloc_longest_free_extent(mp, pag); |
1934 | (pag->pagf_flcount > 0 || pag->pagf_longest > 0); | ||
1935 | if ((args->minlen + args->alignment + args->minalignslop - 1) > | 1951 | if ((args->minlen + args->alignment + args->minalignslop - 1) > |
1936 | longest || | 1952 | longest || |
1937 | ((int)(pag->pagf_freeblks + pag->pagf_flcount - | 1953 | ((int)(pag->pagf_freeblks + pag->pagf_flcount - |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 588172796f7b..e704caee10df 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -100,6 +100,12 @@ typedef struct xfs_alloc_arg { | |||
100 | #define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ | 100 | #define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ |
101 | #define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ | 101 | #define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ |
102 | 102 | ||
103 | /* | ||
104 | * Find the length of the longest extent in an AG. | ||
105 | */ | ||
106 | xfs_extlen_t | ||
107 | xfs_alloc_longest_free_extent(struct xfs_mount *mp, | ||
108 | struct xfs_perag *pag); | ||
103 | 109 | ||
104 | #ifdef __KERNEL__ | 110 | #ifdef __KERNEL__ |
105 | 111 | ||
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 6c323f8a4cd1..afdc8911637d 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -155,7 +155,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | |||
155 | * minimum offset only needs to be the space required for | 155 | * minimum offset only needs to be the space required for |
156 | * the btree root. | 156 | * the btree root. |
157 | */ | 157 | */ |
158 | if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > mp->m_attroffset) | 158 | if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > |
159 | xfs_default_attroffset(dp)) | ||
159 | dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); | 160 | dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); |
160 | break; | 161 | break; |
161 | 162 | ||
@@ -298,6 +299,26 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) | |||
298 | } | 299 | } |
299 | 300 | ||
300 | /* | 301 | /* |
302 | * After the last attribute is removed revert to original inode format, | ||
303 | * making all literal area available to the data fork once more. | ||
304 | */ | ||
305 | STATIC void | ||
306 | xfs_attr_fork_reset( | ||
307 | struct xfs_inode *ip, | ||
308 | struct xfs_trans *tp) | ||
309 | { | ||
310 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
311 | ip->i_d.di_forkoff = 0; | ||
312 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | ||
313 | |||
314 | ASSERT(ip->i_d.di_anextents == 0); | ||
315 | ASSERT(ip->i_afp == NULL); | ||
316 | |||
317 | ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t); | ||
318 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
319 | } | ||
320 | |||
321 | /* | ||
301 | * Remove an attribute from the shortform attribute list structure. | 322 | * Remove an attribute from the shortform attribute list structure. |
302 | */ | 323 | */ |
303 | int | 324 | int |
@@ -344,22 +365,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) | |||
344 | */ | 365 | */ |
345 | totsize -= size; | 366 | totsize -= size; |
346 | if (totsize == sizeof(xfs_attr_sf_hdr_t) && | 367 | if (totsize == sizeof(xfs_attr_sf_hdr_t) && |
347 | !(args->op_flags & XFS_DA_OP_ADDNAME) && | 368 | (mp->m_flags & XFS_MOUNT_ATTR2) && |
348 | (mp->m_flags & XFS_MOUNT_ATTR2) && | 369 | (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && |
349 | (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { | 370 | !(args->op_flags & XFS_DA_OP_ADDNAME)) { |
350 | /* | 371 | xfs_attr_fork_reset(dp, args->trans); |
351 | * Last attribute now removed, revert to original | ||
352 | * inode format making all literal area available | ||
353 | * to the data fork once more. | ||
354 | */ | ||
355 | xfs_idestroy_fork(dp, XFS_ATTR_FORK); | ||
356 | dp->i_d.di_forkoff = 0; | ||
357 | dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | ||
358 | ASSERT(dp->i_d.di_anextents == 0); | ||
359 | ASSERT(dp->i_afp == NULL); | ||
360 | dp->i_df.if_ext_max = | ||
361 | XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); | ||
362 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); | ||
363 | } else { | 372 | } else { |
364 | xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); | 373 | xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); |
365 | dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); | 374 | dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); |
@@ -786,20 +795,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) | |||
786 | if (forkoff == -1) { | 795 | if (forkoff == -1) { |
787 | ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); | 796 | ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); |
788 | ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); | 797 | ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); |
789 | 798 | xfs_attr_fork_reset(dp, args->trans); | |
790 | /* | ||
791 | * Last attribute was removed, revert to original | ||
792 | * inode format making all literal area available | ||
793 | * to the data fork once more. | ||
794 | */ | ||
795 | xfs_idestroy_fork(dp, XFS_ATTR_FORK); | ||
796 | dp->i_d.di_forkoff = 0; | ||
797 | dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | ||
798 | ASSERT(dp->i_d.di_anextents == 0); | ||
799 | ASSERT(dp->i_afp == NULL); | ||
800 | dp->i_df.if_ext_max = | ||
801 | XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); | ||
802 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); | ||
803 | goto out; | 799 | goto out; |
804 | } | 800 | } |
805 | 801 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index c852cd65aaea..3a6ed426327a 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -2479,7 +2479,7 @@ xfs_bmap_adjacent( | |||
2479 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); | 2479 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); |
2480 | /* | 2480 | /* |
2481 | * If allocating at eof, and there's a previous real block, | 2481 | * If allocating at eof, and there's a previous real block, |
2482 | * try to use it's last block as our starting point. | 2482 | * try to use its last block as our starting point. |
2483 | */ | 2483 | */ |
2484 | if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF && | 2484 | if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF && |
2485 | !isnullstartblock(ap->prevp->br_startblock) && | 2485 | !isnullstartblock(ap->prevp->br_startblock) && |
@@ -2712,9 +2712,6 @@ xfs_bmap_btalloc( | |||
2712 | xfs_agnumber_t startag; | 2712 | xfs_agnumber_t startag; |
2713 | xfs_alloc_arg_t args; | 2713 | xfs_alloc_arg_t args; |
2714 | xfs_extlen_t blen; | 2714 | xfs_extlen_t blen; |
2715 | xfs_extlen_t delta; | ||
2716 | xfs_extlen_t longest; | ||
2717 | xfs_extlen_t need; | ||
2718 | xfs_extlen_t nextminlen = 0; | 2715 | xfs_extlen_t nextminlen = 0; |
2719 | xfs_perag_t *pag; | 2716 | xfs_perag_t *pag; |
2720 | int nullfb; /* true if ap->firstblock isn't set */ | 2717 | int nullfb; /* true if ap->firstblock isn't set */ |
@@ -2796,13 +2793,8 @@ xfs_bmap_btalloc( | |||
2796 | * See xfs_alloc_fix_freelist... | 2793 | * See xfs_alloc_fix_freelist... |
2797 | */ | 2794 | */ |
2798 | if (pag->pagf_init) { | 2795 | if (pag->pagf_init) { |
2799 | need = XFS_MIN_FREELIST_PAG(pag, mp); | 2796 | xfs_extlen_t longest; |
2800 | delta = need > pag->pagf_flcount ? | 2797 | longest = xfs_alloc_longest_free_extent(mp, pag); |
2801 | need - pag->pagf_flcount : 0; | ||
2802 | longest = (pag->pagf_longest > delta) ? | ||
2803 | (pag->pagf_longest - delta) : | ||
2804 | (pag->pagf_flcount > 0 || | ||
2805 | pag->pagf_longest > 0); | ||
2806 | if (blen < longest) | 2798 | if (blen < longest) |
2807 | blen = longest; | 2799 | blen = longest; |
2808 | } else | 2800 | } else |
@@ -3577,6 +3569,27 @@ xfs_bmap_extents_to_btree( | |||
3577 | } | 3569 | } |
3578 | 3570 | ||
3579 | /* | 3571 | /* |
3572 | * Calculate the default attribute fork offset for newly created inodes. | ||
3573 | */ | ||
3574 | uint | ||
3575 | xfs_default_attroffset( | ||
3576 | struct xfs_inode *ip) | ||
3577 | { | ||
3578 | struct xfs_mount *mp = ip->i_mount; | ||
3579 | uint offset; | ||
3580 | |||
3581 | if (mp->m_sb.sb_inodesize == 256) { | ||
3582 | offset = XFS_LITINO(mp) - | ||
3583 | XFS_BMDR_SPACE_CALC(MINABTPTRS); | ||
3584 | } else { | ||
3585 | offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); | ||
3586 | } | ||
3587 | |||
3588 | ASSERT(offset < XFS_LITINO(mp)); | ||
3589 | return offset; | ||
3590 | } | ||
3591 | |||
3592 | /* | ||
3580 | * Helper routine to reset inode di_forkoff field when switching | 3593 | * Helper routine to reset inode di_forkoff field when switching |
3581 | * attribute fork from local to extent format - we reset it where | 3594 | * attribute fork from local to extent format - we reset it where |
3582 | * possible to make space available for inline data fork extents. | 3595 | * possible to make space available for inline data fork extents. |
@@ -3588,15 +3601,18 @@ xfs_bmap_forkoff_reset( | |||
3588 | int whichfork) | 3601 | int whichfork) |
3589 | { | 3602 | { |
3590 | if (whichfork == XFS_ATTR_FORK && | 3603 | if (whichfork == XFS_ATTR_FORK && |
3591 | (ip->i_d.di_format != XFS_DINODE_FMT_DEV) && | 3604 | ip->i_d.di_format != XFS_DINODE_FMT_DEV && |
3592 | (ip->i_d.di_format != XFS_DINODE_FMT_UUID) && | 3605 | ip->i_d.di_format != XFS_DINODE_FMT_UUID && |
3593 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && | 3606 | ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { |
3594 | ((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) { | 3607 | uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; |
3595 | ip->i_d.di_forkoff = mp->m_attroffset >> 3; | 3608 | |
3596 | ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / | 3609 | if (dfl_forkoff > ip->i_d.di_forkoff) { |
3597 | (uint)sizeof(xfs_bmbt_rec_t); | 3610 | ip->i_d.di_forkoff = dfl_forkoff; |
3598 | ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) / | 3611 | ip->i_df.if_ext_max = |
3599 | (uint)sizeof(xfs_bmbt_rec_t); | 3612 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t); |
3613 | ip->i_afp->if_ext_max = | ||
3614 | XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t); | ||
3615 | } | ||
3600 | } | 3616 | } |
3601 | } | 3617 | } |
3602 | 3618 | ||
@@ -4065,7 +4081,7 @@ xfs_bmap_add_attrfork( | |||
4065 | case XFS_DINODE_FMT_BTREE: | 4081 | case XFS_DINODE_FMT_BTREE: |
4066 | ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); | 4082 | ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); |
4067 | if (!ip->i_d.di_forkoff) | 4083 | if (!ip->i_d.di_forkoff) |
4068 | ip->i_d.di_forkoff = mp->m_attroffset >> 3; | 4084 | ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; |
4069 | else if (mp->m_flags & XFS_MOUNT_ATTR2) | 4085 | else if (mp->m_flags & XFS_MOUNT_ATTR2) |
4070 | version = 2; | 4086 | version = 2; |
4071 | break; | 4087 | break; |
@@ -4212,12 +4228,12 @@ xfs_bmap_compute_maxlevels( | |||
4212 | * (a signed 16-bit number, xfs_aextnum_t). | 4228 | * (a signed 16-bit number, xfs_aextnum_t). |
4213 | * | 4229 | * |
4214 | * Note that we can no longer assume that if we are in ATTR1 that | 4230 | * Note that we can no longer assume that if we are in ATTR1 that |
4215 | * the fork offset of all the inodes will be (m_attroffset >> 3) | 4231 | * the fork offset of all the inodes will be |
4216 | * because we could have mounted with ATTR2 and then mounted back | 4232 | * (xfs_default_attroffset(ip) >> 3) because we could have mounted |
4217 | * with ATTR1, keeping the di_forkoff's fixed but probably at | 4233 | * with ATTR2 and then mounted back with ATTR1, keeping the |
4218 | * various positions. Therefore, for both ATTR1 and ATTR2 | 4234 | * di_forkoff's fixed but probably at various positions. Therefore, |
4219 | * we have to assume the worst case scenario of a minimum size | 4235 | * for both ATTR1 and ATTR2 we have to assume the worst case scenario |
4220 | * available. | 4236 | * of a minimum size available. |
4221 | */ | 4237 | */ |
4222 | if (whichfork == XFS_DATA_FORK) { | 4238 | if (whichfork == XFS_DATA_FORK) { |
4223 | maxleafents = MAXEXTNUM; | 4239 | maxleafents = MAXEXTNUM; |
@@ -4804,7 +4820,7 @@ xfs_bmapi( | |||
4804 | xfs_extlen_t minlen; /* min allocation size */ | 4820 | xfs_extlen_t minlen; /* min allocation size */ |
4805 | xfs_mount_t *mp; /* xfs mount structure */ | 4821 | xfs_mount_t *mp; /* xfs mount structure */ |
4806 | int n; /* current extent index */ | 4822 | int n; /* current extent index */ |
4807 | int nallocs; /* number of extents alloc\'d */ | 4823 | int nallocs; /* number of extents alloc'd */ |
4808 | xfs_extnum_t nextents; /* number of extents in file */ | 4824 | xfs_extnum_t nextents; /* number of extents in file */ |
4809 | xfs_fileoff_t obno; /* old block number (offset) */ | 4825 | xfs_fileoff_t obno; /* old block number (offset) */ |
4810 | xfs_bmbt_irec_t prev; /* previous file extent record */ | 4826 | xfs_bmbt_irec_t prev; /* previous file extent record */ |
@@ -6204,7 +6220,7 @@ xfs_bmap_get_bp( | |||
6204 | return(bp); | 6220 | return(bp); |
6205 | } | 6221 | } |
6206 | 6222 | ||
6207 | void | 6223 | STATIC void |
6208 | xfs_check_block( | 6224 | xfs_check_block( |
6209 | struct xfs_btree_block *block, | 6225 | struct xfs_btree_block *block, |
6210 | xfs_mount_t *mp, | 6226 | xfs_mount_t *mp, |
@@ -6494,7 +6510,7 @@ xfs_bmap_count_tree( | |||
6494 | block = XFS_BUF_TO_BLOCK(bp); | 6510 | block = XFS_BUF_TO_BLOCK(bp); |
6495 | 6511 | ||
6496 | if (--level) { | 6512 | if (--level) { |
6497 | /* Not at node above leafs, count this level of nodes */ | 6513 | /* Not at node above leaves, count this level of nodes */ |
6498 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); | 6514 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); |
6499 | while (nextbno != NULLFSBLOCK) { | 6515 | while (nextbno != NULLFSBLOCK) { |
6500 | if ((error = xfs_btree_read_bufl(mp, tp, nextbno, | 6516 | if ((error = xfs_btree_read_bufl(mp, tp, nextbno, |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index be2979d88d32..1b8ff9256bd0 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -125,7 +125,7 @@ typedef struct xfs_bmalloca { | |||
125 | struct xfs_bmbt_irec *gotp; /* extent after, or delayed */ | 125 | struct xfs_bmbt_irec *gotp; /* extent after, or delayed */ |
126 | xfs_extlen_t alen; /* i/o length asked/allocated */ | 126 | xfs_extlen_t alen; /* i/o length asked/allocated */ |
127 | xfs_extlen_t total; /* total blocks needed for xaction */ | 127 | xfs_extlen_t total; /* total blocks needed for xaction */ |
128 | xfs_extlen_t minlen; /* mininum allocation size (blocks) */ | 128 | xfs_extlen_t minlen; /* minimum allocation size (blocks) */ |
129 | xfs_extlen_t minleft; /* amount must be left after alloc */ | 129 | xfs_extlen_t minleft; /* amount must be left after alloc */ |
130 | char eof; /* set if allocating past last extent */ | 130 | char eof; /* set if allocating past last extent */ |
131 | char wasdel; /* replacing a delayed allocation */ | 131 | char wasdel; /* replacing a delayed allocation */ |
@@ -338,6 +338,10 @@ xfs_check_nostate_extents( | |||
338 | xfs_extnum_t idx, | 338 | xfs_extnum_t idx, |
339 | xfs_extnum_t num); | 339 | xfs_extnum_t num); |
340 | 340 | ||
341 | uint | ||
342 | xfs_default_attroffset( | ||
343 | struct xfs_inode *ip); | ||
344 | |||
341 | #ifdef __KERNEL__ | 345 | #ifdef __KERNEL__ |
342 | 346 | ||
343 | /* | 347 | /* |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e73c332eb23f..e9df99574829 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -1883,7 +1883,7 @@ xfs_btree_lshift( | |||
1883 | 1883 | ||
1884 | /* | 1884 | /* |
1885 | * We add one entry to the left side and remove one for the right side. | 1885 | * We add one entry to the left side and remove one for the right side. |
1886 | * Accout for it here, the changes will be updated on disk and logged | 1886 | * Account for it here, the changes will be updated on disk and logged |
1887 | * later. | 1887 | * later. |
1888 | */ | 1888 | */ |
1889 | lrecs++; | 1889 | lrecs++; |
@@ -3535,7 +3535,7 @@ xfs_btree_delrec( | |||
3535 | XFS_BTREE_STATS_INC(cur, join); | 3535 | XFS_BTREE_STATS_INC(cur, join); |
3536 | 3536 | ||
3537 | /* | 3537 | /* |
3538 | * Fix up the the number of records and right block pointer in the | 3538 | * Fix up the number of records and right block pointer in the |
3539 | * surviving block, and log it. | 3539 | * surviving block, and log it. |
3540 | */ | 3540 | */ |
3541 | xfs_btree_set_numrecs(left, lrecs + rrecs); | 3541 | xfs_btree_set_numrecs(left, lrecs + rrecs); |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 789fffdf8b2f..4f852b735b96 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -41,7 +41,7 @@ extern kmem_zone_t *xfs_btree_cur_zone; | |||
41 | /* | 41 | /* |
42 | * Generic btree header. | 42 | * Generic btree header. |
43 | * | 43 | * |
44 | * This is a comination of the actual format used on disk for short and long | 44 | * This is a combination of the actual format used on disk for short and long |
45 | * format btrees. The first three fields are shared by both format, but | 45 | * format btrees. The first three fields are shared by both format, but |
46 | * the pointers are different and should be used with care. | 46 | * the pointers are different and should be used with care. |
47 | * | 47 | * |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index c45f74ff1a5b..9ff6e57a5075 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -1503,7 +1503,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, | |||
1503 | * This is implemented with some source-level loop unrolling. | 1503 | * This is implemented with some source-level loop unrolling. |
1504 | */ | 1504 | */ |
1505 | xfs_dahash_t | 1505 | xfs_dahash_t |
1506 | xfs_da_hashname(const uchar_t *name, int namelen) | 1506 | xfs_da_hashname(const __uint8_t *name, int namelen) |
1507 | { | 1507 | { |
1508 | xfs_dahash_t hash; | 1508 | xfs_dahash_t hash; |
1509 | 1509 | ||
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 70b710c1792d..8c536167bf75 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h | |||
@@ -91,9 +91,9 @@ enum xfs_dacmp { | |||
91 | * Structure to ease passing around component names. | 91 | * Structure to ease passing around component names. |
92 | */ | 92 | */ |
93 | typedef struct xfs_da_args { | 93 | typedef struct xfs_da_args { |
94 | const uchar_t *name; /* string (maybe not NULL terminated) */ | 94 | const __uint8_t *name; /* string (maybe not NULL terminated) */ |
95 | int namelen; /* length of string (maybe no NULL) */ | 95 | int namelen; /* length of string (maybe no NULL) */ |
96 | uchar_t *value; /* set of bytes (maybe contain NULLs) */ | 96 | __uint8_t *value; /* set of bytes (maybe contain NULLs) */ |
97 | int valuelen; /* length of value */ | 97 | int valuelen; /* length of value */ |
98 | int flags; /* argument flags (eg: ATTR_NOCREATE) */ | 98 | int flags; /* argument flags (eg: ATTR_NOCREATE) */ |
99 | xfs_dahash_t hashval; /* hash value of name */ | 99 | xfs_dahash_t hashval; /* hash value of name */ |
@@ -185,7 +185,7 @@ typedef struct xfs_da_state { | |||
185 | unsigned char inleaf; /* insert into 1->lf, 0->splf */ | 185 | unsigned char inleaf; /* insert into 1->lf, 0->splf */ |
186 | unsigned char extravalid; /* T/F: extrablk is in use */ | 186 | unsigned char extravalid; /* T/F: extrablk is in use */ |
187 | unsigned char extraafter; /* T/F: extrablk is after new */ | 187 | unsigned char extraafter; /* T/F: extrablk is after new */ |
188 | xfs_da_state_blk_t extrablk; /* for double-splits on leafs */ | 188 | xfs_da_state_blk_t extrablk; /* for double-splits on leaves */ |
189 | /* for dirv2 extrablk is data */ | 189 | /* for dirv2 extrablk is data */ |
190 | } xfs_da_state_t; | 190 | } xfs_da_state_t; |
191 | 191 | ||
@@ -251,7 +251,7 @@ xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, | |||
251 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, | 251 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, |
252 | xfs_dabuf_t *dead_buf); | 252 | xfs_dabuf_t *dead_buf); |
253 | 253 | ||
254 | uint xfs_da_hashname(const uchar_t *name_string, int name_length); | 254 | uint xfs_da_hashname(const __uint8_t *name_string, int name_length); |
255 | enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, | 255 | enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, |
256 | const char *name, int len); | 256 | const char *name, int len); |
257 | 257 | ||
@@ -268,5 +268,6 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); | |||
268 | 268 | ||
269 | extern struct kmem_zone *xfs_da_state_zone; | 269 | extern struct kmem_zone *xfs_da_state_zone; |
270 | extern struct kmem_zone *xfs_dabuf_zone; | 270 | extern struct kmem_zone *xfs_dabuf_zone; |
271 | extern const struct xfs_nameops xfs_default_nameops; | ||
271 | 272 | ||
272 | #endif /* __XFS_DA_BTREE_H__ */ | 273 | #endif /* __XFS_DA_BTREE_H__ */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index f8278cfcc1d3..e6d839bddbf0 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -79,6 +79,12 @@ xfs_swapext( | |||
79 | goto out_put_target_file; | 79 | goto out_put_target_file; |
80 | } | 80 | } |
81 | 81 | ||
82 | if (IS_SWAPFILE(file->f_path.dentry->d_inode) || | ||
83 | IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { | ||
84 | error = XFS_ERROR(EINVAL); | ||
85 | goto out_put_target_file; | ||
86 | } | ||
87 | |||
82 | ip = XFS_I(file->f_path.dentry->d_inode); | 88 | ip = XFS_I(file->f_path.dentry->d_inode); |
83 | tip = XFS_I(target_file->f_path.dentry->d_inode); | 89 | tip = XFS_I(target_file->f_path.dentry->d_inode); |
84 | 90 | ||
@@ -118,19 +124,17 @@ xfs_swap_extents( | |||
118 | xfs_bstat_t *sbp = &sxp->sx_stat; | 124 | xfs_bstat_t *sbp = &sxp->sx_stat; |
119 | xfs_ifork_t *tempifp, *ifp, *tifp; | 125 | xfs_ifork_t *tempifp, *ifp, *tifp; |
120 | int ilf_fields, tilf_fields; | 126 | int ilf_fields, tilf_fields; |
121 | static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; | ||
122 | int error = 0; | 127 | int error = 0; |
123 | int aforkblks = 0; | 128 | int aforkblks = 0; |
124 | int taforkblks = 0; | 129 | int taforkblks = 0; |
125 | __uint64_t tmp; | 130 | __uint64_t tmp; |
126 | char locked = 0; | ||
127 | 131 | ||
128 | mp = ip->i_mount; | 132 | mp = ip->i_mount; |
129 | 133 | ||
130 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | 134 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); |
131 | if (!tempifp) { | 135 | if (!tempifp) { |
132 | error = XFS_ERROR(ENOMEM); | 136 | error = XFS_ERROR(ENOMEM); |
133 | goto error0; | 137 | goto out; |
134 | } | 138 | } |
135 | 139 | ||
136 | sbp = &sxp->sx_stat; | 140 | sbp = &sxp->sx_stat; |
@@ -143,25 +147,24 @@ xfs_swap_extents( | |||
143 | */ | 147 | */ |
144 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | 148 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); |
145 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | 149 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); |
146 | locked = 1; | ||
147 | 150 | ||
148 | /* Verify that both files have the same format */ | 151 | /* Verify that both files have the same format */ |
149 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { | 152 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { |
150 | error = XFS_ERROR(EINVAL); | 153 | error = XFS_ERROR(EINVAL); |
151 | goto error0; | 154 | goto out_unlock; |
152 | } | 155 | } |
153 | 156 | ||
154 | /* Verify both files are either real-time or non-realtime */ | 157 | /* Verify both files are either real-time or non-realtime */ |
155 | if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { | 158 | if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { |
156 | error = XFS_ERROR(EINVAL); | 159 | error = XFS_ERROR(EINVAL); |
157 | goto error0; | 160 | goto out_unlock; |
158 | } | 161 | } |
159 | 162 | ||
160 | /* Should never get a local format */ | 163 | /* Should never get a local format */ |
161 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | 164 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || |
162 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | 165 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
163 | error = XFS_ERROR(EINVAL); | 166 | error = XFS_ERROR(EINVAL); |
164 | goto error0; | 167 | goto out_unlock; |
165 | } | 168 | } |
166 | 169 | ||
167 | if (VN_CACHED(VFS_I(tip)) != 0) { | 170 | if (VN_CACHED(VFS_I(tip)) != 0) { |
@@ -169,13 +172,13 @@ xfs_swap_extents( | |||
169 | error = xfs_flushinval_pages(tip, 0, -1, | 172 | error = xfs_flushinval_pages(tip, 0, -1, |
170 | FI_REMAPF_LOCKED); | 173 | FI_REMAPF_LOCKED); |
171 | if (error) | 174 | if (error) |
172 | goto error0; | 175 | goto out_unlock; |
173 | } | 176 | } |
174 | 177 | ||
175 | /* Verify O_DIRECT for ftmp */ | 178 | /* Verify O_DIRECT for ftmp */ |
176 | if (VN_CACHED(VFS_I(tip)) != 0) { | 179 | if (VN_CACHED(VFS_I(tip)) != 0) { |
177 | error = XFS_ERROR(EINVAL); | 180 | error = XFS_ERROR(EINVAL); |
178 | goto error0; | 181 | goto out_unlock; |
179 | } | 182 | } |
180 | 183 | ||
181 | /* Verify all data are being swapped */ | 184 | /* Verify all data are being swapped */ |
@@ -183,7 +186,7 @@ xfs_swap_extents( | |||
183 | sxp->sx_length != ip->i_d.di_size || | 186 | sxp->sx_length != ip->i_d.di_size || |
184 | sxp->sx_length != tip->i_d.di_size) { | 187 | sxp->sx_length != tip->i_d.di_size) { |
185 | error = XFS_ERROR(EFAULT); | 188 | error = XFS_ERROR(EFAULT); |
186 | goto error0; | 189 | goto out_unlock; |
187 | } | 190 | } |
188 | 191 | ||
189 | /* | 192 | /* |
@@ -193,7 +196,7 @@ xfs_swap_extents( | |||
193 | */ | 196 | */ |
194 | if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { | 197 | if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { |
195 | error = XFS_ERROR(EINVAL); | 198 | error = XFS_ERROR(EINVAL); |
196 | goto error0; | 199 | goto out_unlock; |
197 | } | 200 | } |
198 | 201 | ||
199 | /* | 202 | /* |
@@ -208,7 +211,7 @@ xfs_swap_extents( | |||
208 | (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || | 211 | (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || |
209 | (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { | 212 | (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { |
210 | error = XFS_ERROR(EBUSY); | 213 | error = XFS_ERROR(EBUSY); |
211 | goto error0; | 214 | goto out_unlock; |
212 | } | 215 | } |
213 | 216 | ||
214 | /* We need to fail if the file is memory mapped. Once we have tossed | 217 | /* We need to fail if the file is memory mapped. Once we have tossed |
@@ -219,7 +222,7 @@ xfs_swap_extents( | |||
219 | */ | 222 | */ |
220 | if (VN_MAPPED(VFS_I(ip))) { | 223 | if (VN_MAPPED(VFS_I(ip))) { |
221 | error = XFS_ERROR(EBUSY); | 224 | error = XFS_ERROR(EBUSY); |
222 | goto error0; | 225 | goto out_unlock; |
223 | } | 226 | } |
224 | 227 | ||
225 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 228 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
@@ -242,8 +245,7 @@ xfs_swap_extents( | |||
242 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 245 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
243 | xfs_iunlock(tip, XFS_IOLOCK_EXCL); | 246 | xfs_iunlock(tip, XFS_IOLOCK_EXCL); |
244 | xfs_trans_cancel(tp, 0); | 247 | xfs_trans_cancel(tp, 0); |
245 | locked = 0; | 248 | goto out; |
246 | goto error0; | ||
247 | } | 249 | } |
248 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | 250 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); |
249 | 251 | ||
@@ -253,19 +255,15 @@ xfs_swap_extents( | |||
253 | if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && | 255 | if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && |
254 | (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | 256 | (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { |
255 | error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); | 257 | error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); |
256 | if (error) { | 258 | if (error) |
257 | xfs_trans_cancel(tp, 0); | 259 | goto out_trans_cancel; |
258 | goto error0; | ||
259 | } | ||
260 | } | 260 | } |
261 | if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && | 261 | if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && |
262 | (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | 262 | (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { |
263 | error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, | 263 | error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, |
264 | &taforkblks); | 264 | &taforkblks); |
265 | if (error) { | 265 | if (error) |
266 | xfs_trans_cancel(tp, 0); | 266 | goto out_trans_cancel; |
267 | goto error0; | ||
268 | } | ||
269 | } | 267 | } |
270 | 268 | ||
271 | /* | 269 | /* |
@@ -332,10 +330,10 @@ xfs_swap_extents( | |||
332 | 330 | ||
333 | 331 | ||
334 | IHOLD(ip); | 332 | IHOLD(ip); |
335 | xfs_trans_ijoin(tp, ip, lock_flags); | 333 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
336 | 334 | ||
337 | IHOLD(tip); | 335 | IHOLD(tip); |
338 | xfs_trans_ijoin(tp, tip, lock_flags); | 336 | xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
339 | 337 | ||
340 | xfs_trans_log_inode(tp, ip, ilf_fields); | 338 | xfs_trans_log_inode(tp, ip, ilf_fields); |
341 | xfs_trans_log_inode(tp, tip, tilf_fields); | 339 | xfs_trans_log_inode(tp, tip, tilf_fields); |
@@ -344,19 +342,19 @@ xfs_swap_extents( | |||
344 | * If this is a synchronous mount, make sure that the | 342 | * If this is a synchronous mount, make sure that the |
345 | * transaction goes to disk before returning to the user. | 343 | * transaction goes to disk before returning to the user. |
346 | */ | 344 | */ |
347 | if (mp->m_flags & XFS_MOUNT_WSYNC) { | 345 | if (mp->m_flags & XFS_MOUNT_WSYNC) |
348 | xfs_trans_set_sync(tp); | 346 | xfs_trans_set_sync(tp); |
349 | } | ||
350 | 347 | ||
351 | error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); | 348 | error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); |
352 | locked = 0; | ||
353 | 349 | ||
354 | error0: | 350 | out_unlock: |
355 | if (locked) { | 351 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
356 | xfs_iunlock(ip, lock_flags); | 352 | xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
357 | xfs_iunlock(tip, lock_flags); | 353 | out: |
358 | } | 354 | kmem_free(tempifp); |
359 | if (tempifp != NULL) | ||
360 | kmem_free(tempifp); | ||
361 | return error; | 355 | return error; |
356 | |||
357 | out_trans_cancel: | ||
358 | xfs_trans_cancel(tp, 0); | ||
359 | goto out_unlock; | ||
362 | } | 360 | } |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 162e8726df5e..e5b153b2e6a3 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -103,7 +103,9 @@ typedef enum xfs_dinode_fmt { | |||
103 | /* | 103 | /* |
104 | * Inode size for given fs. | 104 | * Inode size for given fs. |
105 | */ | 105 | */ |
106 | #define XFS_LITINO(mp) ((mp)->m_litino) | 106 | #define XFS_LITINO(mp) \ |
107 | ((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode))) | ||
108 | |||
107 | #define XFS_BROOT_SIZE_ADJ \ | 109 | #define XFS_BROOT_SIZE_ADJ \ |
108 | (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) | 110 | (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) |
109 | 111 | ||
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 1afb12278b8d..c657bec6d951 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -46,8 +46,6 @@ | |||
46 | 46 | ||
47 | struct xfs_name xfs_name_dotdot = {"..", 2}; | 47 | struct xfs_name xfs_name_dotdot = {"..", 2}; |
48 | 48 | ||
49 | extern const struct xfs_nameops xfs_default_nameops; | ||
50 | |||
51 | /* | 49 | /* |
52 | * ASCII case-insensitive (ie. A-Z) support for directories that was | 50 | * ASCII case-insensitive (ie. A-Z) support for directories that was |
53 | * used in IRIX. | 51 | * used in IRIX. |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e1f0a06aaf04..ab52e9e1c1ee 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -448,7 +448,6 @@ xfs_dir2_block_getdents( | |||
448 | xfs_mount_t *mp; /* filesystem mount point */ | 448 | xfs_mount_t *mp; /* filesystem mount point */ |
449 | char *ptr; /* current data entry */ | 449 | char *ptr; /* current data entry */ |
450 | int wantoff; /* starting block offset */ | 450 | int wantoff; /* starting block offset */ |
451 | xfs_ino_t ino; | ||
452 | xfs_off_t cook; | 451 | xfs_off_t cook; |
453 | 452 | ||
454 | mp = dp->i_mount; | 453 | mp = dp->i_mount; |
@@ -509,16 +508,12 @@ xfs_dir2_block_getdents( | |||
509 | 508 | ||
510 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | 509 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, |
511 | (char *)dep - (char *)block); | 510 | (char *)dep - (char *)block); |
512 | ino = be64_to_cpu(dep->inumber); | ||
513 | #if XFS_BIG_INUMS | ||
514 | ino += mp->m_inoadd; | ||
515 | #endif | ||
516 | 511 | ||
517 | /* | 512 | /* |
518 | * If it didn't fit, set the final offset to here & return. | 513 | * If it didn't fit, set the final offset to here & return. |
519 | */ | 514 | */ |
520 | if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, | 515 | if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, |
521 | ino, DT_UNKNOWN)) { | 516 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) { |
522 | *offset = cook & 0x7fffffff; | 517 | *offset = cook & 0x7fffffff; |
523 | xfs_da_brelse(NULL, bp); | 518 | xfs_da_brelse(NULL, bp); |
524 | return 0; | 519 | return 0; |
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index b816e0252739..efbc290c7fec 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h | |||
@@ -38,7 +38,7 @@ struct xfs_trans; | |||
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Directory address space divided into sections, | 40 | * Directory address space divided into sections, |
41 | * spaces separated by 32gb. | 41 | * spaces separated by 32GB. |
42 | */ | 42 | */ |
43 | #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) | 43 | #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) |
44 | #define XFS_DIR2_DATA_SPACE 0 | 44 | #define XFS_DIR2_DATA_SPACE 0 |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index ef805a374eec..fa913e459442 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -549,7 +549,7 @@ xfs_dir2_leaf_addname( | |||
549 | * Check the internal consistency of a leaf1 block. | 549 | * Check the internal consistency of a leaf1 block. |
550 | * Pop an assert if something is wrong. | 550 | * Pop an assert if something is wrong. |
551 | */ | 551 | */ |
552 | void | 552 | STATIC void |
553 | xfs_dir2_leaf_check( | 553 | xfs_dir2_leaf_check( |
554 | xfs_inode_t *dp, /* incore directory inode */ | 554 | xfs_inode_t *dp, /* incore directory inode */ |
555 | xfs_dabuf_t *bp) /* leaf's buffer */ | 555 | xfs_dabuf_t *bp) /* leaf's buffer */ |
@@ -780,7 +780,6 @@ xfs_dir2_leaf_getdents( | |||
780 | int ra_index; /* *map index for read-ahead */ | 780 | int ra_index; /* *map index for read-ahead */ |
781 | int ra_offset; /* map entry offset for ra */ | 781 | int ra_offset; /* map entry offset for ra */ |
782 | int ra_want; /* readahead count wanted */ | 782 | int ra_want; /* readahead count wanted */ |
783 | xfs_ino_t ino; | ||
784 | 783 | ||
785 | /* | 784 | /* |
786 | * If the offset is at or past the largest allowed value, | 785 | * If the offset is at or past the largest allowed value, |
@@ -1076,24 +1075,12 @@ xfs_dir2_leaf_getdents( | |||
1076 | continue; | 1075 | continue; |
1077 | } | 1076 | } |
1078 | 1077 | ||
1079 | /* | ||
1080 | * Copy the entry into the putargs, and try formatting it. | ||
1081 | */ | ||
1082 | dep = (xfs_dir2_data_entry_t *)ptr; | 1078 | dep = (xfs_dir2_data_entry_t *)ptr; |
1083 | |||
1084 | length = xfs_dir2_data_entsize(dep->namelen); | 1079 | length = xfs_dir2_data_entsize(dep->namelen); |
1085 | 1080 | ||
1086 | ino = be64_to_cpu(dep->inumber); | ||
1087 | #if XFS_BIG_INUMS | ||
1088 | ino += mp->m_inoadd; | ||
1089 | #endif | ||
1090 | |||
1091 | /* | ||
1092 | * Won't fit. Return to caller. | ||
1093 | */ | ||
1094 | if (filldir(dirent, dep->name, dep->namelen, | 1081 | if (filldir(dirent, dep->name, dep->namelen, |
1095 | xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, | 1082 | xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, |
1096 | ino, DT_UNKNOWN)) | 1083 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) |
1097 | break; | 1084 | break; |
1098 | 1085 | ||
1099 | /* | 1086 | /* |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index fa6c3a5ddbc6..5a81ccd1045b 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -1104,7 +1104,7 @@ xfs_dir2_leafn_remove( | |||
1104 | } | 1104 | } |
1105 | xfs_dir2_leafn_check(dp, bp); | 1105 | xfs_dir2_leafn_check(dp, bp); |
1106 | /* | 1106 | /* |
1107 | * Return indication of whether this leaf block is emtpy enough | 1107 | * Return indication of whether this leaf block is empty enough |
1108 | * to justify trying to join it with a neighbor. | 1108 | * to justify trying to join it with a neighbor. |
1109 | */ | 1109 | */ |
1110 | *rval = | 1110 | *rval = |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index a8a8a6efad5b..e89734e84646 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
@@ -748,11 +748,7 @@ xfs_dir2_sf_getdents( | |||
748 | * Put . entry unless we're starting past it. | 748 | * Put . entry unless we're starting past it. |
749 | */ | 749 | */ |
750 | if (*offset <= dot_offset) { | 750 | if (*offset <= dot_offset) { |
751 | ino = dp->i_ino; | 751 | if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, dp->i_ino, DT_DIR)) { |
752 | #if XFS_BIG_INUMS | ||
753 | ino += mp->m_inoadd; | ||
754 | #endif | ||
755 | if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, ino, DT_DIR)) { | ||
756 | *offset = dot_offset & 0x7fffffff; | 752 | *offset = dot_offset & 0x7fffffff; |
757 | return 0; | 753 | return 0; |
758 | } | 754 | } |
@@ -763,9 +759,6 @@ xfs_dir2_sf_getdents( | |||
763 | */ | 759 | */ |
764 | if (*offset <= dotdot_offset) { | 760 | if (*offset <= dotdot_offset) { |
765 | ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); | 761 | ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); |
766 | #if XFS_BIG_INUMS | ||
767 | ino += mp->m_inoadd; | ||
768 | #endif | ||
769 | if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { | 762 | if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { |
770 | *offset = dotdot_offset & 0x7fffffff; | 763 | *offset = dotdot_offset & 0x7fffffff; |
771 | return 0; | 764 | return 0; |
@@ -786,10 +779,6 @@ xfs_dir2_sf_getdents( | |||
786 | } | 779 | } |
787 | 780 | ||
788 | ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); | 781 | ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); |
789 | #if XFS_BIG_INUMS | ||
790 | ino += mp->m_inoadd; | ||
791 | #endif | ||
792 | |||
793 | if (filldir(dirent, sfep->name, sfep->namelen, | 782 | if (filldir(dirent, sfep->name, sfep->namelen, |
794 | off & 0x7fffffff, ino, DT_UNKNOWN)) { | 783 | off & 0x7fffffff, ino, DT_UNKNOWN)) { |
795 | *offset = off & 0x7fffffff; | 784 | *offset = off & 0x7fffffff; |
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 2f049f63e85f..0d22c56fdf64 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h | |||
@@ -33,12 +33,10 @@ typedef struct xfs_extent { | |||
33 | * conversion routine. | 33 | * conversion routine. |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #ifndef HAVE_FORMAT32 | ||
37 | typedef struct xfs_extent_32 { | 36 | typedef struct xfs_extent_32 { |
38 | __uint64_t ext_start; | 37 | __uint64_t ext_start; |
39 | __uint32_t ext_len; | 38 | __uint32_t ext_len; |
40 | } __attribute__((packed)) xfs_extent_32_t; | 39 | } __attribute__((packed)) xfs_extent_32_t; |
41 | #endif | ||
42 | 40 | ||
43 | typedef struct xfs_extent_64 { | 41 | typedef struct xfs_extent_64 { |
44 | __uint64_t ext_start; | 42 | __uint64_t ext_start; |
@@ -59,7 +57,6 @@ typedef struct xfs_efi_log_format { | |||
59 | xfs_extent_t efi_extents[1]; /* array of extents to free */ | 57 | xfs_extent_t efi_extents[1]; /* array of extents to free */ |
60 | } xfs_efi_log_format_t; | 58 | } xfs_efi_log_format_t; |
61 | 59 | ||
62 | #ifndef HAVE_FORMAT32 | ||
63 | typedef struct xfs_efi_log_format_32 { | 60 | typedef struct xfs_efi_log_format_32 { |
64 | __uint16_t efi_type; /* efi log item type */ | 61 | __uint16_t efi_type; /* efi log item type */ |
65 | __uint16_t efi_size; /* size of this item */ | 62 | __uint16_t efi_size; /* size of this item */ |
@@ -67,7 +64,6 @@ typedef struct xfs_efi_log_format_32 { | |||
67 | __uint64_t efi_id; /* efi identifier */ | 64 | __uint64_t efi_id; /* efi identifier */ |
68 | xfs_extent_32_t efi_extents[1]; /* array of extents to free */ | 65 | xfs_extent_32_t efi_extents[1]; /* array of extents to free */ |
69 | } __attribute__((packed)) xfs_efi_log_format_32_t; | 66 | } __attribute__((packed)) xfs_efi_log_format_32_t; |
70 | #endif | ||
71 | 67 | ||
72 | typedef struct xfs_efi_log_format_64 { | 68 | typedef struct xfs_efi_log_format_64 { |
73 | __uint16_t efi_type; /* efi log item type */ | 69 | __uint16_t efi_type; /* efi log item type */ |
@@ -90,7 +86,6 @@ typedef struct xfs_efd_log_format { | |||
90 | xfs_extent_t efd_extents[1]; /* array of extents freed */ | 86 | xfs_extent_t efd_extents[1]; /* array of extents freed */ |
91 | } xfs_efd_log_format_t; | 87 | } xfs_efd_log_format_t; |
92 | 88 | ||
93 | #ifndef HAVE_FORMAT32 | ||
94 | typedef struct xfs_efd_log_format_32 { | 89 | typedef struct xfs_efd_log_format_32 { |
95 | __uint16_t efd_type; /* efd log item type */ | 90 | __uint16_t efd_type; /* efd log item type */ |
96 | __uint16_t efd_size; /* size of this item */ | 91 | __uint16_t efd_size; /* size of this item */ |
@@ -98,7 +93,6 @@ typedef struct xfs_efd_log_format_32 { | |||
98 | __uint64_t efd_efi_id; /* id of corresponding efi */ | 93 | __uint64_t efd_efi_id; /* id of corresponding efi */ |
99 | xfs_extent_32_t efd_extents[1]; /* array of extents freed */ | 94 | xfs_extent_32_t efd_extents[1]; /* array of extents freed */ |
100 | } __attribute__((packed)) xfs_efd_log_format_32_t; | 95 | } __attribute__((packed)) xfs_efd_log_format_32_t; |
101 | #endif | ||
102 | 96 | ||
103 | typedef struct xfs_efd_log_format_64 { | 97 | typedef struct xfs_efd_log_format_64 { |
104 | __uint16_t efd_type; /* efd log item type */ | 98 | __uint16_t efd_type; /* efd log item type */ |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index f3bb75da384e..6c87c8f304ef 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -140,7 +140,7 @@ _xfs_filestream_pick_ag( | |||
140 | xfs_extlen_t minlen) | 140 | xfs_extlen_t minlen) |
141 | { | 141 | { |
142 | int err, trylock, nscan; | 142 | int err, trylock, nscan; |
143 | xfs_extlen_t delta, longest, need, free, minfree, maxfree = 0; | 143 | xfs_extlen_t longest, free, minfree, maxfree = 0; |
144 | xfs_agnumber_t ag, max_ag = NULLAGNUMBER; | 144 | xfs_agnumber_t ag, max_ag = NULLAGNUMBER; |
145 | struct xfs_perag *pag; | 145 | struct xfs_perag *pag; |
146 | 146 | ||
@@ -186,12 +186,7 @@ _xfs_filestream_pick_ag( | |||
186 | goto next_ag; | 186 | goto next_ag; |
187 | } | 187 | } |
188 | 188 | ||
189 | need = XFS_MIN_FREELIST_PAG(pag, mp); | 189 | longest = xfs_alloc_longest_free_extent(mp, pag); |
190 | delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; | ||
191 | longest = (pag->pagf_longest > delta) ? | ||
192 | (pag->pagf_longest - delta) : | ||
193 | (pag->pagf_flcount > 0 || pag->pagf_longest > 0); | ||
194 | |||
195 | if (((minlen && longest >= minlen) || | 190 | if (((minlen && longest >= minlen) || |
196 | (!minlen && pag->pagf_freeblks >= minfree)) && | 191 | (!minlen && pag->pagf_freeblks >= minfree)) && |
197 | (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || | 192 | (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 680d0e0ec932..8379e3bca26c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -576,7 +576,7 @@ out: | |||
576 | if (fdblks_delta) { | 576 | if (fdblks_delta) { |
577 | /* | 577 | /* |
578 | * If we are putting blocks back here, m_resblks_avail is | 578 | * If we are putting blocks back here, m_resblks_avail is |
579 | * already at it's max so this will put it in the free pool. | 579 | * already at its max so this will put it in the free pool. |
580 | * | 580 | * |
581 | * If we need space, we'll either succeed in getting it | 581 | * If we need space, we'll either succeed in getting it |
582 | * from the free block count or we'll get an enospc. If | 582 | * from the free block count or we'll get an enospc. If |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index ab016e5ae7be..3120a3a5e20f 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -230,7 +230,7 @@ xfs_ialloc_ag_alloc( | |||
230 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; | 230 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; |
231 | 231 | ||
232 | /* Allow space for the inode btree to split. */ | 232 | /* Allow space for the inode btree to split. */ |
233 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; | 233 | args.minleft = args.mp->m_in_maxlevels - 1; |
234 | if ((error = xfs_alloc_vextent(&args))) | 234 | if ((error = xfs_alloc_vextent(&args))) |
235 | return error; | 235 | return error; |
236 | } else | 236 | } else |
@@ -270,7 +270,7 @@ xfs_ialloc_ag_alloc( | |||
270 | /* | 270 | /* |
271 | * Allow space for the inode btree to split. | 271 | * Allow space for the inode btree to split. |
272 | */ | 272 | */ |
273 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; | 273 | args.minleft = args.mp->m_in_maxlevels - 1; |
274 | if ((error = xfs_alloc_vextent(&args))) | 274 | if ((error = xfs_alloc_vextent(&args))) |
275 | return error; | 275 | return error; |
276 | } | 276 | } |
@@ -349,7 +349,7 @@ xfs_ialloc_ag_alloc( | |||
349 | * Initialize all inodes in this buffer and then log them. | 349 | * Initialize all inodes in this buffer and then log them. |
350 | * | 350 | * |
351 | * XXX: It would be much better if we had just one transaction to | 351 | * XXX: It would be much better if we had just one transaction to |
352 | * log a whole cluster of inodes instead of all the indivdual | 352 | * log a whole cluster of inodes instead of all the individual |
353 | * transactions causing a lot of log traffic. | 353 | * transactions causing a lot of log traffic. |
354 | */ | 354 | */ |
355 | xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); | 355 | xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); |
@@ -943,7 +943,7 @@ nextag: | |||
943 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % | 943 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % |
944 | XFS_INODES_PER_CHUNK) == 0); | 944 | XFS_INODES_PER_CHUNK) == 0); |
945 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | 945 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); |
946 | XFS_INOBT_CLR_FREE(&rec, offset); | 946 | rec.ir_free &= ~XFS_INOBT_MASK(offset); |
947 | rec.ir_freecount--; | 947 | rec.ir_freecount--; |
948 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, | 948 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, |
949 | rec.ir_free))) | 949 | rec.ir_free))) |
@@ -1105,11 +1105,11 @@ xfs_difree( | |||
1105 | */ | 1105 | */ |
1106 | off = agino - rec.ir_startino; | 1106 | off = agino - rec.ir_startino; |
1107 | ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); | 1107 | ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); |
1108 | ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); | 1108 | ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); |
1109 | /* | 1109 | /* |
1110 | * Mark the inode free & increment the count. | 1110 | * Mark the inode free & increment the count. |
1111 | */ | 1111 | */ |
1112 | XFS_INOBT_SET_FREE(&rec, off); | 1112 | rec.ir_free |= XFS_INOBT_MASK(off); |
1113 | rec.ir_freecount++; | 1113 | rec.ir_freecount++; |
1114 | 1114 | ||
1115 | /* | 1115 | /* |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 99f2408e8d8e..c282a9af5393 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -164,7 +164,7 @@ xfs_inobt_init_rec_from_cur( | |||
164 | } | 164 | } |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * intial value of ptr for lookup | 167 | * initial value of ptr for lookup |
168 | */ | 168 | */ |
169 | STATIC void | 169 | STATIC void |
170 | xfs_inobt_init_ptr_from_cur( | 170 | xfs_inobt_init_ptr_from_cur( |
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 5580e255ff06..f782ad0c4769 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h | |||
@@ -32,14 +32,14 @@ struct xfs_mount; | |||
32 | #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ | 32 | #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ |
33 | 33 | ||
34 | typedef __uint64_t xfs_inofree_t; | 34 | typedef __uint64_t xfs_inofree_t; |
35 | #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) | 35 | #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) |
36 | #define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) | 36 | #define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) |
37 | #define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) | 37 | #define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) |
38 | #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) | ||
38 | 39 | ||
39 | static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) | 40 | static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) |
40 | { | 41 | { |
41 | return (((n) >= XFS_INODES_PER_CHUNK ? \ | 42 | return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; |
42 | (xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i); | ||
43 | } | 43 | } |
44 | 44 | ||
45 | /* | 45 | /* |
@@ -69,20 +69,6 @@ typedef struct xfs_inobt_key { | |||
69 | typedef __be32 xfs_inobt_ptr_t; | 69 | typedef __be32 xfs_inobt_ptr_t; |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * Bit manipulations for ir_free. | ||
73 | */ | ||
74 | #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) | ||
75 | #define XFS_INOBT_IS_FREE(rp,i) \ | ||
76 | (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) | ||
77 | #define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i)) | ||
78 | #define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) | ||
79 | |||
80 | /* | ||
81 | * Maximum number of inode btree levels. | ||
82 | */ | ||
83 | #define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels) | ||
84 | |||
85 | /* | ||
86 | * block numbers in the AG. | 72 | * block numbers in the AG. |
87 | */ | 73 | */ |
88 | #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) | 74 | #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1f175fa34b22..f879c1bc4b96 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -122,7 +122,7 @@ typedef struct xfs_ictimestamp { | |||
122 | 122 | ||
123 | /* | 123 | /* |
124 | * NOTE: This structure must be kept identical to struct xfs_dinode | 124 | * NOTE: This structure must be kept identical to struct xfs_dinode |
125 | * in xfs_dinode.h except for the endianess annotations. | 125 | * in xfs_dinode.h except for the endianness annotations. |
126 | */ | 126 | */ |
127 | typedef struct xfs_icdinode { | 127 | typedef struct xfs_icdinode { |
128 | __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | 128 | __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 9957d0602d54..a52ac125f055 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -40,7 +40,6 @@ typedef struct xfs_inode_log_format { | |||
40 | __int32_t ilf_boffset; /* off of inode in buffer */ | 40 | __int32_t ilf_boffset; /* off of inode in buffer */ |
41 | } xfs_inode_log_format_t; | 41 | } xfs_inode_log_format_t; |
42 | 42 | ||
43 | #ifndef HAVE_FORMAT32 | ||
44 | typedef struct xfs_inode_log_format_32 { | 43 | typedef struct xfs_inode_log_format_32 { |
45 | __uint16_t ilf_type; /* inode log item type */ | 44 | __uint16_t ilf_type; /* inode log item type */ |
46 | __uint16_t ilf_size; /* size of this item */ | 45 | __uint16_t ilf_size; /* size of this item */ |
@@ -56,7 +55,6 @@ typedef struct xfs_inode_log_format_32 { | |||
56 | __int32_t ilf_len; /* len of inode buffer */ | 55 | __int32_t ilf_len; /* len of inode buffer */ |
57 | __int32_t ilf_boffset; /* off of inode in buffer */ | 56 | __int32_t ilf_boffset; /* off of inode in buffer */ |
58 | } __attribute__((packed)) xfs_inode_log_format_32_t; | 57 | } __attribute__((packed)) xfs_inode_log_format_32_t; |
59 | #endif | ||
60 | 58 | ||
61 | typedef struct xfs_inode_log_format_64 { | 59 | typedef struct xfs_inode_log_format_64 { |
62 | __uint16_t ilf_type; /* inode log item type */ | 60 | __uint16_t ilf_type; /* inode log item type */ |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index ee1a0c134cc2..a1cc1322fc0f 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -63,7 +63,7 @@ typedef enum { | |||
63 | */ | 63 | */ |
64 | 64 | ||
65 | typedef struct xfs_iomap { | 65 | typedef struct xfs_iomap { |
66 | xfs_daddr_t iomap_bn; /* first 512b blk of mapping */ | 66 | xfs_daddr_t iomap_bn; /* first 512B blk of mapping */ |
67 | xfs_buftarg_t *iomap_target; | 67 | xfs_buftarg_t *iomap_target; |
68 | xfs_off_t iomap_offset; /* offset of mapping, bytes */ | 68 | xfs_off_t iomap_offset; /* offset of mapping, bytes */ |
69 | xfs_off_t iomap_bsize; /* size of mapping, bytes */ | 69 | xfs_off_t iomap_bsize; /* size of mapping, bytes */ |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index cf98a805ec90..aeb2d2221c7d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -83,7 +83,12 @@ xfs_bulkstat_one_iget( | |||
83 | buf->bs_uid = dic->di_uid; | 83 | buf->bs_uid = dic->di_uid; |
84 | buf->bs_gid = dic->di_gid; | 84 | buf->bs_gid = dic->di_gid; |
85 | buf->bs_size = dic->di_size; | 85 | buf->bs_size = dic->di_size; |
86 | vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); | 86 | /* |
87 | * We are reading the atime from the Linux inode because the | ||
88 | * dinode might not be uptodate. | ||
89 | */ | ||
90 | buf->bs_atime.tv_sec = VFS_I(ip)->i_atime.tv_sec; | ||
91 | buf->bs_atime.tv_nsec = VFS_I(ip)->i_atime.tv_nsec; | ||
87 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; | 92 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; |
88 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; | 93 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; |
89 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; | 94 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; |
@@ -579,7 +584,7 @@ xfs_bulkstat( | |||
579 | * first inode of the cluster. | 584 | * first inode of the cluster. |
580 | * | 585 | * |
581 | * Careful with clustidx. There can be | 586 | * Careful with clustidx. There can be |
582 | * multple clusters per chunk, a single | 587 | * multiple clusters per chunk, a single |
583 | * cluster per chunk or a cluster that has | 588 | * cluster per chunk or a cluster that has |
584 | * inodes represented from several different | 589 | * inodes represented from several different |
585 | * chunks (if blocksize is large). | 590 | * chunks (if blocksize is large). |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f4726f702a9e..f76c6d7cea21 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -574,7 +574,7 @@ xfs_log_mount( | |||
574 | error = xfs_trans_ail_init(mp); | 574 | error = xfs_trans_ail_init(mp); |
575 | if (error) { | 575 | if (error) { |
576 | cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); | 576 | cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); |
577 | goto error; | 577 | goto out_free_log; |
578 | } | 578 | } |
579 | mp->m_log->l_ailp = mp->m_ail; | 579 | mp->m_log->l_ailp = mp->m_ail; |
580 | 580 | ||
@@ -594,20 +594,22 @@ xfs_log_mount( | |||
594 | mp->m_flags |= XFS_MOUNT_RDONLY; | 594 | mp->m_flags |= XFS_MOUNT_RDONLY; |
595 | if (error) { | 595 | if (error) { |
596 | cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); | 596 | cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); |
597 | goto error; | 597 | goto out_destroy_ail; |
598 | } | 598 | } |
599 | } | 599 | } |
600 | 600 | ||
601 | /* Normal transactions can now occur */ | 601 | /* Normal transactions can now occur */ |
602 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; | 602 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; |
603 | 603 | ||
604 | /* End mounting message in xfs_log_mount_finish */ | ||
605 | return 0; | 604 | return 0; |
606 | error: | 605 | |
607 | xfs_log_unmount_dealloc(mp); | 606 | out_destroy_ail: |
607 | xfs_trans_ail_destroy(mp); | ||
608 | out_free_log: | ||
609 | xlog_dealloc_log(mp->m_log); | ||
608 | out: | 610 | out: |
609 | return error; | 611 | return error; |
610 | } /* xfs_log_mount */ | 612 | } |
611 | 613 | ||
612 | /* | 614 | /* |
613 | * Finish the recovery of the file system. This is separate from | 615 | * Finish the recovery of the file system. This is separate from |
@@ -633,19 +635,6 @@ xfs_log_mount_finish(xfs_mount_t *mp) | |||
633 | } | 635 | } |
634 | 636 | ||
635 | /* | 637 | /* |
636 | * Unmount processing for the log. | ||
637 | */ | ||
638 | int | ||
639 | xfs_log_unmount(xfs_mount_t *mp) | ||
640 | { | ||
641 | int error; | ||
642 | |||
643 | error = xfs_log_unmount_write(mp); | ||
644 | xfs_log_unmount_dealloc(mp); | ||
645 | return error; | ||
646 | } | ||
647 | |||
648 | /* | ||
649 | * Final log writes as part of unmount. | 638 | * Final log writes as part of unmount. |
650 | * | 639 | * |
651 | * Mark the filesystem clean as unmount happens. Note that during relocation | 640 | * Mark the filesystem clean as unmount happens. Note that during relocation |
@@ -795,7 +784,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
795 | * and deallocate the log as the aild references the log. | 784 | * and deallocate the log as the aild references the log. |
796 | */ | 785 | */ |
797 | void | 786 | void |
798 | xfs_log_unmount_dealloc(xfs_mount_t *mp) | 787 | xfs_log_unmount(xfs_mount_t *mp) |
799 | { | 788 | { |
800 | xfs_trans_ail_destroy(mp); | 789 | xfs_trans_ail_destroy(mp); |
801 | xlog_dealloc_log(mp->m_log); | 790 | xlog_dealloc_log(mp->m_log); |
@@ -1109,7 +1098,7 @@ xlog_bdstrat_cb(struct xfs_buf *bp) | |||
1109 | /* | 1098 | /* |
1110 | * Return size of each in-core log record buffer. | 1099 | * Return size of each in-core log record buffer. |
1111 | * | 1100 | * |
1112 | * All machines get 8 x 32KB buffers by default, unless tuned otherwise. | 1101 | * All machines get 8 x 32kB buffers by default, unless tuned otherwise. |
1113 | * | 1102 | * |
1114 | * If the filesystem blocksize is too large, we may need to choose a | 1103 | * If the filesystem blocksize is too large, we may need to choose a |
1115 | * larger size since the directory code currently logs entire blocks. | 1104 | * larger size since the directory code currently logs entire blocks. |
@@ -1139,8 +1128,8 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp, | |||
1139 | } | 1128 | } |
1140 | 1129 | ||
1141 | if (xfs_sb_version_haslogv2(&mp->m_sb)) { | 1130 | if (xfs_sb_version_haslogv2(&mp->m_sb)) { |
1142 | /* # headers = size / 32K | 1131 | /* # headers = size / 32k |
1143 | * one header holds cycles from 32K of data | 1132 | * one header holds cycles from 32k of data |
1144 | */ | 1133 | */ |
1145 | 1134 | ||
1146 | xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; | 1135 | xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; |
@@ -1156,7 +1145,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp, | |||
1156 | goto done; | 1145 | goto done; |
1157 | } | 1146 | } |
1158 | 1147 | ||
1159 | /* All machines use 32KB buffers by default. */ | 1148 | /* All machines use 32kB buffers by default. */ |
1160 | log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; | 1149 | log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; |
1161 | log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; | 1150 | log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; |
1162 | 1151 | ||
@@ -1164,32 +1153,8 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp, | |||
1164 | log->l_iclog_hsize = BBSIZE; | 1153 | log->l_iclog_hsize = BBSIZE; |
1165 | log->l_iclog_heads = 1; | 1154 | log->l_iclog_heads = 1; |
1166 | 1155 | ||
1167 | /* | 1156 | done: |
1168 | * For 16KB, we use 3 32KB buffers. For 32KB block sizes, we use | 1157 | /* are we being asked to make the sizes selected above visible? */ |
1169 | * 4 32KB buffers. For 64KB block sizes, we use 8 32KB buffers. | ||
1170 | */ | ||
1171 | if (mp->m_sb.sb_blocksize >= 16*1024) { | ||
1172 | log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; | ||
1173 | log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; | ||
1174 | if (mp->m_logbufs <= 0) { | ||
1175 | switch (mp->m_sb.sb_blocksize) { | ||
1176 | case 16*1024: /* 16 KB */ | ||
1177 | log->l_iclog_bufs = 3; | ||
1178 | break; | ||
1179 | case 32*1024: /* 32 KB */ | ||
1180 | log->l_iclog_bufs = 4; | ||
1181 | break; | ||
1182 | case 64*1024: /* 64 KB */ | ||
1183 | log->l_iclog_bufs = 8; | ||
1184 | break; | ||
1185 | default: | ||
1186 | xlog_panic("XFS: Invalid blocksize"); | ||
1187 | break; | ||
1188 | } | ||
1189 | } | ||
1190 | } | ||
1191 | |||
1192 | done: /* are we being asked to make the sizes selected above visible? */ | ||
1193 | if (mp->m_logbufs == 0) | 1158 | if (mp->m_logbufs == 0) |
1194 | mp->m_logbufs = log->l_iclog_bufs; | 1159 | mp->m_logbufs = log->l_iclog_bufs; |
1195 | if (mp->m_logbsize == 0) | 1160 | if (mp->m_logbsize == 0) |
@@ -3214,7 +3179,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) | |||
3214 | */ | 3179 | */ |
3215 | 3180 | ||
3216 | /* | 3181 | /* |
3217 | * Free a used ticket when it's refcount falls to zero. | 3182 | * Free a used ticket when its refcount falls to zero. |
3218 | */ | 3183 | */ |
3219 | void | 3184 | void |
3220 | xfs_log_ticket_put( | 3185 | xfs_log_ticket_put( |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 8a3e84e900a3..d0c9baa50b1a 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -170,9 +170,8 @@ int xfs_log_write(struct xfs_mount *mp, | |||
170 | int nentries, | 170 | int nentries, |
171 | xfs_log_ticket_t ticket, | 171 | xfs_log_ticket_t ticket, |
172 | xfs_lsn_t *start_lsn); | 172 | xfs_lsn_t *start_lsn); |
173 | int xfs_log_unmount(struct xfs_mount *mp); | ||
174 | int xfs_log_unmount_write(struct xfs_mount *mp); | 173 | int xfs_log_unmount_write(struct xfs_mount *mp); |
175 | void xfs_log_unmount_dealloc(struct xfs_mount *mp); | 174 | void xfs_log_unmount(struct xfs_mount *mp); |
176 | int xfs_log_force_umount(struct xfs_mount *mp, int logerror); | 175 | int xfs_log_force_umount(struct xfs_mount *mp, int logerror); |
177 | int xfs_log_need_covered(struct xfs_mount *mp); | 176 | int xfs_log_need_covered(struct xfs_mount *mp); |
178 | 177 | ||
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 654167be0efb..bcad5f4c1fd1 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -359,7 +359,7 @@ typedef struct xlog_in_core { | |||
359 | int ic_size; | 359 | int ic_size; |
360 | int ic_offset; | 360 | int ic_offset; |
361 | int ic_bwritecnt; | 361 | int ic_bwritecnt; |
362 | ushort_t ic_state; | 362 | unsigned short ic_state; |
363 | char *ic_datap; /* pointer to iclog data */ | 363 | char *ic_datap; /* pointer to iclog data */ |
364 | #ifdef XFS_LOG_TRACE | 364 | #ifdef XFS_LOG_TRACE |
365 | struct ktrace *ic_trace; | 365 | struct ktrace *ic_trace; |
@@ -455,7 +455,6 @@ extern void xlog_recover_process_iunlinks(xlog_t *log); | |||
455 | 455 | ||
456 | extern struct xfs_buf *xlog_get_bp(xlog_t *, int); | 456 | extern struct xfs_buf *xlog_get_bp(xlog_t *, int); |
457 | extern void xlog_put_bp(struct xfs_buf *); | 457 | extern void xlog_put_bp(struct xfs_buf *); |
458 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); | ||
459 | 458 | ||
460 | extern kmem_zone_t *xfs_log_ticket_zone; | 459 | extern kmem_zone_t *xfs_log_ticket_zone; |
461 | 460 | ||
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 61af610d79b3..7ba450116d4f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -94,12 +94,30 @@ xlog_put_bp( | |||
94 | xfs_buf_free(bp); | 94 | xfs_buf_free(bp); |
95 | } | 95 | } |
96 | 96 | ||
97 | STATIC xfs_caddr_t | ||
98 | xlog_align( | ||
99 | xlog_t *log, | ||
100 | xfs_daddr_t blk_no, | ||
101 | int nbblks, | ||
102 | xfs_buf_t *bp) | ||
103 | { | ||
104 | xfs_caddr_t ptr; | ||
105 | |||
106 | if (!log->l_sectbb_log) | ||
107 | return XFS_BUF_PTR(bp); | ||
108 | |||
109 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); | ||
110 | ASSERT(XFS_BUF_SIZE(bp) >= | ||
111 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); | ||
112 | return ptr; | ||
113 | } | ||
114 | |||
97 | 115 | ||
98 | /* | 116 | /* |
99 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. | 117 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. |
100 | */ | 118 | */ |
101 | int | 119 | STATIC int |
102 | xlog_bread( | 120 | xlog_bread_noalign( |
103 | xlog_t *log, | 121 | xlog_t *log, |
104 | xfs_daddr_t blk_no, | 122 | xfs_daddr_t blk_no, |
105 | int nbblks, | 123 | int nbblks, |
@@ -137,6 +155,24 @@ xlog_bread( | |||
137 | return error; | 155 | return error; |
138 | } | 156 | } |
139 | 157 | ||
158 | STATIC int | ||
159 | xlog_bread( | ||
160 | xlog_t *log, | ||
161 | xfs_daddr_t blk_no, | ||
162 | int nbblks, | ||
163 | xfs_buf_t *bp, | ||
164 | xfs_caddr_t *offset) | ||
165 | { | ||
166 | int error; | ||
167 | |||
168 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | ||
169 | if (error) | ||
170 | return error; | ||
171 | |||
172 | *offset = xlog_align(log, blk_no, nbblks, bp); | ||
173 | return 0; | ||
174 | } | ||
175 | |||
140 | /* | 176 | /* |
141 | * Write out the buffer at the given block for the given number of blocks. | 177 | * Write out the buffer at the given block for the given number of blocks. |
142 | * The buffer is kept locked across the write and is returned locked. | 178 | * The buffer is kept locked across the write and is returned locked. |
@@ -180,24 +216,6 @@ xlog_bwrite( | |||
180 | return error; | 216 | return error; |
181 | } | 217 | } |
182 | 218 | ||
183 | STATIC xfs_caddr_t | ||
184 | xlog_align( | ||
185 | xlog_t *log, | ||
186 | xfs_daddr_t blk_no, | ||
187 | int nbblks, | ||
188 | xfs_buf_t *bp) | ||
189 | { | ||
190 | xfs_caddr_t ptr; | ||
191 | |||
192 | if (!log->l_sectbb_log) | ||
193 | return XFS_BUF_PTR(bp); | ||
194 | |||
195 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); | ||
196 | ASSERT(XFS_BUF_SIZE(bp) >= | ||
197 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); | ||
198 | return ptr; | ||
199 | } | ||
200 | |||
201 | #ifdef DEBUG | 219 | #ifdef DEBUG |
202 | /* | 220 | /* |
203 | * dump debug superblock and log record information | 221 | * dump debug superblock and log record information |
@@ -211,11 +229,11 @@ xlog_header_check_dump( | |||
211 | 229 | ||
212 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); | 230 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); |
213 | for (b = 0; b < 16; b++) | 231 | for (b = 0; b < 16; b++) |
214 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); | 232 | cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]); |
215 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); | 233 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); |
216 | cmn_err(CE_DEBUG, " log : uuid = "); | 234 | cmn_err(CE_DEBUG, " log : uuid = "); |
217 | for (b = 0; b < 16; b++) | 235 | for (b = 0; b < 16; b++) |
218 | cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); | 236 | cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]); |
219 | cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); | 237 | cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); |
220 | } | 238 | } |
221 | #else | 239 | #else |
@@ -321,9 +339,9 @@ xlog_find_cycle_start( | |||
321 | 339 | ||
322 | mid_blk = BLK_AVG(first_blk, *last_blk); | 340 | mid_blk = BLK_AVG(first_blk, *last_blk); |
323 | while (mid_blk != first_blk && mid_blk != *last_blk) { | 341 | while (mid_blk != first_blk && mid_blk != *last_blk) { |
324 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | 342 | error = xlog_bread(log, mid_blk, 1, bp, &offset); |
343 | if (error) | ||
325 | return error; | 344 | return error; |
326 | offset = xlog_align(log, mid_blk, 1, bp); | ||
327 | mid_cycle = xlog_get_cycle(offset); | 345 | mid_cycle = xlog_get_cycle(offset); |
328 | if (mid_cycle == cycle) { | 346 | if (mid_cycle == cycle) { |
329 | *last_blk = mid_blk; | 347 | *last_blk = mid_blk; |
@@ -379,10 +397,10 @@ xlog_find_verify_cycle( | |||
379 | 397 | ||
380 | bcount = min(bufblks, (start_blk + nbblks - i)); | 398 | bcount = min(bufblks, (start_blk + nbblks - i)); |
381 | 399 | ||
382 | if ((error = xlog_bread(log, i, bcount, bp))) | 400 | error = xlog_bread(log, i, bcount, bp, &buf); |
401 | if (error) | ||
383 | goto out; | 402 | goto out; |
384 | 403 | ||
385 | buf = xlog_align(log, i, bcount, bp); | ||
386 | for (j = 0; j < bcount; j++) { | 404 | for (j = 0; j < bcount; j++) { |
387 | cycle = xlog_get_cycle(buf); | 405 | cycle = xlog_get_cycle(buf); |
388 | if (cycle == stop_on_cycle_no) { | 406 | if (cycle == stop_on_cycle_no) { |
@@ -436,9 +454,9 @@ xlog_find_verify_log_record( | |||
436 | return ENOMEM; | 454 | return ENOMEM; |
437 | smallmem = 1; | 455 | smallmem = 1; |
438 | } else { | 456 | } else { |
439 | if ((error = xlog_bread(log, start_blk, num_blks, bp))) | 457 | error = xlog_bread(log, start_blk, num_blks, bp, &offset); |
458 | if (error) | ||
440 | goto out; | 459 | goto out; |
441 | offset = xlog_align(log, start_blk, num_blks, bp); | ||
442 | offset += ((num_blks - 1) << BBSHIFT); | 460 | offset += ((num_blks - 1) << BBSHIFT); |
443 | } | 461 | } |
444 | 462 | ||
@@ -453,9 +471,9 @@ xlog_find_verify_log_record( | |||
453 | } | 471 | } |
454 | 472 | ||
455 | if (smallmem) { | 473 | if (smallmem) { |
456 | if ((error = xlog_bread(log, i, 1, bp))) | 474 | error = xlog_bread(log, i, 1, bp, &offset); |
475 | if (error) | ||
457 | goto out; | 476 | goto out; |
458 | offset = xlog_align(log, i, 1, bp); | ||
459 | } | 477 | } |
460 | 478 | ||
461 | head = (xlog_rec_header_t *)offset; | 479 | head = (xlog_rec_header_t *)offset; |
@@ -559,15 +577,18 @@ xlog_find_head( | |||
559 | bp = xlog_get_bp(log, 1); | 577 | bp = xlog_get_bp(log, 1); |
560 | if (!bp) | 578 | if (!bp) |
561 | return ENOMEM; | 579 | return ENOMEM; |
562 | if ((error = xlog_bread(log, 0, 1, bp))) | 580 | |
581 | error = xlog_bread(log, 0, 1, bp, &offset); | ||
582 | if (error) | ||
563 | goto bp_err; | 583 | goto bp_err; |
564 | offset = xlog_align(log, 0, 1, bp); | 584 | |
565 | first_half_cycle = xlog_get_cycle(offset); | 585 | first_half_cycle = xlog_get_cycle(offset); |
566 | 586 | ||
567 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ | 587 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ |
568 | if ((error = xlog_bread(log, last_blk, 1, bp))) | 588 | error = xlog_bread(log, last_blk, 1, bp, &offset); |
589 | if (error) | ||
569 | goto bp_err; | 590 | goto bp_err; |
570 | offset = xlog_align(log, last_blk, 1, bp); | 591 | |
571 | last_half_cycle = xlog_get_cycle(offset); | 592 | last_half_cycle = xlog_get_cycle(offset); |
572 | ASSERT(last_half_cycle != 0); | 593 | ASSERT(last_half_cycle != 0); |
573 | 594 | ||
@@ -817,9 +838,10 @@ xlog_find_tail( | |||
817 | if (!bp) | 838 | if (!bp) |
818 | return ENOMEM; | 839 | return ENOMEM; |
819 | if (*head_blk == 0) { /* special case */ | 840 | if (*head_blk == 0) { /* special case */ |
820 | if ((error = xlog_bread(log, 0, 1, bp))) | 841 | error = xlog_bread(log, 0, 1, bp, &offset); |
842 | if (error) | ||
821 | goto bread_err; | 843 | goto bread_err; |
822 | offset = xlog_align(log, 0, 1, bp); | 844 | |
823 | if (xlog_get_cycle(offset) == 0) { | 845 | if (xlog_get_cycle(offset) == 0) { |
824 | *tail_blk = 0; | 846 | *tail_blk = 0; |
825 | /* leave all other log inited values alone */ | 847 | /* leave all other log inited values alone */ |
@@ -832,9 +854,10 @@ xlog_find_tail( | |||
832 | */ | 854 | */ |
833 | ASSERT(*head_blk < INT_MAX); | 855 | ASSERT(*head_blk < INT_MAX); |
834 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { | 856 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
835 | if ((error = xlog_bread(log, i, 1, bp))) | 857 | error = xlog_bread(log, i, 1, bp, &offset); |
858 | if (error) | ||
836 | goto bread_err; | 859 | goto bread_err; |
837 | offset = xlog_align(log, i, 1, bp); | 860 | |
838 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { | 861 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { |
839 | found = 1; | 862 | found = 1; |
840 | break; | 863 | break; |
@@ -848,9 +871,10 @@ xlog_find_tail( | |||
848 | */ | 871 | */ |
849 | if (!found) { | 872 | if (!found) { |
850 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { | 873 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
851 | if ((error = xlog_bread(log, i, 1, bp))) | 874 | error = xlog_bread(log, i, 1, bp, &offset); |
875 | if (error) | ||
852 | goto bread_err; | 876 | goto bread_err; |
853 | offset = xlog_align(log, i, 1, bp); | 877 | |
854 | if (XLOG_HEADER_MAGIC_NUM == | 878 | if (XLOG_HEADER_MAGIC_NUM == |
855 | be32_to_cpu(*(__be32 *)offset)) { | 879 | be32_to_cpu(*(__be32 *)offset)) { |
856 | found = 2; | 880 | found = 2; |
@@ -922,10 +946,10 @@ xlog_find_tail( | |||
922 | if (*head_blk == after_umount_blk && | 946 | if (*head_blk == after_umount_blk && |
923 | be32_to_cpu(rhead->h_num_logops) == 1) { | 947 | be32_to_cpu(rhead->h_num_logops) == 1) { |
924 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 948 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
925 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { | 949 | error = xlog_bread(log, umount_data_blk, 1, bp, &offset); |
950 | if (error) | ||
926 | goto bread_err; | 951 | goto bread_err; |
927 | } | 952 | |
928 | offset = xlog_align(log, umount_data_blk, 1, bp); | ||
929 | op_head = (xlog_op_header_t *)offset; | 953 | op_head = (xlog_op_header_t *)offset; |
930 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | 954 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
931 | /* | 955 | /* |
@@ -1017,9 +1041,10 @@ xlog_find_zeroed( | |||
1017 | bp = xlog_get_bp(log, 1); | 1041 | bp = xlog_get_bp(log, 1); |
1018 | if (!bp) | 1042 | if (!bp) |
1019 | return ENOMEM; | 1043 | return ENOMEM; |
1020 | if ((error = xlog_bread(log, 0, 1, bp))) | 1044 | error = xlog_bread(log, 0, 1, bp, &offset); |
1045 | if (error) | ||
1021 | goto bp_err; | 1046 | goto bp_err; |
1022 | offset = xlog_align(log, 0, 1, bp); | 1047 | |
1023 | first_cycle = xlog_get_cycle(offset); | 1048 | first_cycle = xlog_get_cycle(offset); |
1024 | if (first_cycle == 0) { /* completely zeroed log */ | 1049 | if (first_cycle == 0) { /* completely zeroed log */ |
1025 | *blk_no = 0; | 1050 | *blk_no = 0; |
@@ -1028,9 +1053,10 @@ xlog_find_zeroed( | |||
1028 | } | 1053 | } |
1029 | 1054 | ||
1030 | /* check partially zeroed log */ | 1055 | /* check partially zeroed log */ |
1031 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) | 1056 | error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); |
1057 | if (error) | ||
1032 | goto bp_err; | 1058 | goto bp_err; |
1033 | offset = xlog_align(log, log_bbnum-1, 1, bp); | 1059 | |
1034 | last_cycle = xlog_get_cycle(offset); | 1060 | last_cycle = xlog_get_cycle(offset); |
1035 | if (last_cycle != 0) { /* log completely written to */ | 1061 | if (last_cycle != 0) { /* log completely written to */ |
1036 | xlog_put_bp(bp); | 1062 | xlog_put_bp(bp); |
@@ -1152,10 +1178,10 @@ xlog_write_log_records( | |||
1152 | */ | 1178 | */ |
1153 | balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); | 1179 | balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); |
1154 | if (balign != start_block) { | 1180 | if (balign != start_block) { |
1155 | if ((error = xlog_bread(log, start_block, 1, bp))) { | 1181 | error = xlog_bread_noalign(log, start_block, 1, bp); |
1156 | xlog_put_bp(bp); | 1182 | if (error) |
1157 | return error; | 1183 | goto out_put_bp; |
1158 | } | 1184 | |
1159 | j = start_block - balign; | 1185 | j = start_block - balign; |
1160 | } | 1186 | } |
1161 | 1187 | ||
@@ -1175,10 +1201,14 @@ xlog_write_log_records( | |||
1175 | balign = BBTOB(ealign - start_block); | 1201 | balign = BBTOB(ealign - start_block); |
1176 | error = XFS_BUF_SET_PTR(bp, offset + balign, | 1202 | error = XFS_BUF_SET_PTR(bp, offset + balign, |
1177 | BBTOB(sectbb)); | 1203 | BBTOB(sectbb)); |
1178 | if (!error) | 1204 | if (error) |
1179 | error = xlog_bread(log, ealign, sectbb, bp); | 1205 | break; |
1180 | if (!error) | 1206 | |
1181 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | 1207 | error = xlog_bread_noalign(log, ealign, sectbb, bp); |
1208 | if (error) | ||
1209 | break; | ||
1210 | |||
1211 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
1182 | if (error) | 1212 | if (error) |
1183 | break; | 1213 | break; |
1184 | } | 1214 | } |
@@ -1195,6 +1225,8 @@ xlog_write_log_records( | |||
1195 | start_block += endcount; | 1225 | start_block += endcount; |
1196 | j = 0; | 1226 | j = 0; |
1197 | } | 1227 | } |
1228 | |||
1229 | out_put_bp: | ||
1198 | xlog_put_bp(bp); | 1230 | xlog_put_bp(bp); |
1199 | return error; | 1231 | return error; |
1200 | } | 1232 | } |
@@ -2511,16 +2543,10 @@ xlog_recover_do_inode_trans( | |||
2511 | } | 2543 | } |
2512 | 2544 | ||
2513 | write_inode_buffer: | 2545 | write_inode_buffer: |
2514 | if (ITEM_TYPE(item) == XFS_LI_INODE) { | 2546 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); |
2515 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2547 | bp->b_mount = mp; |
2516 | bp->b_mount = mp; | 2548 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2517 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2549 | xfs_bdwrite(mp, bp); |
2518 | xfs_bdwrite(mp, bp); | ||
2519 | } else { | ||
2520 | XFS_BUF_STALE(bp); | ||
2521 | error = xfs_bwrite(mp, bp); | ||
2522 | } | ||
2523 | |||
2524 | error: | 2550 | error: |
2525 | if (need_free) | 2551 | if (need_free) |
2526 | kmem_free(in_f); | 2552 | kmem_free(in_f); |
@@ -2769,51 +2795,48 @@ xlog_recover_do_trans( | |||
2769 | int error = 0; | 2795 | int error = 0; |
2770 | xlog_recover_item_t *item, *first_item; | 2796 | xlog_recover_item_t *item, *first_item; |
2771 | 2797 | ||
2772 | if ((error = xlog_recover_reorder_trans(trans))) | 2798 | error = xlog_recover_reorder_trans(trans); |
2799 | if (error) | ||
2773 | return error; | 2800 | return error; |
2801 | |||
2774 | first_item = item = trans->r_itemq; | 2802 | first_item = item = trans->r_itemq; |
2775 | do { | 2803 | do { |
2776 | /* | 2804 | switch (ITEM_TYPE(item)) { |
2777 | * we don't need to worry about the block number being | 2805 | case XFS_LI_BUF: |
2778 | * truncated in > 1 TB buffers because in user-land, | 2806 | error = xlog_recover_do_buffer_trans(log, item, pass); |
2779 | * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so | 2807 | break; |
2780 | * the blknos will get through the user-mode buffer | 2808 | case XFS_LI_INODE: |
2781 | * cache properly. The only bad case is o32 kernels | 2809 | error = xlog_recover_do_inode_trans(log, item, pass); |
2782 | * where xfs_daddr_t is 32-bits but mount will warn us | 2810 | break; |
2783 | * off a > 1 TB filesystem before we get here. | 2811 | case XFS_LI_EFI: |
2784 | */ | 2812 | error = xlog_recover_do_efi_trans(log, item, |
2785 | if ((ITEM_TYPE(item) == XFS_LI_BUF)) { | 2813 | trans->r_lsn, pass); |
2786 | if ((error = xlog_recover_do_buffer_trans(log, item, | 2814 | break; |
2787 | pass))) | 2815 | case XFS_LI_EFD: |
2788 | break; | ||
2789 | } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { | ||
2790 | if ((error = xlog_recover_do_inode_trans(log, item, | ||
2791 | pass))) | ||
2792 | break; | ||
2793 | } else if (ITEM_TYPE(item) == XFS_LI_EFI) { | ||
2794 | if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, | ||
2795 | pass))) | ||
2796 | break; | ||
2797 | } else if (ITEM_TYPE(item) == XFS_LI_EFD) { | ||
2798 | xlog_recover_do_efd_trans(log, item, pass); | 2816 | xlog_recover_do_efd_trans(log, item, pass); |
2799 | } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { | 2817 | error = 0; |
2800 | if ((error = xlog_recover_do_dquot_trans(log, item, | 2818 | break; |
2801 | pass))) | 2819 | case XFS_LI_DQUOT: |
2802 | break; | 2820 | error = xlog_recover_do_dquot_trans(log, item, pass); |
2803 | } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { | 2821 | break; |
2804 | if ((error = xlog_recover_do_quotaoff_trans(log, item, | 2822 | case XFS_LI_QUOTAOFF: |
2805 | pass))) | 2823 | error = xlog_recover_do_quotaoff_trans(log, item, |
2806 | break; | 2824 | pass); |
2807 | } else { | 2825 | break; |
2808 | xlog_warn("XFS: xlog_recover_do_trans"); | 2826 | default: |
2827 | xlog_warn( | ||
2828 | "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); | ||
2809 | ASSERT(0); | 2829 | ASSERT(0); |
2810 | error = XFS_ERROR(EIO); | 2830 | error = XFS_ERROR(EIO); |
2811 | break; | 2831 | break; |
2812 | } | 2832 | } |
2833 | |||
2834 | if (error) | ||
2835 | return error; | ||
2813 | item = item->ri_next; | 2836 | item = item->ri_next; |
2814 | } while (first_item != item); | 2837 | } while (first_item != item); |
2815 | 2838 | ||
2816 | return error; | 2839 | return 0; |
2817 | } | 2840 | } |
2818 | 2841 | ||
2819 | /* | 2842 | /* |
@@ -3490,9 +3513,11 @@ xlog_do_recovery_pass( | |||
3490 | hbp = xlog_get_bp(log, 1); | 3513 | hbp = xlog_get_bp(log, 1); |
3491 | if (!hbp) | 3514 | if (!hbp) |
3492 | return ENOMEM; | 3515 | return ENOMEM; |
3493 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) | 3516 | |
3517 | error = xlog_bread(log, tail_blk, 1, hbp, &offset); | ||
3518 | if (error) | ||
3494 | goto bread_err1; | 3519 | goto bread_err1; |
3495 | offset = xlog_align(log, tail_blk, 1, hbp); | 3520 | |
3496 | rhead = (xlog_rec_header_t *)offset; | 3521 | rhead = (xlog_rec_header_t *)offset; |
3497 | error = xlog_valid_rec_header(log, rhead, tail_blk); | 3522 | error = xlog_valid_rec_header(log, rhead, tail_blk); |
3498 | if (error) | 3523 | if (error) |
@@ -3526,9 +3551,10 @@ xlog_do_recovery_pass( | |||
3526 | memset(rhash, 0, sizeof(rhash)); | 3551 | memset(rhash, 0, sizeof(rhash)); |
3527 | if (tail_blk <= head_blk) { | 3552 | if (tail_blk <= head_blk) { |
3528 | for (blk_no = tail_blk; blk_no < head_blk; ) { | 3553 | for (blk_no = tail_blk; blk_no < head_blk; ) { |
3529 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | 3554 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); |
3555 | if (error) | ||
3530 | goto bread_err2; | 3556 | goto bread_err2; |
3531 | offset = xlog_align(log, blk_no, hblks, hbp); | 3557 | |
3532 | rhead = (xlog_rec_header_t *)offset; | 3558 | rhead = (xlog_rec_header_t *)offset; |
3533 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3559 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3534 | if (error) | 3560 | if (error) |
@@ -3536,10 +3562,11 @@ xlog_do_recovery_pass( | |||
3536 | 3562 | ||
3537 | /* blocks in data section */ | 3563 | /* blocks in data section */ |
3538 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3564 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3539 | error = xlog_bread(log, blk_no + hblks, bblks, dbp); | 3565 | error = xlog_bread(log, blk_no + hblks, bblks, dbp, |
3566 | &offset); | ||
3540 | if (error) | 3567 | if (error) |
3541 | goto bread_err2; | 3568 | goto bread_err2; |
3542 | offset = xlog_align(log, blk_no + hblks, bblks, dbp); | 3569 | |
3543 | xlog_unpack_data(rhead, offset, log); | 3570 | xlog_unpack_data(rhead, offset, log); |
3544 | if ((error = xlog_recover_process_data(log, | 3571 | if ((error = xlog_recover_process_data(log, |
3545 | rhash, rhead, offset, pass))) | 3572 | rhash, rhead, offset, pass))) |
@@ -3562,10 +3589,10 @@ xlog_do_recovery_pass( | |||
3562 | wrapped_hblks = 0; | 3589 | wrapped_hblks = 0; |
3563 | if (blk_no + hblks <= log->l_logBBsize) { | 3590 | if (blk_no + hblks <= log->l_logBBsize) { |
3564 | /* Read header in one read */ | 3591 | /* Read header in one read */ |
3565 | error = xlog_bread(log, blk_no, hblks, hbp); | 3592 | error = xlog_bread(log, blk_no, hblks, hbp, |
3593 | &offset); | ||
3566 | if (error) | 3594 | if (error) |
3567 | goto bread_err2; | 3595 | goto bread_err2; |
3568 | offset = xlog_align(log, blk_no, hblks, hbp); | ||
3569 | } else { | 3596 | } else { |
3570 | /* This LR is split across physical log end */ | 3597 | /* This LR is split across physical log end */ |
3571 | if (blk_no != log->l_logBBsize) { | 3598 | if (blk_no != log->l_logBBsize) { |
@@ -3573,12 +3600,13 @@ xlog_do_recovery_pass( | |||
3573 | ASSERT(blk_no <= INT_MAX); | 3600 | ASSERT(blk_no <= INT_MAX); |
3574 | split_hblks = log->l_logBBsize - (int)blk_no; | 3601 | split_hblks = log->l_logBBsize - (int)blk_no; |
3575 | ASSERT(split_hblks > 0); | 3602 | ASSERT(split_hblks > 0); |
3576 | if ((error = xlog_bread(log, blk_no, | 3603 | error = xlog_bread(log, blk_no, |
3577 | split_hblks, hbp))) | 3604 | split_hblks, hbp, |
3605 | &offset); | ||
3606 | if (error) | ||
3578 | goto bread_err2; | 3607 | goto bread_err2; |
3579 | offset = xlog_align(log, blk_no, | ||
3580 | split_hblks, hbp); | ||
3581 | } | 3608 | } |
3609 | |||
3582 | /* | 3610 | /* |
3583 | * Note: this black magic still works with | 3611 | * Note: this black magic still works with |
3584 | * large sector sizes (non-512) only because: | 3612 | * large sector sizes (non-512) only because: |
@@ -3596,14 +3624,19 @@ xlog_do_recovery_pass( | |||
3596 | error = XFS_BUF_SET_PTR(hbp, | 3624 | error = XFS_BUF_SET_PTR(hbp, |
3597 | bufaddr + BBTOB(split_hblks), | 3625 | bufaddr + BBTOB(split_hblks), |
3598 | BBTOB(hblks - split_hblks)); | 3626 | BBTOB(hblks - split_hblks)); |
3599 | if (!error) | 3627 | if (error) |
3600 | error = xlog_bread(log, 0, | 3628 | goto bread_err2; |
3601 | wrapped_hblks, hbp); | 3629 | |
3602 | if (!error) | 3630 | error = xlog_bread_noalign(log, 0, |
3603 | error = XFS_BUF_SET_PTR(hbp, bufaddr, | 3631 | wrapped_hblks, hbp); |
3632 | if (error) | ||
3633 | goto bread_err2; | ||
3634 | |||
3635 | error = XFS_BUF_SET_PTR(hbp, bufaddr, | ||
3604 | BBTOB(hblks)); | 3636 | BBTOB(hblks)); |
3605 | if (error) | 3637 | if (error) |
3606 | goto bread_err2; | 3638 | goto bread_err2; |
3639 | |||
3607 | if (!offset) | 3640 | if (!offset) |
3608 | offset = xlog_align(log, 0, | 3641 | offset = xlog_align(log, 0, |
3609 | wrapped_hblks, hbp); | 3642 | wrapped_hblks, hbp); |
@@ -3619,10 +3652,10 @@ xlog_do_recovery_pass( | |||
3619 | 3652 | ||
3620 | /* Read in data for log record */ | 3653 | /* Read in data for log record */ |
3621 | if (blk_no + bblks <= log->l_logBBsize) { | 3654 | if (blk_no + bblks <= log->l_logBBsize) { |
3622 | error = xlog_bread(log, blk_no, bblks, dbp); | 3655 | error = xlog_bread(log, blk_no, bblks, dbp, |
3656 | &offset); | ||
3623 | if (error) | 3657 | if (error) |
3624 | goto bread_err2; | 3658 | goto bread_err2; |
3625 | offset = xlog_align(log, blk_no, bblks, dbp); | ||
3626 | } else { | 3659 | } else { |
3627 | /* This log record is split across the | 3660 | /* This log record is split across the |
3628 | * physical end of log */ | 3661 | * physical end of log */ |
@@ -3636,12 +3669,13 @@ xlog_do_recovery_pass( | |||
3636 | split_bblks = | 3669 | split_bblks = |
3637 | log->l_logBBsize - (int)blk_no; | 3670 | log->l_logBBsize - (int)blk_no; |
3638 | ASSERT(split_bblks > 0); | 3671 | ASSERT(split_bblks > 0); |
3639 | if ((error = xlog_bread(log, blk_no, | 3672 | error = xlog_bread(log, blk_no, |
3640 | split_bblks, dbp))) | 3673 | split_bblks, dbp, |
3674 | &offset); | ||
3675 | if (error) | ||
3641 | goto bread_err2; | 3676 | goto bread_err2; |
3642 | offset = xlog_align(log, blk_no, | ||
3643 | split_bblks, dbp); | ||
3644 | } | 3677 | } |
3678 | |||
3645 | /* | 3679 | /* |
3646 | * Note: this black magic still works with | 3680 | * Note: this black magic still works with |
3647 | * large sector sizes (non-512) only because: | 3681 | * large sector sizes (non-512) only because: |
@@ -3658,15 +3692,19 @@ xlog_do_recovery_pass( | |||
3658 | error = XFS_BUF_SET_PTR(dbp, | 3692 | error = XFS_BUF_SET_PTR(dbp, |
3659 | bufaddr + BBTOB(split_bblks), | 3693 | bufaddr + BBTOB(split_bblks), |
3660 | BBTOB(bblks - split_bblks)); | 3694 | BBTOB(bblks - split_bblks)); |
3661 | if (!error) | ||
3662 | error = xlog_bread(log, wrapped_hblks, | ||
3663 | bblks - split_bblks, | ||
3664 | dbp); | ||
3665 | if (!error) | ||
3666 | error = XFS_BUF_SET_PTR(dbp, bufaddr, | ||
3667 | h_size); | ||
3668 | if (error) | 3695 | if (error) |
3669 | goto bread_err2; | 3696 | goto bread_err2; |
3697 | |||
3698 | error = xlog_bread_noalign(log, wrapped_hblks, | ||
3699 | bblks - split_bblks, | ||
3700 | dbp); | ||
3701 | if (error) | ||
3702 | goto bread_err2; | ||
3703 | |||
3704 | error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size); | ||
3705 | if (error) | ||
3706 | goto bread_err2; | ||
3707 | |||
3670 | if (!offset) | 3708 | if (!offset) |
3671 | offset = xlog_align(log, wrapped_hblks, | 3709 | offset = xlog_align(log, wrapped_hblks, |
3672 | bblks - split_bblks, dbp); | 3710 | bblks - split_bblks, dbp); |
@@ -3683,17 +3721,21 @@ xlog_do_recovery_pass( | |||
3683 | 3721 | ||
3684 | /* read first part of physical log */ | 3722 | /* read first part of physical log */ |
3685 | while (blk_no < head_blk) { | 3723 | while (blk_no < head_blk) { |
3686 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | 3724 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); |
3725 | if (error) | ||
3687 | goto bread_err2; | 3726 | goto bread_err2; |
3688 | offset = xlog_align(log, blk_no, hblks, hbp); | 3727 | |
3689 | rhead = (xlog_rec_header_t *)offset; | 3728 | rhead = (xlog_rec_header_t *)offset; |
3690 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3729 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3691 | if (error) | 3730 | if (error) |
3692 | goto bread_err2; | 3731 | goto bread_err2; |
3732 | |||
3693 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3733 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3694 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) | 3734 | error = xlog_bread(log, blk_no+hblks, bblks, dbp, |
3735 | &offset); | ||
3736 | if (error) | ||
3695 | goto bread_err2; | 3737 | goto bread_err2; |
3696 | offset = xlog_align(log, blk_no+hblks, bblks, dbp); | 3738 | |
3697 | xlog_unpack_data(rhead, offset, log); | 3739 | xlog_unpack_data(rhead, offset, log); |
3698 | if ((error = xlog_recover_process_data(log, rhash, | 3740 | if ((error = xlog_recover_process_data(log, rhash, |
3699 | rhead, offset, pass))) | 3741 | rhead, offset, pass))) |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 35300250e86d..b101990df027 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -45,7 +45,6 @@ | |||
45 | #include "xfs_fsops.h" | 45 | #include "xfs_fsops.h" |
46 | #include "xfs_utils.h" | 46 | #include "xfs_utils.h" |
47 | 47 | ||
48 | STATIC int xfs_uuid_mount(xfs_mount_t *); | ||
49 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); | 48 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); |
50 | 49 | ||
51 | 50 | ||
@@ -121,6 +120,84 @@ static const struct { | |||
121 | { sizeof(xfs_sb_t), 0 } | 120 | { sizeof(xfs_sb_t), 0 } |
122 | }; | 121 | }; |
123 | 122 | ||
123 | static DEFINE_MUTEX(xfs_uuid_table_mutex); | ||
124 | static int xfs_uuid_table_size; | ||
125 | static uuid_t *xfs_uuid_table; | ||
126 | |||
127 | /* | ||
128 | * See if the UUID is unique among mounted XFS filesystems. | ||
129 | * Mount fails if UUID is nil or a FS with the same UUID is already mounted. | ||
130 | */ | ||
131 | STATIC int | ||
132 | xfs_uuid_mount( | ||
133 | struct xfs_mount *mp) | ||
134 | { | ||
135 | uuid_t *uuid = &mp->m_sb.sb_uuid; | ||
136 | int hole, i; | ||
137 | |||
138 | if (mp->m_flags & XFS_MOUNT_NOUUID) | ||
139 | return 0; | ||
140 | |||
141 | if (uuid_is_nil(uuid)) { | ||
142 | cmn_err(CE_WARN, | ||
143 | "XFS: Filesystem %s has nil UUID - can't mount", | ||
144 | mp->m_fsname); | ||
145 | return XFS_ERROR(EINVAL); | ||
146 | } | ||
147 | |||
148 | mutex_lock(&xfs_uuid_table_mutex); | ||
149 | for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) { | ||
150 | if (uuid_is_nil(&xfs_uuid_table[i])) { | ||
151 | hole = i; | ||
152 | continue; | ||
153 | } | ||
154 | if (uuid_equal(uuid, &xfs_uuid_table[i])) | ||
155 | goto out_duplicate; | ||
156 | } | ||
157 | |||
158 | if (hole < 0) { | ||
159 | xfs_uuid_table = kmem_realloc(xfs_uuid_table, | ||
160 | (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), | ||
161 | xfs_uuid_table_size * sizeof(*xfs_uuid_table), | ||
162 | KM_SLEEP); | ||
163 | hole = xfs_uuid_table_size++; | ||
164 | } | ||
165 | xfs_uuid_table[hole] = *uuid; | ||
166 | mutex_unlock(&xfs_uuid_table_mutex); | ||
167 | |||
168 | return 0; | ||
169 | |||
170 | out_duplicate: | ||
171 | mutex_unlock(&xfs_uuid_table_mutex); | ||
172 | cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", | ||
173 | mp->m_fsname); | ||
174 | return XFS_ERROR(EINVAL); | ||
175 | } | ||
176 | |||
177 | STATIC void | ||
178 | xfs_uuid_unmount( | ||
179 | struct xfs_mount *mp) | ||
180 | { | ||
181 | uuid_t *uuid = &mp->m_sb.sb_uuid; | ||
182 | int i; | ||
183 | |||
184 | if (mp->m_flags & XFS_MOUNT_NOUUID) | ||
185 | return; | ||
186 | |||
187 | mutex_lock(&xfs_uuid_table_mutex); | ||
188 | for (i = 0; i < xfs_uuid_table_size; i++) { | ||
189 | if (uuid_is_nil(&xfs_uuid_table[i])) | ||
190 | continue; | ||
191 | if (!uuid_equal(uuid, &xfs_uuid_table[i])) | ||
192 | continue; | ||
193 | memset(&xfs_uuid_table[i], 0, sizeof(uuid_t)); | ||
194 | break; | ||
195 | } | ||
196 | ASSERT(i < xfs_uuid_table_size); | ||
197 | mutex_unlock(&xfs_uuid_table_mutex); | ||
198 | } | ||
199 | |||
200 | |||
124 | /* | 201 | /* |
125 | * Free up the resources associated with a mount structure. Assume that | 202 | * Free up the resources associated with a mount structure. Assume that |
126 | * the structure was initially zeroed, so we can tell which fields got | 203 | * the structure was initially zeroed, so we can tell which fields got |
@@ -256,6 +333,22 @@ xfs_mount_validate_sb( | |||
256 | return XFS_ERROR(ENOSYS); | 333 | return XFS_ERROR(ENOSYS); |
257 | } | 334 | } |
258 | 335 | ||
336 | /* | ||
337 | * Currently only very few inode sizes are supported. | ||
338 | */ | ||
339 | switch (sbp->sb_inodesize) { | ||
340 | case 256: | ||
341 | case 512: | ||
342 | case 1024: | ||
343 | case 2048: | ||
344 | break; | ||
345 | default: | ||
346 | xfs_fs_mount_cmn_err(flags, | ||
347 | "inode size of %d bytes not supported", | ||
348 | sbp->sb_inodesize); | ||
349 | return XFS_ERROR(ENOSYS); | ||
350 | } | ||
351 | |||
259 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || | 352 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || |
260 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { | 353 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { |
261 | xfs_fs_mount_cmn_err(flags, | 354 | xfs_fs_mount_cmn_err(flags, |
@@ -574,32 +667,10 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) | |||
574 | mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; | 667 | mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; |
575 | mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; | 668 | mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; |
576 | mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; | 669 | mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; |
577 | mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode); | ||
578 | mp->m_blockmask = sbp->sb_blocksize - 1; | 670 | mp->m_blockmask = sbp->sb_blocksize - 1; |
579 | mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; | 671 | mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; |
580 | mp->m_blockwmask = mp->m_blockwsize - 1; | 672 | mp->m_blockwmask = mp->m_blockwsize - 1; |
581 | 673 | ||
582 | /* | ||
583 | * Setup for attributes, in case they get created. | ||
584 | * This value is for inodes getting attributes for the first time, | ||
585 | * the per-inode value is for old attribute values. | ||
586 | */ | ||
587 | ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048); | ||
588 | switch (sbp->sb_inodesize) { | ||
589 | case 256: | ||
590 | mp->m_attroffset = XFS_LITINO(mp) - | ||
591 | XFS_BMDR_SPACE_CALC(MINABTPTRS); | ||
592 | break; | ||
593 | case 512: | ||
594 | case 1024: | ||
595 | case 2048: | ||
596 | mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); | ||
597 | break; | ||
598 | default: | ||
599 | ASSERT(0); | ||
600 | } | ||
601 | ASSERT(mp->m_attroffset < XFS_LITINO(mp)); | ||
602 | |||
603 | mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); | 674 | mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); |
604 | mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); | 675 | mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); |
605 | mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; | 676 | mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; |
@@ -645,7 +716,7 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) | |||
645 | for (index = 0; index < agcount; index++) { | 716 | for (index = 0; index < agcount; index++) { |
646 | /* | 717 | /* |
647 | * read the agf, then the agi. This gets us | 718 | * read the agf, then the agi. This gets us |
648 | * all the inforamtion we need and populates the | 719 | * all the information we need and populates the |
649 | * per-ag structures for us. | 720 | * per-ag structures for us. |
650 | */ | 721 | */ |
651 | error = xfs_alloc_pagf_init(mp, NULL, index, 0); | 722 | error = xfs_alloc_pagf_init(mp, NULL, index, 0); |
@@ -886,8 +957,6 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
886 | } | 957 | } |
887 | 958 | ||
888 | /* | 959 | /* |
889 | * xfs_mountfs | ||
890 | * | ||
891 | * This function does the following on an initial mount of a file system: | 960 | * This function does the following on an initial mount of a file system: |
892 | * - reads the superblock from disk and init the mount struct | 961 | * - reads the superblock from disk and init the mount struct |
893 | * - if we're a 32-bit kernel, do a size check on the superblock | 962 | * - if we're a 32-bit kernel, do a size check on the superblock |
@@ -905,7 +974,6 @@ xfs_mountfs( | |||
905 | xfs_inode_t *rip; | 974 | xfs_inode_t *rip; |
906 | __uint64_t resblks; | 975 | __uint64_t resblks; |
907 | uint quotamount, quotaflags; | 976 | uint quotamount, quotaflags; |
908 | int uuid_mounted = 0; | ||
909 | int error = 0; | 977 | int error = 0; |
910 | 978 | ||
911 | xfs_mount_common(mp, sbp); | 979 | xfs_mount_common(mp, sbp); |
@@ -960,7 +1028,7 @@ xfs_mountfs( | |||
960 | */ | 1028 | */ |
961 | error = xfs_update_alignment(mp); | 1029 | error = xfs_update_alignment(mp); |
962 | if (error) | 1030 | if (error) |
963 | goto error1; | 1031 | goto out; |
964 | 1032 | ||
965 | xfs_alloc_compute_maxlevels(mp); | 1033 | xfs_alloc_compute_maxlevels(mp); |
966 | xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); | 1034 | xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); |
@@ -971,19 +1039,9 @@ xfs_mountfs( | |||
971 | 1039 | ||
972 | mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); | 1040 | mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); |
973 | 1041 | ||
974 | /* | 1042 | error = xfs_uuid_mount(mp); |
975 | * XFS uses the uuid from the superblock as the unique | 1043 | if (error) |
976 | * identifier for fsid. We can not use the uuid from the volume | 1044 | goto out; |
977 | * since a single partition filesystem is identical to a single | ||
978 | * partition volume/filesystem. | ||
979 | */ | ||
980 | if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) { | ||
981 | if (xfs_uuid_mount(mp)) { | ||
982 | error = XFS_ERROR(EINVAL); | ||
983 | goto error1; | ||
984 | } | ||
985 | uuid_mounted=1; | ||
986 | } | ||
987 | 1045 | ||
988 | /* | 1046 | /* |
989 | * Set the minimum read and write sizes | 1047 | * Set the minimum read and write sizes |
@@ -1007,7 +1065,7 @@ xfs_mountfs( | |||
1007 | */ | 1065 | */ |
1008 | error = xfs_check_sizes(mp); | 1066 | error = xfs_check_sizes(mp); |
1009 | if (error) | 1067 | if (error) |
1010 | goto error1; | 1068 | goto out_remove_uuid; |
1011 | 1069 | ||
1012 | /* | 1070 | /* |
1013 | * Initialize realtime fields in the mount structure | 1071 | * Initialize realtime fields in the mount structure |
@@ -1015,7 +1073,7 @@ xfs_mountfs( | |||
1015 | error = xfs_rtmount_init(mp); | 1073 | error = xfs_rtmount_init(mp); |
1016 | if (error) { | 1074 | if (error) { |
1017 | cmn_err(CE_WARN, "XFS: RT mount failed"); | 1075 | cmn_err(CE_WARN, "XFS: RT mount failed"); |
1018 | goto error1; | 1076 | goto out_remove_uuid; |
1019 | } | 1077 | } |
1020 | 1078 | ||
1021 | /* | 1079 | /* |
@@ -1045,26 +1103,26 @@ xfs_mountfs( | |||
1045 | mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), | 1103 | mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), |
1046 | KM_MAYFAIL); | 1104 | KM_MAYFAIL); |
1047 | if (!mp->m_perag) | 1105 | if (!mp->m_perag) |
1048 | goto error1; | 1106 | goto out_remove_uuid; |
1049 | 1107 | ||
1050 | mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); | 1108 | mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); |
1051 | 1109 | ||
1110 | if (!sbp->sb_logblocks) { | ||
1111 | cmn_err(CE_WARN, "XFS: no log defined"); | ||
1112 | XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); | ||
1113 | error = XFS_ERROR(EFSCORRUPTED); | ||
1114 | goto out_free_perag; | ||
1115 | } | ||
1116 | |||
1052 | /* | 1117 | /* |
1053 | * log's mount-time initialization. Perform 1st part recovery if needed | 1118 | * log's mount-time initialization. Perform 1st part recovery if needed |
1054 | */ | 1119 | */ |
1055 | if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */ | 1120 | error = xfs_log_mount(mp, mp->m_logdev_targp, |
1056 | error = xfs_log_mount(mp, mp->m_logdev_targp, | 1121 | XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), |
1057 | XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), | 1122 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); |
1058 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); | 1123 | if (error) { |
1059 | if (error) { | 1124 | cmn_err(CE_WARN, "XFS: log mount failed"); |
1060 | cmn_err(CE_WARN, "XFS: log mount failed"); | 1125 | goto out_free_perag; |
1061 | goto error2; | ||
1062 | } | ||
1063 | } else { /* No log has been defined */ | ||
1064 | cmn_err(CE_WARN, "XFS: no log defined"); | ||
1065 | XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp); | ||
1066 | error = XFS_ERROR(EFSCORRUPTED); | ||
1067 | goto error2; | ||
1068 | } | 1126 | } |
1069 | 1127 | ||
1070 | /* | 1128 | /* |
@@ -1086,15 +1144,14 @@ xfs_mountfs( | |||
1086 | * If we are currently making the filesystem, the initialisation will | 1144 | * If we are currently making the filesystem, the initialisation will |
1087 | * fail as the perag data is in an undefined state. | 1145 | * fail as the perag data is in an undefined state. |
1088 | */ | 1146 | */ |
1089 | |||
1090 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) && | 1147 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) && |
1091 | !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && | 1148 | !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && |
1092 | !mp->m_sb.sb_inprogress) { | 1149 | !mp->m_sb.sb_inprogress) { |
1093 | error = xfs_initialize_perag_data(mp, sbp->sb_agcount); | 1150 | error = xfs_initialize_perag_data(mp, sbp->sb_agcount); |
1094 | if (error) { | 1151 | if (error) |
1095 | goto error2; | 1152 | goto out_free_perag; |
1096 | } | ||
1097 | } | 1153 | } |
1154 | |||
1098 | /* | 1155 | /* |
1099 | * Get and sanity-check the root inode. | 1156 | * Get and sanity-check the root inode. |
1100 | * Save the pointer to it in the mount structure. | 1157 | * Save the pointer to it in the mount structure. |
@@ -1102,7 +1159,7 @@ xfs_mountfs( | |||
1102 | error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); | 1159 | error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); |
1103 | if (error) { | 1160 | if (error) { |
1104 | cmn_err(CE_WARN, "XFS: failed to read root inode"); | 1161 | cmn_err(CE_WARN, "XFS: failed to read root inode"); |
1105 | goto error3; | 1162 | goto out_log_dealloc; |
1106 | } | 1163 | } |
1107 | 1164 | ||
1108 | ASSERT(rip != NULL); | 1165 | ASSERT(rip != NULL); |
@@ -1116,7 +1173,7 @@ xfs_mountfs( | |||
1116 | XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, | 1173 | XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, |
1117 | mp); | 1174 | mp); |
1118 | error = XFS_ERROR(EFSCORRUPTED); | 1175 | error = XFS_ERROR(EFSCORRUPTED); |
1119 | goto error4; | 1176 | goto out_rele_rip; |
1120 | } | 1177 | } |
1121 | mp->m_rootip = rip; /* save it */ | 1178 | mp->m_rootip = rip; /* save it */ |
1122 | 1179 | ||
@@ -1131,7 +1188,7 @@ xfs_mountfs( | |||
1131 | * Free up the root inode. | 1188 | * Free up the root inode. |
1132 | */ | 1189 | */ |
1133 | cmn_err(CE_WARN, "XFS: failed to read RT inodes"); | 1190 | cmn_err(CE_WARN, "XFS: failed to read RT inodes"); |
1134 | goto error4; | 1191 | goto out_rele_rip; |
1135 | } | 1192 | } |
1136 | 1193 | ||
1137 | /* | 1194 | /* |
@@ -1143,7 +1200,7 @@ xfs_mountfs( | |||
1143 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1200 | error = xfs_mount_log_sb(mp, mp->m_update_flags); |
1144 | if (error) { | 1201 | if (error) { |
1145 | cmn_err(CE_WARN, "XFS: failed to write sb changes"); | 1202 | cmn_err(CE_WARN, "XFS: failed to write sb changes"); |
1146 | goto error4; | 1203 | goto out_rtunmount; |
1147 | } | 1204 | } |
1148 | } | 1205 | } |
1149 | 1206 | ||
@@ -1152,7 +1209,7 @@ xfs_mountfs( | |||
1152 | */ | 1209 | */ |
1153 | error = XFS_QM_INIT(mp, "amount, "aflags); | 1210 | error = XFS_QM_INIT(mp, "amount, "aflags); |
1154 | if (error) | 1211 | if (error) |
1155 | goto error4; | 1212 | goto out_rtunmount; |
1156 | 1213 | ||
1157 | /* | 1214 | /* |
1158 | * Finish recovering the file system. This part needed to be | 1215 | * Finish recovering the file system. This part needed to be |
@@ -1162,7 +1219,7 @@ xfs_mountfs( | |||
1162 | error = xfs_log_mount_finish(mp); | 1219 | error = xfs_log_mount_finish(mp); |
1163 | if (error) { | 1220 | if (error) { |
1164 | cmn_err(CE_WARN, "XFS: log mount finish failed"); | 1221 | cmn_err(CE_WARN, "XFS: log mount finish failed"); |
1165 | goto error4; | 1222 | goto out_rtunmount; |
1166 | } | 1223 | } |
1167 | 1224 | ||
1168 | /* | 1225 | /* |
@@ -1170,7 +1227,7 @@ xfs_mountfs( | |||
1170 | */ | 1227 | */ |
1171 | error = XFS_QM_MOUNT(mp, quotamount, quotaflags); | 1228 | error = XFS_QM_MOUNT(mp, quotamount, quotaflags); |
1172 | if (error) | 1229 | if (error) |
1173 | goto error4; | 1230 | goto out_rtunmount; |
1174 | 1231 | ||
1175 | /* | 1232 | /* |
1176 | * Now we are mounted, reserve a small amount of unused space for | 1233 | * Now we are mounted, reserve a small amount of unused space for |
@@ -1194,18 +1251,17 @@ xfs_mountfs( | |||
1194 | 1251 | ||
1195 | return 0; | 1252 | return 0; |
1196 | 1253 | ||
1197 | error4: | 1254 | out_rtunmount: |
1198 | /* | 1255 | xfs_rtunmount_inodes(mp); |
1199 | * Free up the root inode. | 1256 | out_rele_rip: |
1200 | */ | ||
1201 | IRELE(rip); | 1257 | IRELE(rip); |
1202 | error3: | 1258 | out_log_dealloc: |
1203 | xfs_log_unmount_dealloc(mp); | 1259 | xfs_log_unmount(mp); |
1204 | error2: | 1260 | out_free_perag: |
1205 | xfs_free_perag(mp); | 1261 | xfs_free_perag(mp); |
1206 | error1: | 1262 | out_remove_uuid: |
1207 | if (uuid_mounted) | 1263 | xfs_uuid_unmount(mp); |
1208 | uuid_table_remove(&mp->m_sb.sb_uuid); | 1264 | out: |
1209 | return error; | 1265 | return error; |
1210 | } | 1266 | } |
1211 | 1267 | ||
@@ -1226,15 +1282,12 @@ xfs_unmountfs( | |||
1226 | */ | 1282 | */ |
1227 | XFS_QM_UNMOUNT(mp); | 1283 | XFS_QM_UNMOUNT(mp); |
1228 | 1284 | ||
1229 | if (mp->m_rbmip) | 1285 | xfs_rtunmount_inodes(mp); |
1230 | IRELE(mp->m_rbmip); | ||
1231 | if (mp->m_rsumip) | ||
1232 | IRELE(mp->m_rsumip); | ||
1233 | IRELE(mp->m_rootip); | 1286 | IRELE(mp->m_rootip); |
1234 | 1287 | ||
1235 | /* | 1288 | /* |
1236 | * We can potentially deadlock here if we have an inode cluster | 1289 | * We can potentially deadlock here if we have an inode cluster |
1237 | * that has been freed has it's buffer still pinned in memory because | 1290 | * that has been freed has its buffer still pinned in memory because |
1238 | * the transaction is still sitting in a iclog. The stale inodes | 1291 | * the transaction is still sitting in a iclog. The stale inodes |
1239 | * on that buffer will have their flush locks held until the | 1292 | * on that buffer will have their flush locks held until the |
1240 | * transaction hits the disk and the callbacks run. the inode | 1293 | * transaction hits the disk and the callbacks run. the inode |
@@ -1266,7 +1319,7 @@ xfs_unmountfs( | |||
1266 | * Unreserve any blocks we have so that when we unmount we don't account | 1319 | * Unreserve any blocks we have so that when we unmount we don't account |
1267 | * the reserved free space as used. This is really only necessary for | 1320 | * the reserved free space as used. This is really only necessary for |
1268 | * lazy superblock counting because it trusts the incore superblock | 1321 | * lazy superblock counting because it trusts the incore superblock |
1269 | * counters to be aboslutely correct on clean unmount. | 1322 | * counters to be absolutely correct on clean unmount. |
1270 | * | 1323 | * |
1271 | * We don't bother correcting this elsewhere for lazy superblock | 1324 | * We don't bother correcting this elsewhere for lazy superblock |
1272 | * counting because on mount of an unclean filesystem we reconstruct the | 1325 | * counting because on mount of an unclean filesystem we reconstruct the |
@@ -1288,10 +1341,9 @@ xfs_unmountfs( | |||
1288 | "Freespace may not be correct on next mount."); | 1341 | "Freespace may not be correct on next mount."); |
1289 | xfs_unmountfs_writesb(mp); | 1342 | xfs_unmountfs_writesb(mp); |
1290 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1343 | xfs_unmountfs_wait(mp); /* wait for async bufs */ |
1291 | xfs_log_unmount(mp); /* Done! No more fs ops. */ | 1344 | xfs_log_unmount_write(mp); |
1292 | 1345 | xfs_log_unmount(mp); | |
1293 | if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) | 1346 | xfs_uuid_unmount(mp); |
1294 | uuid_table_remove(&mp->m_sb.sb_uuid); | ||
1295 | 1347 | ||
1296 | #if defined(DEBUG) | 1348 | #if defined(DEBUG) |
1297 | xfs_errortag_clearall(mp, 0); | 1349 | xfs_errortag_clearall(mp, 0); |
@@ -1793,29 +1845,6 @@ xfs_freesb( | |||
1793 | } | 1845 | } |
1794 | 1846 | ||
1795 | /* | 1847 | /* |
1796 | * See if the UUID is unique among mounted XFS filesystems. | ||
1797 | * Mount fails if UUID is nil or a FS with the same UUID is already mounted. | ||
1798 | */ | ||
1799 | STATIC int | ||
1800 | xfs_uuid_mount( | ||
1801 | xfs_mount_t *mp) | ||
1802 | { | ||
1803 | if (uuid_is_nil(&mp->m_sb.sb_uuid)) { | ||
1804 | cmn_err(CE_WARN, | ||
1805 | "XFS: Filesystem %s has nil UUID - can't mount", | ||
1806 | mp->m_fsname); | ||
1807 | return -1; | ||
1808 | } | ||
1809 | if (!uuid_table_insert(&mp->m_sb.sb_uuid)) { | ||
1810 | cmn_err(CE_WARN, | ||
1811 | "XFS: Filesystem %s has duplicate UUID - can't mount", | ||
1812 | mp->m_fsname); | ||
1813 | return -1; | ||
1814 | } | ||
1815 | return 0; | ||
1816 | } | ||
1817 | |||
1818 | /* | ||
1819 | * Used to log changes to the superblock unit and width fields which could | 1848 | * Used to log changes to the superblock unit and width fields which could |
1820 | * be altered by the mount options, as well as any potential sb_features2 | 1849 | * be altered by the mount options, as well as any potential sb_features2 |
1821 | * fixup. Only the first superblock is updated. | 1850 | * fixup. Only the first superblock is updated. |
@@ -1868,7 +1897,7 @@ xfs_mount_log_sb( | |||
1868 | * we disable the per-cpu counter and go through the slow path. | 1897 | * we disable the per-cpu counter and go through the slow path. |
1869 | * | 1898 | * |
1870 | * The slow path is the current xfs_mod_incore_sb() function. This means that | 1899 | * The slow path is the current xfs_mod_incore_sb() function. This means that |
1871 | * when we disable a per-cpu counter, we need to drain it's resources back to | 1900 | * when we disable a per-cpu counter, we need to drain its resources back to |
1872 | * the global superblock. We do this after disabling the counter to prevent | 1901 | * the global superblock. We do this after disabling the counter to prevent |
1873 | * more threads from queueing up on the counter. | 1902 | * more threads from queueing up on the counter. |
1874 | * | 1903 | * |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f5e9937f9bdb..7af44adffc8f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -136,7 +136,6 @@ typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, | |||
136 | struct xfs_dquot *, struct xfs_dquot *, uint); | 136 | struct xfs_dquot *, struct xfs_dquot *, uint); |
137 | typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); | 137 | typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); |
138 | typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); | 138 | typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); |
139 | typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t); | ||
140 | 139 | ||
141 | typedef struct xfs_qmops { | 140 | typedef struct xfs_qmops { |
142 | xfs_qminit_t xfs_qminit; | 141 | xfs_qminit_t xfs_qminit; |
@@ -154,7 +153,6 @@ typedef struct xfs_qmops { | |||
154 | xfs_dqvopchownresv_t xfs_dqvopchownresv; | 153 | xfs_dqvopchownresv_t xfs_dqvopchownresv; |
155 | xfs_dqstatvfs_t xfs_dqstatvfs; | 154 | xfs_dqstatvfs_t xfs_dqstatvfs; |
156 | xfs_dqsync_t xfs_dqsync; | 155 | xfs_dqsync_t xfs_dqsync; |
157 | xfs_quotactl_t xfs_quotactl; | ||
158 | struct xfs_dqtrxops *xfs_dqtrxops; | 156 | struct xfs_dqtrxops *xfs_dqtrxops; |
159 | } xfs_qmops_t; | 157 | } xfs_qmops_t; |
160 | 158 | ||
@@ -188,8 +186,6 @@ typedef struct xfs_qmops { | |||
188 | (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) | 186 | (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) |
189 | #define XFS_QM_DQSYNC(mp, flags) \ | 187 | #define XFS_QM_DQSYNC(mp, flags) \ |
190 | (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) | 188 | (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) |
191 | #define XFS_QM_QUOTACTL(mp, cmd, id, addr) \ | ||
192 | (*(mp)->m_qm_ops->xfs_quotactl)(mp, cmd, id, addr) | ||
193 | 189 | ||
194 | #ifdef HAVE_PERCPU_SB | 190 | #ifdef HAVE_PERCPU_SB |
195 | 191 | ||
@@ -273,19 +269,17 @@ typedef struct xfs_mount { | |||
273 | uint m_inobt_mnr[2]; /* min inobt btree records */ | 269 | uint m_inobt_mnr[2]; /* min inobt btree records */ |
274 | uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ | 270 | uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ |
275 | uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ | 271 | uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ |
276 | uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ | 272 | uint m_in_maxlevels; /* max inobt btree levels. */ |
277 | struct xfs_perag *m_perag; /* per-ag accounting info */ | 273 | struct xfs_perag *m_perag; /* per-ag accounting info */ |
278 | struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ | 274 | struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ |
279 | struct mutex m_growlock; /* growfs mutex */ | 275 | struct mutex m_growlock; /* growfs mutex */ |
280 | int m_fixedfsid[2]; /* unchanged for life of FS */ | 276 | int m_fixedfsid[2]; /* unchanged for life of FS */ |
281 | uint m_dmevmask; /* DMI events for this FS */ | 277 | uint m_dmevmask; /* DMI events for this FS */ |
282 | __uint64_t m_flags; /* global mount flags */ | 278 | __uint64_t m_flags; /* global mount flags */ |
283 | uint m_attroffset; /* inode attribute offset */ | ||
284 | uint m_dir_node_ents; /* #entries in a dir danode */ | 279 | uint m_dir_node_ents; /* #entries in a dir danode */ |
285 | uint m_attr_node_ents; /* #entries in attr danode */ | 280 | uint m_attr_node_ents; /* #entries in attr danode */ |
286 | int m_ialloc_inos; /* inodes in inode allocation */ | 281 | int m_ialloc_inos; /* inodes in inode allocation */ |
287 | int m_ialloc_blks; /* blocks in inode allocation */ | 282 | int m_ialloc_blks; /* blocks in inode allocation */ |
288 | int m_litino; /* size of inode union area */ | ||
289 | int m_inoalign_mask;/* mask sb_inoalignmt if used */ | 283 | int m_inoalign_mask;/* mask sb_inoalignmt if used */ |
290 | uint m_qflags; /* quota status flags */ | 284 | uint m_qflags; /* quota status flags */ |
291 | xfs_trans_reservations_t m_reservations;/* precomputed res values */ | 285 | xfs_trans_reservations_t m_reservations;/* precomputed res values */ |
@@ -293,9 +287,6 @@ typedef struct xfs_mount { | |||
293 | __uint64_t m_maxioffset; /* maximum inode offset */ | 287 | __uint64_t m_maxioffset; /* maximum inode offset */ |
294 | __uint64_t m_resblks; /* total reserved blocks */ | 288 | __uint64_t m_resblks; /* total reserved blocks */ |
295 | __uint64_t m_resblks_avail;/* available reserved blocks */ | 289 | __uint64_t m_resblks_avail;/* available reserved blocks */ |
296 | #if XFS_BIG_INUMS | ||
297 | xfs_ino_t m_inoadd; /* add value for ino64_offset */ | ||
298 | #endif | ||
299 | int m_dalign; /* stripe unit */ | 290 | int m_dalign; /* stripe unit */ |
300 | int m_swidth; /* stripe width */ | 291 | int m_swidth; /* stripe width */ |
301 | int m_sinoalign; /* stripe unit inode alignment */ | 292 | int m_sinoalign; /* stripe unit inode alignment */ |
@@ -337,7 +328,6 @@ typedef struct xfs_mount { | |||
337 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops | 328 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops |
338 | must be synchronous except | 329 | must be synchronous except |
339 | for space allocations */ | 330 | for space allocations */ |
340 | #define XFS_MOUNT_INO64 (1ULL << 1) | ||
341 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ | 331 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ |
342 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) | 332 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) |
343 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem | 333 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem |
@@ -389,8 +379,8 @@ typedef struct xfs_mount { | |||
389 | * Synchronous read and write sizes. This should be | 379 | * Synchronous read and write sizes. This should be |
390 | * better for NFSv2 wsync filesystems. | 380 | * better for NFSv2 wsync filesystems. |
391 | */ | 381 | */ |
392 | #define XFS_WSYNC_READIO_LOG 15 /* 32K */ | 382 | #define XFS_WSYNC_READIO_LOG 15 /* 32k */ |
393 | #define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ | 383 | #define XFS_WSYNC_WRITEIO_LOG 14 /* 16k */ |
394 | 384 | ||
395 | /* | 385 | /* |
396 | * Allow large block sizes to be reported to userspace programs if the | 386 | * Allow large block sizes to be reported to userspace programs if the |
@@ -500,9 +490,6 @@ typedef struct xfs_mod_sb { | |||
500 | int64_t msb_delta; /* Change to make to specified field */ | 490 | int64_t msb_delta; /* Change to make to specified field */ |
501 | } xfs_mod_sb_t; | 491 | } xfs_mod_sb_t; |
502 | 492 | ||
503 | #define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) | ||
504 | #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) | ||
505 | |||
506 | extern int xfs_log_sbcount(xfs_mount_t *, uint); | 493 | extern int xfs_log_sbcount(xfs_mount_t *, uint); |
507 | extern int xfs_mountfs(xfs_mount_t *mp); | 494 | extern int xfs_mountfs(xfs_mount_t *mp); |
508 | extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); | 495 | extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); |
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 27f80581520a..e101790ea8e7 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c | |||
@@ -126,7 +126,6 @@ static struct xfs_qmops xfs_qmcore_stub = { | |||
126 | .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, | 126 | .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, |
127 | .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, | 127 | .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, |
128 | .xfs_dqsync = (xfs_dqsync_t) fs_noerr, | 128 | .xfs_dqsync = (xfs_dqsync_t) fs_noerr, |
129 | .xfs_quotactl = (xfs_quotactl_t) fs_nosys, | ||
130 | }; | 129 | }; |
131 | 130 | ||
132 | int | 131 | int |
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 48965ecaa155..f5d1202dde25 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #ifndef __XFS_QUOTA_H__ | 18 | #ifndef __XFS_QUOTA_H__ |
19 | #define __XFS_QUOTA_H__ | 19 | #define __XFS_QUOTA_H__ |
20 | 20 | ||
21 | struct xfs_trans; | ||
22 | |||
21 | /* | 23 | /* |
22 | * The ondisk form of a dquot structure. | 24 | * The ondisk form of a dquot structure. |
23 | */ | 25 | */ |
@@ -185,7 +187,6 @@ typedef struct xfs_qoff_logformat { | |||
185 | * to a single function. None of these XFS_QMOPT_* flags are meant to have | 187 | * to a single function. None of these XFS_QMOPT_* flags are meant to have |
186 | * persistent values (ie. their values can and will change between versions) | 188 | * persistent values (ie. their values can and will change between versions) |
187 | */ | 189 | */ |
188 | #define XFS_QMOPT_DQLOCK 0x0000001 /* dqlock */ | ||
189 | #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ | 190 | #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ |
190 | #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ | 191 | #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ |
191 | #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ | 192 | #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index c5bb86f3ec05..385f6dceba5d 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -2288,6 +2288,16 @@ xfs_rtmount_inodes( | |||
2288 | return 0; | 2288 | return 0; |
2289 | } | 2289 | } |
2290 | 2290 | ||
2291 | void | ||
2292 | xfs_rtunmount_inodes( | ||
2293 | struct xfs_mount *mp) | ||
2294 | { | ||
2295 | if (mp->m_rbmip) | ||
2296 | IRELE(mp->m_rbmip); | ||
2297 | if (mp->m_rsumip) | ||
2298 | IRELE(mp->m_rsumip); | ||
2299 | } | ||
2300 | |||
2291 | /* | 2301 | /* |
2292 | * Pick an extent for allocation at the start of a new realtime file. | 2302 | * Pick an extent for allocation at the start of a new realtime file. |
2293 | * Use the sequence number stored in the atime field of the bitmap inode. | 2303 | * Use the sequence number stored in the atime field of the bitmap inode. |
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 8d8dcd215716..b2d67adb6a08 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h | |||
@@ -23,8 +23,8 @@ struct xfs_trans; | |||
23 | 23 | ||
24 | /* Min and max rt extent sizes, specified in bytes */ | 24 | /* Min and max rt extent sizes, specified in bytes */ |
25 | #define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ | 25 | #define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ |
26 | #define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64KB */ | 26 | #define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64kB */ |
27 | #define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4KB */ | 27 | #define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */ |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Constants for bit manipulations. | 30 | * Constants for bit manipulations. |
@@ -108,6 +108,9 @@ xfs_rtfree_extent( | |||
108 | int /* error */ | 108 | int /* error */ |
109 | xfs_rtmount_init( | 109 | xfs_rtmount_init( |
110 | struct xfs_mount *mp); /* file system mount structure */ | 110 | struct xfs_mount *mp); /* file system mount structure */ |
111 | void | ||
112 | xfs_rtunmount_inodes( | ||
113 | struct xfs_mount *mp); | ||
111 | 114 | ||
112 | /* | 115 | /* |
113 | * Get the bitmap and summary inodes into the mount structure | 116 | * Get the bitmap and summary inodes into the mount structure |
@@ -146,6 +149,7 @@ xfs_growfs_rt( | |||
146 | # define xfs_growfs_rt(mp,in) (ENOSYS) | 149 | # define xfs_growfs_rt(mp,in) (ENOSYS) |
147 | # define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) | 150 | # define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) |
148 | # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) | 151 | # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) |
152 | # define xfs_rtunmount_inodes(m) | ||
149 | #endif /* CONFIG_XFS_RT */ | 153 | #endif /* CONFIG_XFS_RT */ |
150 | 154 | ||
151 | #endif /* __KERNEL__ */ | 155 | #endif /* __KERNEL__ */ |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index d6fe4a88d79f..775249a54f6f 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -292,7 +292,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
292 | * In a write transaction we can allocate a maximum of 2 | 292 | * In a write transaction we can allocate a maximum of 2 |
293 | * extents. This gives: | 293 | * extents. This gives: |
294 | * the inode getting the new extents: inode size | 294 | * the inode getting the new extents: inode size |
295 | * the inode\'s bmap btree: max depth * block size | 295 | * the inode's bmap btree: max depth * block size |
296 | * the agfs of the ags from which the extents are allocated: 2 * sector | 296 | * the agfs of the ags from which the extents are allocated: 2 * sector |
297 | * the superblock free block counter: sector size | 297 | * the superblock free block counter: sector size |
298 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size | 298 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
@@ -321,7 +321,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
321 | /* | 321 | /* |
322 | * In truncating a file we free up to two extents at once. We can modify: | 322 | * In truncating a file we free up to two extents at once. We can modify: |
323 | * the inode being truncated: inode size | 323 | * the inode being truncated: inode size |
324 | * the inode\'s bmap btree: (max depth + 1) * block size | 324 | * the inode's bmap btree: (max depth + 1) * block size |
325 | * And the bmap_finish transaction can free the blocks and bmap blocks: | 325 | * And the bmap_finish transaction can free the blocks and bmap blocks: |
326 | * the agf for each of the ags: 4 * sector size | 326 | * the agf for each of the ags: 4 * sector size |
327 | * the agfl for each of the ags: 4 * sector size | 327 | * the agfl for each of the ags: 4 * sector size |
@@ -343,7 +343,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
343 | (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \ | 343 | (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \ |
344 | (128 * 5) + \ | 344 | (128 * 5) + \ |
345 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ | 345 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ |
346 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ | 346 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ |
347 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) | 347 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) |
348 | 348 | ||
349 | #define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) | 349 | #define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) |
@@ -431,8 +431,8 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
431 | * the new inode: inode size | 431 | * the new inode: inode size |
432 | * the inode btree entry: 1 block | 432 | * the inode btree entry: 1 block |
433 | * the directory btree: (max depth + v2) * dir block size | 433 | * the directory btree: (max depth + v2) * dir block size |
434 | * the directory inode\'s bmap btree: (max depth + v2) * block size | 434 | * the directory inode's bmap btree: (max depth + v2) * block size |
435 | * the blocks for the symlink: 1 KB | 435 | * the blocks for the symlink: 1 kB |
436 | * Or in the first xact we allocate some inodes giving: | 436 | * Or in the first xact we allocate some inodes giving: |
437 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 437 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
438 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize | 438 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize |
@@ -449,9 +449,9 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
449 | (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \ | 449 | (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \ |
450 | (2 * (mp)->m_sb.sb_sectsize + \ | 450 | (2 * (mp)->m_sb.sb_sectsize + \ |
451 | XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ | 451 | XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ |
452 | XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \ | 452 | XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \ |
453 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ | 453 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ |
454 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ | 454 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ |
455 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) | 455 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) |
456 | 456 | ||
457 | #define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) | 457 | #define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) |
@@ -463,7 +463,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
463 | * the inode btree entry: block size | 463 | * the inode btree entry: block size |
464 | * the superblock for the nlink flag: sector size | 464 | * the superblock for the nlink flag: sector size |
465 | * the directory btree: (max depth + v2) * dir block size | 465 | * the directory btree: (max depth + v2) * dir block size |
466 | * the directory inode\'s bmap btree: (max depth + v2) * block size | 466 | * the directory inode's bmap btree: (max depth + v2) * block size |
467 | * Or in the first xact we allocate some inodes giving: | 467 | * Or in the first xact we allocate some inodes giving: |
468 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 468 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
469 | * the superblock for the nlink flag: sector size | 469 | * the superblock for the nlink flag: sector size |
@@ -481,9 +481,9 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
481 | (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \ | 481 | (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \ |
482 | (3 * (mp)->m_sb.sb_sectsize + \ | 482 | (3 * (mp)->m_sb.sb_sectsize + \ |
483 | XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ | 483 | XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ |
484 | XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \ | 484 | XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \ |
485 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ | 485 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ |
486 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ | 486 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ |
487 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) | 487 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) |
488 | 488 | ||
489 | #define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) | 489 | #define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) |
@@ -513,7 +513,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
513 | MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ | 513 | MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ |
514 | (128 * 5) + \ | 514 | (128 * 5) + \ |
515 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ | 515 | XFS_ALLOCFREE_LOG_RES(mp, 1) + \ |
516 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ | 516 | (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ |
517 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) | 517 | XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) |
518 | 518 | ||
519 | 519 | ||
@@ -637,7 +637,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | |||
637 | /* | 637 | /* |
638 | * Removing the attribute fork of a file | 638 | * Removing the attribute fork of a file |
639 | * the inode being truncated: inode size | 639 | * the inode being truncated: inode size |
640 | * the inode\'s bmap btree: max depth * block size | 640 | * the inode's bmap btree: max depth * block size |
641 | * And the bmap_finish transaction can free the blocks and bmap blocks: | 641 | * And the bmap_finish transaction can free the blocks and bmap blocks: |
642 | * the agf for each of the ags: 4 * sector size | 642 | * the agf for each of the ags: 4 * sector size |
643 | * the agfl for each of the ags: 4 * sector size | 643 | * the agfl for each of the ags: 4 * sector size |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 2d47f10f8bed..f31271c30de9 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -79,7 +79,7 @@ xfs_trans_ail_tail( | |||
79 | * the push is run asynchronously in a separate thread, so we return the tail | 79 | * the push is run asynchronously in a separate thread, so we return the tail |
80 | * of the log right now instead of the tail after the push. This means we will | 80 | * of the log right now instead of the tail after the push. This means we will |
81 | * either continue right away, or we will sleep waiting on the async thread to | 81 | * either continue right away, or we will sleep waiting on the async thread to |
82 | * do it's work. | 82 | * do its work. |
83 | * | 83 | * |
84 | * We do this unlocked - we only need to know whether there is anything in the | 84 | * We do this unlocked - we only need to know whether there is anything in the |
85 | * AIL at the time we are called. We don't need to access the contents of | 85 | * AIL at the time we are called. We don't need to access the contents of |
@@ -160,7 +160,7 @@ xfs_trans_ail_cursor_next( | |||
160 | /* | 160 | /* |
161 | * Now that the traversal is complete, we need to remove the cursor | 161 | * Now that the traversal is complete, we need to remove the cursor |
162 | * from the list of traversing cursors. Avoid removing the embedded | 162 | * from the list of traversing cursors. Avoid removing the embedded |
163 | * push cursor, but use the fact it is alway present to make the | 163 | * push cursor, but use the fact it is always present to make the |
164 | * list deletion simple. | 164 | * list deletion simple. |
165 | */ | 165 | */ |
166 | void | 166 | void |
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index e110bf57d7f4..eb3fc57f9eef 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_trans_priv.h" | 24 | #include "xfs_trans_priv.h" |
25 | /* XXX: from here down needed until struct xfs_trans has it's own ailp */ | 25 | /* XXX: from here down needed until struct xfs_trans has its own ailp */ |
26 | #include "xfs_bit.h" | 26 | #include "xfs_bit.h" |
27 | #include "xfs_buf_item.h" | 27 | #include "xfs_buf_item.h" |
28 | #include "xfs_sb.h" | 28 | #include "xfs_sb.h" |
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index 4ea2e5074bdd..7d2c920dfb9c 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h | |||
@@ -47,7 +47,7 @@ | |||
47 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ | 47 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ |
48 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) | 48 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) |
49 | #define XFS_IALLOC_SPACE_RES(mp) \ | 49 | #define XFS_IALLOC_SPACE_RES(mp) \ |
50 | (XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp)-1) | 50 | (XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1) |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Space reservation values for various transactions. | 53 | * Space reservation values for various transactions. |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index b2f724502f1b..d725428c9df6 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -21,14 +21,6 @@ | |||
21 | #ifdef __KERNEL__ | 21 | #ifdef __KERNEL__ |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * POSIX Extensions | ||
25 | */ | ||
26 | typedef unsigned char uchar_t; | ||
27 | typedef unsigned short ushort_t; | ||
28 | typedef unsigned int uint_t; | ||
29 | typedef unsigned long ulong_t; | ||
30 | |||
31 | /* | ||
32 | * Additional type declarations for XFS | 24 | * Additional type declarations for XFS |
33 | */ | 25 | */ |
34 | typedef signed char __int8_t; | 26 | typedef signed char __int8_t; |
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index fcc2285d03ed..79b9e5ea5359 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
@@ -374,7 +374,7 @@ xfs_truncate_file( | |||
374 | 374 | ||
375 | /* | 375 | /* |
376 | * Follow the normal truncate locking protocol. Since we | 376 | * Follow the normal truncate locking protocol. Since we |
377 | * hold the inode in the transaction, we know that it's number | 377 | * hold the inode in the transaction, we know that its number |
378 | * of references will stay constant. | 378 | * of references will stay constant. |
379 | */ | 379 | */ |
380 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 380 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0e55c5d7db5f..7394c7af5de5 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -1136,7 +1136,7 @@ xfs_inactive( | |||
1136 | * If the inode is already free, then there can be nothing | 1136 | * If the inode is already free, then there can be nothing |
1137 | * to clean up here. | 1137 | * to clean up here. |
1138 | */ | 1138 | */ |
1139 | if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) { | 1139 | if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { |
1140 | ASSERT(ip->i_df.if_real_bytes == 0); | 1140 | ASSERT(ip->i_df.if_real_bytes == 0); |
1141 | ASSERT(ip->i_df.if_broot_bytes == 0); | 1141 | ASSERT(ip->i_df.if_broot_bytes == 0); |
1142 | return VN_INACTIVE_CACHE; | 1142 | return VN_INACTIVE_CACHE; |
@@ -1387,23 +1387,28 @@ xfs_create( | |||
1387 | xfs_inode_t **ipp, | 1387 | xfs_inode_t **ipp, |
1388 | cred_t *credp) | 1388 | cred_t *credp) |
1389 | { | 1389 | { |
1390 | xfs_mount_t *mp = dp->i_mount; | 1390 | int is_dir = S_ISDIR(mode); |
1391 | xfs_inode_t *ip; | 1391 | struct xfs_mount *mp = dp->i_mount; |
1392 | xfs_trans_t *tp; | 1392 | struct xfs_inode *ip = NULL; |
1393 | struct xfs_trans *tp = NULL; | ||
1393 | int error; | 1394 | int error; |
1394 | xfs_bmap_free_t free_list; | 1395 | xfs_bmap_free_t free_list; |
1395 | xfs_fsblock_t first_block; | 1396 | xfs_fsblock_t first_block; |
1396 | boolean_t unlock_dp_on_error = B_FALSE; | 1397 | boolean_t unlock_dp_on_error = B_FALSE; |
1397 | int dm_event_sent = 0; | ||
1398 | uint cancel_flags; | 1398 | uint cancel_flags; |
1399 | int committed; | 1399 | int committed; |
1400 | xfs_prid_t prid; | 1400 | xfs_prid_t prid; |
1401 | struct xfs_dquot *udqp, *gdqp; | 1401 | struct xfs_dquot *udqp = NULL; |
1402 | struct xfs_dquot *gdqp = NULL; | ||
1402 | uint resblks; | 1403 | uint resblks; |
1404 | uint log_res; | ||
1405 | uint log_count; | ||
1403 | 1406 | ||
1404 | ASSERT(!*ipp); | ||
1405 | xfs_itrace_entry(dp); | 1407 | xfs_itrace_entry(dp); |
1406 | 1408 | ||
1409 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1410 | return XFS_ERROR(EIO); | ||
1411 | |||
1407 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { | 1412 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { |
1408 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | 1413 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, |
1409 | dp, DM_RIGHT_NULL, NULL, | 1414 | dp, DM_RIGHT_NULL, NULL, |
@@ -1412,84 +1417,97 @@ xfs_create( | |||
1412 | 1417 | ||
1413 | if (error) | 1418 | if (error) |
1414 | return error; | 1419 | return error; |
1415 | dm_event_sent = 1; | ||
1416 | } | 1420 | } |
1417 | 1421 | ||
1418 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1419 | return XFS_ERROR(EIO); | ||
1420 | |||
1421 | /* Return through std_return after this point. */ | ||
1422 | |||
1423 | udqp = gdqp = NULL; | ||
1424 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1422 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) |
1425 | prid = dp->i_d.di_projid; | 1423 | prid = dp->i_d.di_projid; |
1426 | else | 1424 | else |
1427 | prid = (xfs_prid_t)dfltprid; | 1425 | prid = dfltprid; |
1428 | 1426 | ||
1429 | /* | 1427 | /* |
1430 | * Make sure that we have allocated dquot(s) on disk. | 1428 | * Make sure that we have allocated dquot(s) on disk. |
1431 | */ | 1429 | */ |
1432 | error = XFS_QM_DQVOPALLOC(mp, dp, | 1430 | error = XFS_QM_DQVOPALLOC(mp, dp, |
1433 | current_fsuid(), current_fsgid(), prid, | 1431 | current_fsuid(), current_fsgid(), prid, |
1434 | XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); | 1432 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); |
1435 | if (error) | 1433 | if (error) |
1436 | goto std_return; | 1434 | goto std_return; |
1437 | 1435 | ||
1438 | ip = NULL; | 1436 | if (is_dir) { |
1437 | rdev = 0; | ||
1438 | resblks = XFS_MKDIR_SPACE_RES(mp, name->len); | ||
1439 | log_res = XFS_MKDIR_LOG_RES(mp); | ||
1440 | log_count = XFS_MKDIR_LOG_COUNT; | ||
1441 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); | ||
1442 | } else { | ||
1443 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); | ||
1444 | log_res = XFS_CREATE_LOG_RES(mp); | ||
1445 | log_count = XFS_CREATE_LOG_COUNT; | ||
1446 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); | ||
1447 | } | ||
1439 | 1448 | ||
1440 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); | ||
1441 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 1449 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
1442 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); | 1450 | |
1443 | /* | 1451 | /* |
1444 | * Initially assume that the file does not exist and | 1452 | * Initially assume that the file does not exist and |
1445 | * reserve the resources for that case. If that is not | 1453 | * reserve the resources for that case. If that is not |
1446 | * the case we'll drop the one we have and get a more | 1454 | * the case we'll drop the one we have and get a more |
1447 | * appropriate transaction later. | 1455 | * appropriate transaction later. |
1448 | */ | 1456 | */ |
1449 | error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, | 1457 | error = xfs_trans_reserve(tp, resblks, log_res, 0, |
1450 | XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); | 1458 | XFS_TRANS_PERM_LOG_RES, log_count); |
1451 | if (error == ENOSPC) { | 1459 | if (error == ENOSPC) { |
1452 | resblks = 0; | 1460 | resblks = 0; |
1453 | error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0, | 1461 | error = xfs_trans_reserve(tp, 0, log_res, 0, |
1454 | XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); | 1462 | XFS_TRANS_PERM_LOG_RES, log_count); |
1455 | } | 1463 | } |
1456 | if (error) { | 1464 | if (error) { |
1457 | cancel_flags = 0; | 1465 | cancel_flags = 0; |
1458 | goto error_return; | 1466 | goto out_trans_cancel; |
1459 | } | 1467 | } |
1460 | 1468 | ||
1461 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | 1469 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
1462 | unlock_dp_on_error = B_TRUE; | 1470 | unlock_dp_on_error = B_TRUE; |
1463 | 1471 | ||
1464 | xfs_bmap_init(&free_list, &first_block); | 1472 | /* |
1473 | * Check for directory link count overflow. | ||
1474 | */ | ||
1475 | if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) { | ||
1476 | error = XFS_ERROR(EMLINK); | ||
1477 | goto out_trans_cancel; | ||
1478 | } | ||
1465 | 1479 | ||
1466 | ASSERT(ip == NULL); | 1480 | xfs_bmap_init(&free_list, &first_block); |
1467 | 1481 | ||
1468 | /* | 1482 | /* |
1469 | * Reserve disk quota and the inode. | 1483 | * Reserve disk quota and the inode. |
1470 | */ | 1484 | */ |
1471 | error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); | 1485 | error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); |
1472 | if (error) | 1486 | if (error) |
1473 | goto error_return; | 1487 | goto out_trans_cancel; |
1474 | 1488 | ||
1475 | error = xfs_dir_canenter(tp, dp, name, resblks); | 1489 | error = xfs_dir_canenter(tp, dp, name, resblks); |
1476 | if (error) | 1490 | if (error) |
1477 | goto error_return; | 1491 | goto out_trans_cancel; |
1478 | error = xfs_dir_ialloc(&tp, dp, mode, 1, | 1492 | |
1479 | rdev, credp, prid, resblks > 0, | 1493 | /* |
1480 | &ip, &committed); | 1494 | * A newly created regular or special file just has one directory |
1495 | * entry pointing to them, but a directory also the "." entry | ||
1496 | * pointing to itself. | ||
1497 | */ | ||
1498 | error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, | ||
1499 | prid, resblks > 0, &ip, &committed); | ||
1481 | if (error) { | 1500 | if (error) { |
1482 | if (error == ENOSPC) | 1501 | if (error == ENOSPC) |
1483 | goto error_return; | 1502 | goto out_trans_cancel; |
1484 | goto abort_return; | 1503 | goto out_trans_abort; |
1485 | } | 1504 | } |
1486 | xfs_itrace_ref(ip); | ||
1487 | 1505 | ||
1488 | /* | 1506 | /* |
1489 | * At this point, we've gotten a newly allocated inode. | 1507 | * At this point, we've gotten a newly allocated inode. |
1490 | * It is locked (and joined to the transaction). | 1508 | * It is locked (and joined to the transaction). |
1491 | */ | 1509 | */ |
1492 | 1510 | xfs_itrace_ref(ip); | |
1493 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 1511 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
1494 | 1512 | ||
1495 | /* | 1513 | /* |
@@ -1508,19 +1526,28 @@ xfs_create( | |||
1508 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | 1526 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); |
1509 | if (error) { | 1527 | if (error) { |
1510 | ASSERT(error != ENOSPC); | 1528 | ASSERT(error != ENOSPC); |
1511 | goto abort_return; | 1529 | goto out_trans_abort; |
1512 | } | 1530 | } |
1513 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1531 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1514 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 1532 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
1515 | 1533 | ||
1534 | if (is_dir) { | ||
1535 | error = xfs_dir_init(tp, ip, dp); | ||
1536 | if (error) | ||
1537 | goto out_bmap_cancel; | ||
1538 | |||
1539 | error = xfs_bumplink(tp, dp); | ||
1540 | if (error) | ||
1541 | goto out_bmap_cancel; | ||
1542 | } | ||
1543 | |||
1516 | /* | 1544 | /* |
1517 | * If this is a synchronous mount, make sure that the | 1545 | * If this is a synchronous mount, make sure that the |
1518 | * create transaction goes to disk before returning to | 1546 | * create transaction goes to disk before returning to |
1519 | * the user. | 1547 | * the user. |
1520 | */ | 1548 | */ |
1521 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { | 1549 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) |
1522 | xfs_trans_set_sync(tp); | 1550 | xfs_trans_set_sync(tp); |
1523 | } | ||
1524 | 1551 | ||
1525 | /* | 1552 | /* |
1526 | * Attach the dquot(s) to the inodes and modify them incore. | 1553 | * Attach the dquot(s) to the inodes and modify them incore. |
@@ -1537,16 +1564,13 @@ xfs_create( | |||
1537 | IHOLD(ip); | 1564 | IHOLD(ip); |
1538 | 1565 | ||
1539 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1566 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1540 | if (error) { | 1567 | if (error) |
1541 | xfs_bmap_cancel(&free_list); | 1568 | goto out_abort_rele; |
1542 | goto abort_rele; | ||
1543 | } | ||
1544 | 1569 | ||
1545 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1570 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1546 | if (error) { | 1571 | if (error) { |
1547 | IRELE(ip); | 1572 | IRELE(ip); |
1548 | tp = NULL; | 1573 | goto out_dqrele; |
1549 | goto error_return; | ||
1550 | } | 1574 | } |
1551 | 1575 | ||
1552 | XFS_QM_DQRELE(mp, udqp); | 1576 | XFS_QM_DQRELE(mp, udqp); |
@@ -1555,26 +1579,22 @@ xfs_create( | |||
1555 | *ipp = ip; | 1579 | *ipp = ip; |
1556 | 1580 | ||
1557 | /* Fallthrough to std_return with error = 0 */ | 1581 | /* Fallthrough to std_return with error = 0 */ |
1558 | 1582 | std_return: | |
1559 | std_return: | 1583 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { |
1560 | if ((*ipp || (error != 0 && dm_event_sent != 0)) && | 1584 | XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL, |
1561 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { | 1585 | ip, DM_RIGHT_NULL, name->name, NULL, mode, |
1562 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, | 1586 | error, 0); |
1563 | dp, DM_RIGHT_NULL, | ||
1564 | *ipp ? ip : NULL, | ||
1565 | DM_RIGHT_NULL, name->name, NULL, | ||
1566 | mode, error, 0); | ||
1567 | } | 1587 | } |
1588 | |||
1568 | return error; | 1589 | return error; |
1569 | 1590 | ||
1570 | abort_return: | 1591 | out_bmap_cancel: |
1592 | xfs_bmap_cancel(&free_list); | ||
1593 | out_trans_abort: | ||
1571 | cancel_flags |= XFS_TRANS_ABORT; | 1594 | cancel_flags |= XFS_TRANS_ABORT; |
1572 | /* FALLTHROUGH */ | 1595 | out_trans_cancel: |
1573 | 1596 | xfs_trans_cancel(tp, cancel_flags); | |
1574 | error_return: | 1597 | out_dqrele: |
1575 | if (tp != NULL) | ||
1576 | xfs_trans_cancel(tp, cancel_flags); | ||
1577 | |||
1578 | XFS_QM_DQRELE(mp, udqp); | 1598 | XFS_QM_DQRELE(mp, udqp); |
1579 | XFS_QM_DQRELE(mp, gdqp); | 1599 | XFS_QM_DQRELE(mp, gdqp); |
1580 | 1600 | ||
@@ -1583,20 +1603,18 @@ std_return: | |||
1583 | 1603 | ||
1584 | goto std_return; | 1604 | goto std_return; |
1585 | 1605 | ||
1586 | abort_rele: | 1606 | out_abort_rele: |
1587 | /* | 1607 | /* |
1588 | * Wait until after the current transaction is aborted to | 1608 | * Wait until after the current transaction is aborted to |
1589 | * release the inode. This prevents recursive transactions | 1609 | * release the inode. This prevents recursive transactions |
1590 | * and deadlocks from xfs_inactive. | 1610 | * and deadlocks from xfs_inactive. |
1591 | */ | 1611 | */ |
1612 | xfs_bmap_cancel(&free_list); | ||
1592 | cancel_flags |= XFS_TRANS_ABORT; | 1613 | cancel_flags |= XFS_TRANS_ABORT; |
1593 | xfs_trans_cancel(tp, cancel_flags); | 1614 | xfs_trans_cancel(tp, cancel_flags); |
1594 | IRELE(ip); | 1615 | IRELE(ip); |
1595 | 1616 | unlock_dp_on_error = B_FALSE; | |
1596 | XFS_QM_DQRELE(mp, udqp); | 1617 | goto out_dqrele; |
1597 | XFS_QM_DQRELE(mp, gdqp); | ||
1598 | |||
1599 | goto std_return; | ||
1600 | } | 1618 | } |
1601 | 1619 | ||
1602 | #ifdef DEBUG | 1620 | #ifdef DEBUG |
@@ -2004,8 +2022,10 @@ xfs_link( | |||
2004 | /* Return through std_return after this point. */ | 2022 | /* Return through std_return after this point. */ |
2005 | 2023 | ||
2006 | error = XFS_QM_DQATTACH(mp, sip, 0); | 2024 | error = XFS_QM_DQATTACH(mp, sip, 0); |
2007 | if (!error && sip != tdp) | 2025 | if (error) |
2008 | error = XFS_QM_DQATTACH(mp, tdp, 0); | 2026 | goto std_return; |
2027 | |||
2028 | error = XFS_QM_DQATTACH(mp, tdp, 0); | ||
2009 | if (error) | 2029 | if (error) |
2010 | goto std_return; | 2030 | goto std_return; |
2011 | 2031 | ||
@@ -2110,209 +2130,6 @@ std_return: | |||
2110 | goto std_return; | 2130 | goto std_return; |
2111 | } | 2131 | } |
2112 | 2132 | ||
2113 | |||
2114 | int | ||
2115 | xfs_mkdir( | ||
2116 | xfs_inode_t *dp, | ||
2117 | struct xfs_name *dir_name, | ||
2118 | mode_t mode, | ||
2119 | xfs_inode_t **ipp, | ||
2120 | cred_t *credp) | ||
2121 | { | ||
2122 | xfs_mount_t *mp = dp->i_mount; | ||
2123 | xfs_inode_t *cdp; /* inode of created dir */ | ||
2124 | xfs_trans_t *tp; | ||
2125 | int cancel_flags; | ||
2126 | int error; | ||
2127 | int committed; | ||
2128 | xfs_bmap_free_t free_list; | ||
2129 | xfs_fsblock_t first_block; | ||
2130 | boolean_t unlock_dp_on_error = B_FALSE; | ||
2131 | boolean_t created = B_FALSE; | ||
2132 | int dm_event_sent = 0; | ||
2133 | xfs_prid_t prid; | ||
2134 | struct xfs_dquot *udqp, *gdqp; | ||
2135 | uint resblks; | ||
2136 | |||
2137 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
2138 | return XFS_ERROR(EIO); | ||
2139 | |||
2140 | tp = NULL; | ||
2141 | |||
2142 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { | ||
2143 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | ||
2144 | dp, DM_RIGHT_NULL, NULL, | ||
2145 | DM_RIGHT_NULL, dir_name->name, NULL, | ||
2146 | mode, 0, 0); | ||
2147 | if (error) | ||
2148 | return error; | ||
2149 | dm_event_sent = 1; | ||
2150 | } | ||
2151 | |||
2152 | /* Return through std_return after this point. */ | ||
2153 | |||
2154 | xfs_itrace_entry(dp); | ||
2155 | |||
2156 | mp = dp->i_mount; | ||
2157 | udqp = gdqp = NULL; | ||
2158 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | ||
2159 | prid = dp->i_d.di_projid; | ||
2160 | else | ||
2161 | prid = (xfs_prid_t)dfltprid; | ||
2162 | |||
2163 | /* | ||
2164 | * Make sure that we have allocated dquot(s) on disk. | ||
2165 | */ | ||
2166 | error = XFS_QM_DQVOPALLOC(mp, dp, | ||
2167 | current_fsuid(), current_fsgid(), prid, | ||
2168 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | ||
2169 | if (error) | ||
2170 | goto std_return; | ||
2171 | |||
2172 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); | ||
2173 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
2174 | resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len); | ||
2175 | error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, | ||
2176 | XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); | ||
2177 | if (error == ENOSPC) { | ||
2178 | resblks = 0; | ||
2179 | error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0, | ||
2180 | XFS_TRANS_PERM_LOG_RES, | ||
2181 | XFS_MKDIR_LOG_COUNT); | ||
2182 | } | ||
2183 | if (error) { | ||
2184 | cancel_flags = 0; | ||
2185 | goto error_return; | ||
2186 | } | ||
2187 | |||
2188 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | ||
2189 | unlock_dp_on_error = B_TRUE; | ||
2190 | |||
2191 | /* | ||
2192 | * Check for directory link count overflow. | ||
2193 | */ | ||
2194 | if (dp->i_d.di_nlink >= XFS_MAXLINK) { | ||
2195 | error = XFS_ERROR(EMLINK); | ||
2196 | goto error_return; | ||
2197 | } | ||
2198 | |||
2199 | /* | ||
2200 | * Reserve disk quota and the inode. | ||
2201 | */ | ||
2202 | error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); | ||
2203 | if (error) | ||
2204 | goto error_return; | ||
2205 | |||
2206 | error = xfs_dir_canenter(tp, dp, dir_name, resblks); | ||
2207 | if (error) | ||
2208 | goto error_return; | ||
2209 | /* | ||
2210 | * create the directory inode. | ||
2211 | */ | ||
2212 | error = xfs_dir_ialloc(&tp, dp, mode, 2, | ||
2213 | 0, credp, prid, resblks > 0, | ||
2214 | &cdp, NULL); | ||
2215 | if (error) { | ||
2216 | if (error == ENOSPC) | ||
2217 | goto error_return; | ||
2218 | goto abort_return; | ||
2219 | } | ||
2220 | xfs_itrace_ref(cdp); | ||
2221 | |||
2222 | /* | ||
2223 | * Now we add the directory inode to the transaction. | ||
2224 | * We waited until now since xfs_dir_ialloc might start | ||
2225 | * a new transaction. Had we joined the transaction | ||
2226 | * earlier, the locks might have gotten released. An error | ||
2227 | * from here on will result in the transaction cancel | ||
2228 | * unlocking dp so don't do it explicitly in the error path. | ||
2229 | */ | ||
2230 | IHOLD(dp); | ||
2231 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | ||
2232 | unlock_dp_on_error = B_FALSE; | ||
2233 | |||
2234 | xfs_bmap_init(&free_list, &first_block); | ||
2235 | |||
2236 | error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino, | ||
2237 | &first_block, &free_list, resblks ? | ||
2238 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | ||
2239 | if (error) { | ||
2240 | ASSERT(error != ENOSPC); | ||
2241 | goto error1; | ||
2242 | } | ||
2243 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
2244 | |||
2245 | error = xfs_dir_init(tp, cdp, dp); | ||
2246 | if (error) | ||
2247 | goto error2; | ||
2248 | |||
2249 | error = xfs_bumplink(tp, dp); | ||
2250 | if (error) | ||
2251 | goto error2; | ||
2252 | |||
2253 | created = B_TRUE; | ||
2254 | |||
2255 | *ipp = cdp; | ||
2256 | IHOLD(cdp); | ||
2257 | |||
2258 | /* | ||
2259 | * Attach the dquots to the new inode and modify the icount incore. | ||
2260 | */ | ||
2261 | XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp); | ||
2262 | |||
2263 | /* | ||
2264 | * If this is a synchronous mount, make sure that the | ||
2265 | * mkdir transaction goes to disk before returning to | ||
2266 | * the user. | ||
2267 | */ | ||
2268 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { | ||
2269 | xfs_trans_set_sync(tp); | ||
2270 | } | ||
2271 | |||
2272 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
2273 | if (error) { | ||
2274 | IRELE(cdp); | ||
2275 | goto error2; | ||
2276 | } | ||
2277 | |||
2278 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
2279 | XFS_QM_DQRELE(mp, udqp); | ||
2280 | XFS_QM_DQRELE(mp, gdqp); | ||
2281 | if (error) { | ||
2282 | IRELE(cdp); | ||
2283 | } | ||
2284 | |||
2285 | /* Fall through to std_return with error = 0 or errno from | ||
2286 | * xfs_trans_commit. */ | ||
2287 | |||
2288 | std_return: | ||
2289 | if ((created || (error != 0 && dm_event_sent != 0)) && | ||
2290 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { | ||
2291 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, | ||
2292 | dp, DM_RIGHT_NULL, | ||
2293 | created ? cdp : NULL, | ||
2294 | DM_RIGHT_NULL, | ||
2295 | dir_name->name, NULL, | ||
2296 | mode, error, 0); | ||
2297 | } | ||
2298 | return error; | ||
2299 | |||
2300 | error2: | ||
2301 | error1: | ||
2302 | xfs_bmap_cancel(&free_list); | ||
2303 | abort_return: | ||
2304 | cancel_flags |= XFS_TRANS_ABORT; | ||
2305 | error_return: | ||
2306 | xfs_trans_cancel(tp, cancel_flags); | ||
2307 | XFS_QM_DQRELE(mp, udqp); | ||
2308 | XFS_QM_DQRELE(mp, gdqp); | ||
2309 | |||
2310 | if (unlock_dp_on_error) | ||
2311 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
2312 | |||
2313 | goto std_return; | ||
2314 | } | ||
2315 | |||
2316 | int | 2133 | int |
2317 | xfs_symlink( | 2134 | xfs_symlink( |
2318 | xfs_inode_t *dp, | 2135 | xfs_inode_t *dp, |
@@ -2587,51 +2404,6 @@ std_return: | |||
2587 | } | 2404 | } |
2588 | 2405 | ||
2589 | int | 2406 | int |
2590 | xfs_inode_flush( | ||
2591 | xfs_inode_t *ip, | ||
2592 | int flags) | ||
2593 | { | ||
2594 | xfs_mount_t *mp = ip->i_mount; | ||
2595 | int error = 0; | ||
2596 | |||
2597 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
2598 | return XFS_ERROR(EIO); | ||
2599 | |||
2600 | /* | ||
2601 | * Bypass inodes which have already been cleaned by | ||
2602 | * the inode flush clustering code inside xfs_iflush | ||
2603 | */ | ||
2604 | if (xfs_inode_clean(ip)) | ||
2605 | return 0; | ||
2606 | |||
2607 | /* | ||
2608 | * We make this non-blocking if the inode is contended, | ||
2609 | * return EAGAIN to indicate to the caller that they | ||
2610 | * did not succeed. This prevents the flush path from | ||
2611 | * blocking on inodes inside another operation right | ||
2612 | * now, they get caught later by xfs_sync. | ||
2613 | */ | ||
2614 | if (flags & FLUSH_SYNC) { | ||
2615 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
2616 | xfs_iflock(ip); | ||
2617 | } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | ||
2618 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { | ||
2619 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
2620 | return EAGAIN; | ||
2621 | } | ||
2622 | } else { | ||
2623 | return EAGAIN; | ||
2624 | } | ||
2625 | |||
2626 | error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC | ||
2627 | : XFS_IFLUSH_ASYNC_NOBLOCK); | ||
2628 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
2629 | |||
2630 | return error; | ||
2631 | } | ||
2632 | |||
2633 | |||
2634 | int | ||
2635 | xfs_set_dmattrs( | 2407 | xfs_set_dmattrs( |
2636 | xfs_inode_t *ip, | 2408 | xfs_inode_t *ip, |
2637 | u_int evmask, | 2409 | u_int evmask, |
@@ -2676,7 +2448,7 @@ xfs_reclaim( | |||
2676 | ASSERT(!VN_MAPPED(VFS_I(ip))); | 2448 | ASSERT(!VN_MAPPED(VFS_I(ip))); |
2677 | 2449 | ||
2678 | /* bad inode, get out here ASAP */ | 2450 | /* bad inode, get out here ASAP */ |
2679 | if (VN_BAD(VFS_I(ip))) { | 2451 | if (is_bad_inode(VFS_I(ip))) { |
2680 | xfs_ireclaim(ip); | 2452 | xfs_ireclaim(ip); |
2681 | return 0; | 2453 | return 0; |
2682 | } | 2454 | } |
@@ -3090,7 +2862,7 @@ xfs_free_file_space( | |||
3090 | 2862 | ||
3091 | /* | 2863 | /* |
3092 | * Need to zero the stuff we're not freeing, on disk. | 2864 | * Need to zero the stuff we're not freeing, on disk. |
3093 | * If its a realtime file & can't use unwritten extents then we | 2865 | * If it's a realtime file & can't use unwritten extents then we |
3094 | * actually need to zero the extent edges. Otherwise xfs_bunmapi | 2866 | * actually need to zero the extent edges. Otherwise xfs_bunmapi |
3095 | * will take care of it for us. | 2867 | * will take care of it for us. |
3096 | */ | 2868 | */ |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 76df328c61b4..04373c6c61ff 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -31,14 +31,11 @@ int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | |||
31 | struct xfs_inode *ip); | 31 | struct xfs_inode *ip); |
32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
33 | struct xfs_name *target_name); | 33 | struct xfs_name *target_name); |
34 | int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, | ||
35 | mode_t mode, struct xfs_inode **ipp, cred_t *credp); | ||
36 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 34 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, |
37 | xfs_off_t *offset, filldir_t filldir); | 35 | xfs_off_t *offset, filldir_t filldir); |
38 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 36 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
39 | const char *target_path, mode_t mode, struct xfs_inode **ipp, | 37 | const char *target_path, mode_t mode, struct xfs_inode **ipp, |
40 | cred_t *credp); | 38 | cred_t *credp); |
41 | int xfs_inode_flush(struct xfs_inode *ip, int flags); | ||
42 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 39 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |
43 | int xfs_reclaim(struct xfs_inode *ip); | 40 | int xfs_reclaim(struct xfs_inode *ip); |
44 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, | 41 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, |