diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-03 12:53:22 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-03 12:53:22 -0400 |
commit | 9b59f0316bc556a1b63518f0b1224cf9be48467b (patch) | |
tree | d6ffccbe5d9ce5f55c1b2efaf02220c701d4420a | |
parent | ac7c1a776dfe1a9c83ea7885f858f5f1a144d8af (diff) | |
parent | 0d8fe329a80714e0f729ae48cba8d64cbe5701cb (diff) |
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
fs: Add exofs to Kernel build
exofs: Documentation
exofs: export_operations
exofs: super_operations and file_system_type
exofs: dir_inode and directory operations
exofs: address_space_operations
exofs: symlink_inode and fast_symlink_inode operations
exofs: file and file_inode operations
exofs: Kbuild, Headers and osd utils
-rw-r--r-- | Documentation/filesystems/exofs.txt | 176 | ||||
-rw-r--r-- | fs/Kconfig | 2 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/exofs/BUGS | 3 | ||||
-rw-r--r-- | fs/exofs/Kbuild | 16 | ||||
-rw-r--r-- | fs/exofs/Kconfig | 13 | ||||
-rw-r--r-- | fs/exofs/common.h | 184 | ||||
-rw-r--r-- | fs/exofs/dir.c | 672 | ||||
-rw-r--r-- | fs/exofs/exofs.h | 180 | ||||
-rw-r--r-- | fs/exofs/file.c | 87 | ||||
-rw-r--r-- | fs/exofs/inode.c | 1303 | ||||
-rw-r--r-- | fs/exofs/namei.c | 342 | ||||
-rw-r--r-- | fs/exofs/osd.c | 153 | ||||
-rw-r--r-- | fs/exofs/super.c | 584 | ||||
-rw-r--r-- | fs/exofs/symlink.c | 57 |
15 files changed, 3773 insertions, 0 deletions
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt new file mode 100644 index 000000000000..0ced74c2f73c --- /dev/null +++ b/Documentation/filesystems/exofs.txt | |||
@@ -0,0 +1,176 @@ | |||
1 | =============================================================================== | ||
2 | WHAT IS EXOFS? | ||
3 | =============================================================================== | ||
4 | |||
5 | exofs is a file system that uses an OSD and exports the API of a normal Linux | ||
6 | file system. Users access exofs like any other local file system, and exofs | ||
7 | will in turn issue commands to the local OSD initiator. | ||
8 | |||
9 | OSD is a new T10 command set that views storage devices not as a large/flat | ||
10 | array of sectors but as a container of objects, each having a length, quota, | ||
11 | time attributes and more. Each object is addressed by a 64bit ID, and is | ||
12 | contained in a 64bit ID partition. Each object has associated attributes | ||
13 | attached to it, which are an integral part of the object and provide metadata about | ||
14 | the object. The standard defines some common obligatory attributes, but user | ||
15 | attributes can be added as needed. | ||
16 | |||
17 | =============================================================================== | ||
18 | ENVIRONMENT | ||
19 | =============================================================================== | ||
20 | |||
21 | To use this file system, you need to have an object store to run it on. You | ||
22 | may download a target from: | ||
23 | http://open-osd.org | ||
24 | |||
25 | See Documentation/scsi/osd.txt for how to set up a working osd environment. | ||
26 | |||
27 | =============================================================================== | ||
28 | USAGE | ||
29 | =============================================================================== | ||
30 | |||
31 | 1. Download and compile exofs and open-osd initiator: | ||
32 | You need an external Kernel source tree or kernel headers from your | ||
33 | distribution. (anything based on 2.6.26 or later). | ||
34 | |||
35 | a. download open-osd including exofs source using: | ||
36 | [parent-directory]$ git clone git://git.open-osd.org/open-osd.git | ||
37 | |||
38 | b. Build the library module like this: | ||
39 | [parent-directory]$ make -C KSRC=$(KER_DIR) open-osd | ||
40 | |||
41 | This will build both the open-osd initiator as well as the exofs kernel | ||
42 | module. Use whatever parameters you compiled your Kernel with and | ||
43 | $(KER_DIR) above pointing to the Kernel you compile against. See the file | ||
44 | open-osd/top-level-Makefile for an example. | ||
45 | |||
46 | 2. Get the OSD initiator and target set up properly, and login to the target. | ||
47 | See Documentation/scsi/osd.txt for further instructions. Also see ./do-osd | ||
48 | for an example script that does all these steps. | ||
49 | |||
50 | 3. Insmod the exofs.ko module: | ||
51 | [exofs]$ insmod exofs.ko | ||
52 | |||
53 | 4. Make sure the directory where you want to mount exists. If not, create it. | ||
54 | (For example, mkdir /mnt/exofs) | ||
55 | |||
56 | 5. At first run you will need to invoke the mkfs.exofs application | ||
57 | |||
58 | As an example, this will create the file system on: | ||
59 | /dev/osd0 partition ID 65536 | ||
60 | |||
61 | mkfs.exofs --pid=65536 --format /dev/osd0 | ||
62 | |||
63 | The --format option is optional; if not specified, no OSD_FORMAT will be | ||
64 | performed and a clean file system will be created in the specified pid, | ||
65 | in the available space of the target. (Use --format=size_in_meg to limit | ||
66 | the total LUN space available) | ||
67 | |||
68 | If the pid already exists, it will be deleted and a new one will be created in its | ||
69 | place. Be careful. | ||
70 | |||
71 | An exofs lives inside a single OSD partition. You can create multiple exofs | ||
72 | filesystems on the same device using multiple pids. | ||
73 | |||
74 | (run mkfs.exofs without any parameters for usage help message) | ||
75 | |||
76 | 6. Mount the file system. | ||
77 | |||
78 | For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs: | ||
79 | |||
80 | mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/ | ||
81 | |||
82 | 7. For reference (See do-exofs example script): | ||
83 | do-exofs start - an example of how to perform the above steps. | ||
84 | do-exofs stop - an example of how to unmount the file system. | ||
85 | do-exofs format - an example of how to format and mkfs a new exofs. | ||
86 | |||
87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): | ||
88 | CONFIG_EXOFS_DEBUG - for debug messages and extra checks. | ||
89 | |||
90 | =============================================================================== | ||
91 | exofs mount options | ||
92 | =============================================================================== | ||
93 | Similar to any mount command: | ||
94 | mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory | ||
95 | |||
96 | Where: | ||
97 | -t exofs: specifies the exofs file system | ||
98 | |||
99 | /dev/osdX: X is a decimal number. /dev/osdX was created after a successful | ||
100 | login into an OSD target. | ||
101 | |||
102 | mount_exofs_directory: The directory to mount the file system on | ||
103 | |||
104 | exofs specific options: Options are separated by commas (,) | ||
105 | pid=<integer> - The partition number to mount/create as | ||
106 | container of the filesystem. | ||
107 | This option is mandatory | ||
108 | to=<integer> - Timeout in ticks for a single command | ||
109 | default is (60 * HZ) [for debugging only] | ||
110 | |||
111 | =============================================================================== | ||
112 | DESIGN | ||
113 | =============================================================================== | ||
114 | |||
115 | * The file system control block (AKA on-disk superblock) resides in an object | ||
116 | with a special ID (defined in common.h). | ||
117 | Information included in the file system control block is used to fill the | ||
118 | in-memory superblock structure at mount time. This object is created before | ||
119 | the file system is used by mkexofs.c. It contains information such as: | ||
120 | - The file system's magic number | ||
121 | - The next inode number to be allocated | ||
122 | |||
123 | * Each file resides in its own object and contains the data (and it will be | ||
124 | possible to extend the file over multiple objects, though this has not been | ||
125 | implemented yet). | ||
126 | |||
127 | * A directory is treated as a file, and essentially contains a list of <file | ||
128 | name, inode #> pairs for files that are found in that directory. The object | ||
129 | IDs correspond to the files' inode numbers and will be allocated according to | ||
130 | a bitmap (stored in a separate object). Currently they are allocated using a | ||
131 | counter. | ||
132 | |||
133 | * Each file's control block (AKA on-disk inode) is stored in its object's | ||
134 | attributes. This applies to both regular files and other types (directories, | ||
135 | device files, symlinks, etc.). | ||
136 | |||
137 | * Credentials are generated per object (inode and superblock) when they are | ||
138 | created in memory (read off disk or created). The credential works for all | ||
139 | operations and is used as long as the object remains in memory. | ||
140 | |||
141 | * Async OSD operations are used whenever possible, but the target may execute | ||
142 | them out of order. The operations that concern us are create, delete, | ||
143 | readpage, writepage, update_inode, and truncate. The following pairs of | ||
144 | operations should execute in the order written, and we need to prevent them | ||
145 | from executing in reverse order: | ||
146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED | ||
147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - | ||
148 | in create's callback function, and when we successfully do a read_inode. | ||
149 | OBJ_2BCREATED is set in the beginning of the create function, so we | ||
150 | know that we should wait. | ||
151 | - create/delete: delete should wait until the object is created | ||
152 | on the OSD. | ||
153 | - create/readpage: readpage should be able to return a page | ||
154 | full of zeroes in this case. If there was a write already | ||
155 | en-route (i.e. create, writepage, readpage) then the page | ||
156 | would be locked, and so it would really be the same as | ||
157 | create/writepage. | ||
158 | - create/writepage: if writepage is called for a sync write, it | ||
159 | should wait until the object is created on the OSD. | ||
160 | Otherwise, it should just return. | ||
161 | - create/truncate: truncate should wait until the object is | ||
162 | created on the OSD. | ||
163 | - create/update_inode: update_inode should wait until the | ||
164 | object is created on the OSD. | ||
165 | - Handled by VFS locks: | ||
166 | - readpage/delete: shouldn't happen because of page lock. | ||
167 | - writepage/delete: shouldn't happen because of page lock. | ||
168 | - readpage/writepage: shouldn't happen because of page lock. | ||
169 | |||
170 | =============================================================================== | ||
171 | LICENSE/COPYRIGHT | ||
172 | =============================================================================== | ||
173 | The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel | ||
174 | version 2.6.10). All files include the original copyrights, and the license | ||
175 | is GPL version 2 (only version 2, as is true for the Linux kernel). The | ||
176 | Linux kernel can be downloaded from www.kernel.org. | ||
diff --git a/fs/Kconfig b/fs/Kconfig index cef8b18ceaa3..ae3b34a2ea69 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -169,6 +169,8 @@ source "fs/romfs/Kconfig" | |||
169 | source "fs/sysv/Kconfig" | 169 | source "fs/sysv/Kconfig" |
170 | source "fs/ufs/Kconfig" | 170 | source "fs/ufs/Kconfig" |
171 | 171 | ||
172 | source "fs/exofs/Kconfig" | ||
173 | |||
172 | endif # MISC_FILESYSTEMS | 174 | endif # MISC_FILESYSTEMS |
173 | 175 | ||
174 | menuconfig NETWORK_FILESYSTEMS | 176 | menuconfig NETWORK_FILESYSTEMS |
diff --git a/fs/Makefile b/fs/Makefile index b5cd8e18dd9f..15f73014a208 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -120,3 +120,4 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ | |||
120 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ | 120 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ |
121 | obj-$(CONFIG_BTRFS_FS) += btrfs/ | 121 | obj-$(CONFIG_BTRFS_FS) += btrfs/ |
122 | obj-$(CONFIG_GFS2_FS) += gfs2/ | 122 | obj-$(CONFIG_GFS2_FS) += gfs2/ |
123 | obj-$(CONFIG_EXOFS_FS) += exofs/ | ||
diff --git a/fs/exofs/BUGS b/fs/exofs/BUGS new file mode 100644 index 000000000000..1b2d4c63a579 --- /dev/null +++ b/fs/exofs/BUGS | |||
@@ -0,0 +1,3 @@ | |||
1 | - Out-of-space may cause a severe problem if the object (and directory entry) | ||
2 | were written, but the inode attributes failed. Then, if the filesystem was | ||
3 | unmounted and remounted, the kernel can get into an endless loop doing a readdir. | ||
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild new file mode 100644 index 000000000000..cc2d22db119c --- /dev/null +++ b/fs/exofs/Kbuild | |||
@@ -0,0 +1,16 @@ | |||
1 | # | ||
2 | # Kbuild for the EXOFS module | ||
3 | # | ||
4 | # Copyright (C) 2008 Panasas Inc. All rights reserved. | ||
5 | # | ||
6 | # Authors: | ||
7 | # Boaz Harrosh <bharrosh@panasas.com> | ||
8 | # | ||
9 | # This program is free software; you can redistribute it and/or modify | ||
10 | # it under the terms of the GNU General Public License version 2 | ||
11 | # | ||
12 | # Kbuild - Gets included from the Kernel's Makefile and build system | ||
13 | # | ||
14 | |||
15 | exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o | ||
16 | obj-$(CONFIG_EXOFS_FS) += exofs.o | ||
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig new file mode 100644 index 000000000000..86194b2f799d --- /dev/null +++ b/fs/exofs/Kconfig | |||
@@ -0,0 +1,13 @@ | |||
1 | config EXOFS_FS | ||
2 | tristate "exofs: OSD based file system support" | ||
3 | depends on SCSI_OSD_ULD | ||
4 | help | ||
5 | EXOFS is a file system that uses an OSD storage device, | ||
6 | as its backing storage. | ||
7 | |||
8 | # Debugging-related stuff | ||
9 | config EXOFS_DEBUG | ||
10 | bool "Enable debugging" | ||
11 | depends on EXOFS_FS | ||
12 | help | ||
13 | This option enables EXOFS debug prints. | ||
diff --git a/fs/exofs/common.h b/fs/exofs/common.h new file mode 100644 index 000000000000..b1512c4bb8c7 --- /dev/null +++ b/fs/exofs/common.h | |||
@@ -0,0 +1,184 @@ | |||
1 | /* | ||
2 | * common.h - Common definitions for both Kernel and user-mode utilities | ||
3 | * | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
6 | * Copyright (C) 2005, 2006 | ||
7 | * International Business Machines | ||
8 | * Copyright (C) 2008, 2009 | ||
9 | * Boaz Harrosh <bharrosh@panasas.com> | ||
10 | * | ||
11 | * Copyrights for code taken from ext2: | ||
12 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
13 | * Remy Card (card@masi.ibp.fr) | ||
14 | * Laboratoire MASI - Institut Blaise Pascal | ||
15 | * Universite Pierre et Marie Curie (Paris VI) | ||
16 | * from | ||
17 | * linux/fs/minix/inode.c | ||
18 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
19 | * | ||
20 | * This file is part of exofs. | ||
21 | * | ||
22 | * exofs is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
25 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
26 | * version of GPL for exofs is version 2. | ||
27 | * | ||
28 | * exofs is distributed in the hope that it will be useful, | ||
29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
31 | * GNU General Public License for more details. | ||
32 | * | ||
33 | * You should have received a copy of the GNU General Public License | ||
34 | * along with exofs; if not, write to the Free Software | ||
35 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
36 | */ | ||
37 | |||
38 | #ifndef __EXOFS_COM_H__ | ||
39 | #define __EXOFS_COM_H__ | ||
40 | |||
41 | #include <linux/types.h> | ||
42 | |||
43 | #include <scsi/osd_attributes.h> | ||
44 | #include <scsi/osd_initiator.h> | ||
45 | #include <scsi/osd_sec.h> | ||
46 | |||
47 | /**************************************************************************** | ||
48 | * Object ID related defines | ||
49 | * NOTE: inode# = object ID - EXOFS_OBJ_OFF | ||
50 | ****************************************************************************/ | ||
51 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ | ||
52 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ | ||
53 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ | ||
54 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ | ||
55 | |||
56 | /* exofs Application specific page/attribute */ | ||
57 | # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) | ||
58 | # define EXOFS_ATTR_INODE_DATA 1 | ||
59 | |||
60 | /* | ||
61 | * The maximum number of files we can have is limited by the size of the | ||
62 | * inode number. This is the largest object ID that the file system supports. | ||
63 | * Object IDs 0, 1, and 2 are always in use (see above defines). | ||
64 | */ | ||
65 | enum { | ||
66 | EXOFS_MAX_INO_ID = (sizeof(ino_t) * 8 == 64) ? ULLONG_MAX : | ||
67 | (1ULL << (sizeof(ino_t) * 8ULL - 1ULL)), | ||
68 | EXOFS_MAX_ID = (EXOFS_MAX_INO_ID - 1 - EXOFS_OBJ_OFF), | ||
69 | }; | ||
70 | |||
71 | /**************************************************************************** | ||
72 | * Misc. | ||
73 | ****************************************************************************/ | ||
74 | #define EXOFS_BLKSHIFT 12 | ||
75 | #define EXOFS_BLKSIZE (1UL << EXOFS_BLKSHIFT) | ||
76 | |||
77 | /**************************************************************************** | ||
78 | * superblock-related things | ||
79 | ****************************************************************************/ | ||
80 | #define EXOFS_SUPER_MAGIC 0x5DF5 | ||
81 | |||
82 | /* | ||
83 | * The file system control block - stored in an object's data (mainly, the one | ||
84 | * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored | ||
85 | * on disk. Right now it just has a magic value, which is basically a sanity | ||
86 | * check on our ability to communicate with the object store. | ||
87 | */ | ||
88 | struct exofs_fscb { | ||
89 | __le64 s_nextid; /* Highest object ID used */ | ||
90 | __le32 s_numfiles; /* Number of files on fs */ | ||
91 | __le16 s_magic; /* Magic signature */ | ||
92 | __le16 s_newfs; /* Non-zero if this is a new fs */ | ||
93 | }; | ||
94 | |||
95 | /**************************************************************************** | ||
96 | * inode-related things | ||
97 | ****************************************************************************/ | ||
98 | #define EXOFS_IDATA 5 | ||
99 | |||
100 | /* | ||
101 | * The file control block - stored in an object's attributes. This is where | ||
102 | * the in-memory inode is stored on disk. | ||
103 | */ | ||
104 | struct exofs_fcb { | ||
105 | __le64 i_size; /* Size of the file */ | ||
106 | __le16 i_mode; /* File mode */ | ||
107 | __le16 i_links_count; /* Links count */ | ||
108 | __le32 i_uid; /* Owner Uid */ | ||
109 | __le32 i_gid; /* Group Id */ | ||
110 | __le32 i_atime; /* Access time */ | ||
111 | __le32 i_ctime; /* Creation time */ | ||
112 | __le32 i_mtime; /* Modification time */ | ||
113 | __le32 i_flags; /* File flags (unused for now)*/ | ||
114 | __le32 i_generation; /* File version (for NFS) */ | ||
115 | __le32 i_data[EXOFS_IDATA]; /* Short symlink names and device #s */ | ||
116 | }; | ||
117 | |||
118 | #define EXOFS_INO_ATTR_SIZE sizeof(struct exofs_fcb) | ||
119 | |||
120 | /* This is the Attribute the fcb is stored in */ | ||
121 | static const struct __weak osd_attr g_attr_inode_data = ATTR_DEF( | ||
122 | EXOFS_APAGE_FS_DATA, | ||
123 | EXOFS_ATTR_INODE_DATA, | ||
124 | EXOFS_INO_ATTR_SIZE); | ||
125 | |||
126 | /**************************************************************************** | ||
127 | * dentry-related things | ||
128 | ****************************************************************************/ | ||
129 | #define EXOFS_NAME_LEN 255 | ||
130 | |||
131 | /* | ||
132 | * The on-disk directory entry | ||
133 | */ | ||
134 | struct exofs_dir_entry { | ||
135 | __le64 inode_no; /* inode number */ | ||
136 | __le16 rec_len; /* directory entry length */ | ||
137 | u8 name_len; /* name length */ | ||
138 | u8 file_type; /* umm...file type */ | ||
139 | char name[EXOFS_NAME_LEN]; /* file name */ | ||
140 | }; | ||
141 | |||
142 | enum { | ||
143 | EXOFS_FT_UNKNOWN, | ||
144 | EXOFS_FT_REG_FILE, | ||
145 | EXOFS_FT_DIR, | ||
146 | EXOFS_FT_CHRDEV, | ||
147 | EXOFS_FT_BLKDEV, | ||
148 | EXOFS_FT_FIFO, | ||
149 | EXOFS_FT_SOCK, | ||
150 | EXOFS_FT_SYMLINK, | ||
151 | EXOFS_FT_MAX | ||
152 | }; | ||
153 | |||
154 | #define EXOFS_DIR_PAD 4 | ||
155 | #define EXOFS_DIR_ROUND (EXOFS_DIR_PAD - 1) | ||
156 | #define EXOFS_DIR_REC_LEN(name_len) \ | ||
157 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ | ||
158 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) | ||
159 | |||
160 | /************************* | ||
161 | * function declarations * | ||
162 | *************************/ | ||
163 | /* osd.c */ | ||
164 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
165 | const struct osd_obj_id *obj); | ||
166 | |||
167 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); | ||
168 | static inline int exofs_check_ok(struct osd_request *or) | ||
169 | { | ||
170 | return exofs_check_ok_resid(or, NULL, NULL); | ||
171 | } | ||
172 | int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); | ||
173 | int exofs_async_op(struct osd_request *or, | ||
174 | osd_req_done_fn *async_done, void *caller_context, u8 *cred); | ||
175 | |||
176 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | ||
177 | |||
178 | int osd_req_read_kern(struct osd_request *or, | ||
179 | const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); | ||
180 | |||
181 | int osd_req_write_kern(struct osd_request *or, | ||
182 | const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); | ||
183 | |||
184 | #endif /*ifndef __EXOFS_COM_H__*/ | ||
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c new file mode 100644 index 000000000000..65b0c8c776a1 --- /dev/null +++ b/fs/exofs/dir.c | |||
@@ -0,0 +1,672 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include "exofs.h" | ||
37 | |||
38 | static inline unsigned exofs_chunk_size(struct inode *inode) | ||
39 | { | ||
40 | return inode->i_sb->s_blocksize; | ||
41 | } | ||
42 | |||
43 | static inline void exofs_put_page(struct page *page) | ||
44 | { | ||
45 | kunmap(page); | ||
46 | page_cache_release(page); | ||
47 | } | ||
48 | |||
49 | /* Accesses dir's inode->i_size; must be called under inode lock */ | ||
50 | static inline unsigned long dir_pages(struct inode *inode) | ||
51 | { | ||
52 | return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
53 | } | ||
54 | |||
55 | static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr) | ||
56 | { | ||
57 | loff_t last_byte = inode->i_size; | ||
58 | |||
59 | last_byte -= page_nr << PAGE_CACHE_SHIFT; | ||
60 | if (last_byte > PAGE_CACHE_SIZE) | ||
61 | last_byte = PAGE_CACHE_SIZE; | ||
62 | return last_byte; | ||
63 | } | ||
64 | |||
65 | static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len) | ||
66 | { | ||
67 | struct address_space *mapping = page->mapping; | ||
68 | struct inode *dir = mapping->host; | ||
69 | int err = 0; | ||
70 | |||
71 | dir->i_version++; | ||
72 | |||
73 | if (!PageUptodate(page)) | ||
74 | SetPageUptodate(page); | ||
75 | |||
76 | if (pos+len > dir->i_size) { | ||
77 | i_size_write(dir, pos+len); | ||
78 | mark_inode_dirty(dir); | ||
79 | } | ||
80 | set_page_dirty(page); | ||
81 | |||
82 | if (IS_DIRSYNC(dir)) | ||
83 | err = write_one_page(page, 1); | ||
84 | else | ||
85 | unlock_page(page); | ||
86 | |||
87 | return err; | ||
88 | } | ||
89 | |||
90 | static void exofs_check_page(struct page *page) | ||
91 | { | ||
92 | struct inode *dir = page->mapping->host; | ||
93 | unsigned chunk_size = exofs_chunk_size(dir); | ||
94 | char *kaddr = page_address(page); | ||
95 | unsigned offs, rec_len; | ||
96 | unsigned limit = PAGE_CACHE_SIZE; | ||
97 | struct exofs_dir_entry *p; | ||
98 | char *error; | ||
99 | |||
100 | /* if the page is the last one in the directory */ | ||
101 | if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { | ||
102 | limit = dir->i_size & ~PAGE_CACHE_MASK; | ||
103 | if (limit & (chunk_size - 1)) | ||
104 | goto Ebadsize; | ||
105 | if (!limit) | ||
106 | goto out; | ||
107 | } | ||
108 | for (offs = 0; offs <= limit - EXOFS_DIR_REC_LEN(1); offs += rec_len) { | ||
109 | p = (struct exofs_dir_entry *)(kaddr + offs); | ||
110 | rec_len = le16_to_cpu(p->rec_len); | ||
111 | |||
112 | if (rec_len < EXOFS_DIR_REC_LEN(1)) | ||
113 | goto Eshort; | ||
114 | if (rec_len & 3) | ||
115 | goto Ealign; | ||
116 | if (rec_len < EXOFS_DIR_REC_LEN(p->name_len)) | ||
117 | goto Enamelen; | ||
118 | if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) | ||
119 | goto Espan; | ||
120 | } | ||
121 | if (offs != limit) | ||
122 | goto Eend; | ||
123 | out: | ||
124 | SetPageChecked(page); | ||
125 | return; | ||
126 | |||
127 | Ebadsize: | ||
128 | EXOFS_ERR("ERROR [exofs_check_page]: " | ||
129 | "size of directory #%lu is not a multiple of chunk size", | ||
130 | dir->i_ino | ||
131 | ); | ||
132 | goto fail; | ||
133 | Eshort: | ||
134 | error = "rec_len is smaller than minimal"; | ||
135 | goto bad_entry; | ||
136 | Ealign: | ||
137 | error = "unaligned directory entry"; | ||
138 | goto bad_entry; | ||
139 | Enamelen: | ||
140 | error = "rec_len is too small for name_len"; | ||
141 | goto bad_entry; | ||
142 | Espan: | ||
143 | error = "directory entry across blocks"; | ||
144 | goto bad_entry; | ||
145 | bad_entry: | ||
146 | EXOFS_ERR( | ||
147 | "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - " | ||
148 | "offset=%lu, inode=%llu, rec_len=%d, name_len=%d", | ||
149 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
150 | _LLU(le64_to_cpu(p->inode_no)), | ||
151 | rec_len, p->name_len); | ||
152 | goto fail; | ||
153 | Eend: | ||
154 | p = (struct exofs_dir_entry *)(kaddr + offs); | ||
155 | EXOFS_ERR("ERROR [exofs_check_page]: " | ||
156 | "entry in directory #%lu spans the page boundary" | ||
157 | "offset=%lu, inode=%llu", | ||
158 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
159 | _LLU(le64_to_cpu(p->inode_no))); | ||
160 | fail: | ||
161 | SetPageChecked(page); | ||
162 | SetPageError(page); | ||
163 | } | ||
164 | |||
165 | static struct page *exofs_get_page(struct inode *dir, unsigned long n) | ||
166 | { | ||
167 | struct address_space *mapping = dir->i_mapping; | ||
168 | struct page *page = read_mapping_page(mapping, n, NULL); | ||
169 | |||
170 | if (!IS_ERR(page)) { | ||
171 | kmap(page); | ||
172 | if (!PageChecked(page)) | ||
173 | exofs_check_page(page); | ||
174 | if (PageError(page)) | ||
175 | goto fail; | ||
176 | } | ||
177 | return page; | ||
178 | |||
179 | fail: | ||
180 | exofs_put_page(page); | ||
181 | return ERR_PTR(-EIO); | ||
182 | } | ||
183 | |||
184 | static inline int exofs_match(int len, const unsigned char *name, | ||
185 | struct exofs_dir_entry *de) | ||
186 | { | ||
187 | if (len != de->name_len) | ||
188 | return 0; | ||
189 | if (!de->inode_no) | ||
190 | return 0; | ||
191 | return !memcmp(name, de->name, len); | ||
192 | } | ||
193 | |||
194 | static inline | ||
195 | struct exofs_dir_entry *exofs_next_entry(struct exofs_dir_entry *p) | ||
196 | { | ||
197 | return (struct exofs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); | ||
198 | } | ||
199 | |||
200 | static inline unsigned | ||
201 | exofs_validate_entry(char *base, unsigned offset, unsigned mask) | ||
202 | { | ||
203 | struct exofs_dir_entry *de = (struct exofs_dir_entry *)(base + offset); | ||
204 | struct exofs_dir_entry *p = | ||
205 | (struct exofs_dir_entry *)(base + (offset&mask)); | ||
206 | while ((char *)p < (char *)de) { | ||
207 | if (p->rec_len == 0) | ||
208 | break; | ||
209 | p = exofs_next_entry(p); | ||
210 | } | ||
211 | return (char *)p - base; | ||
212 | } | ||
213 | |||
214 | static unsigned char exofs_filetype_table[EXOFS_FT_MAX] = { | ||
215 | [EXOFS_FT_UNKNOWN] = DT_UNKNOWN, | ||
216 | [EXOFS_FT_REG_FILE] = DT_REG, | ||
217 | [EXOFS_FT_DIR] = DT_DIR, | ||
218 | [EXOFS_FT_CHRDEV] = DT_CHR, | ||
219 | [EXOFS_FT_BLKDEV] = DT_BLK, | ||
220 | [EXOFS_FT_FIFO] = DT_FIFO, | ||
221 | [EXOFS_FT_SOCK] = DT_SOCK, | ||
222 | [EXOFS_FT_SYMLINK] = DT_LNK, | ||
223 | }; | ||
224 | |||
225 | #define S_SHIFT 12 | ||
226 | static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = { | ||
227 | [S_IFREG >> S_SHIFT] = EXOFS_FT_REG_FILE, | ||
228 | [S_IFDIR >> S_SHIFT] = EXOFS_FT_DIR, | ||
229 | [S_IFCHR >> S_SHIFT] = EXOFS_FT_CHRDEV, | ||
230 | [S_IFBLK >> S_SHIFT] = EXOFS_FT_BLKDEV, | ||
231 | [S_IFIFO >> S_SHIFT] = EXOFS_FT_FIFO, | ||
232 | [S_IFSOCK >> S_SHIFT] = EXOFS_FT_SOCK, | ||
233 | [S_IFLNK >> S_SHIFT] = EXOFS_FT_SYMLINK, | ||
234 | }; | ||
235 | |||
236 | static inline | ||
237 | void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) | ||
238 | { | ||
239 | mode_t mode = inode->i_mode; | ||
240 | de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | ||
241 | } | ||
242 | |||
243 | static int | ||
244 | exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
245 | { | ||
246 | loff_t pos = filp->f_pos; | ||
247 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
248 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | ||
249 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | ||
250 | unsigned long npages = dir_pages(inode); | ||
251 | unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); | ||
252 | unsigned char *types = NULL; | ||
253 | int need_revalidate = (filp->f_version != inode->i_version); | ||
254 | |||
255 | if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) | ||
256 | return 0; | ||
257 | |||
258 | types = exofs_filetype_table; | ||
259 | |||
260 | for ( ; n < npages; n++, offset = 0) { | ||
261 | char *kaddr, *limit; | ||
262 | struct exofs_dir_entry *de; | ||
263 | struct page *page = exofs_get_page(inode, n); | ||
264 | |||
265 | if (IS_ERR(page)) { | ||
266 | EXOFS_ERR("ERROR: " | ||
267 | "bad page in #%lu", | ||
268 | inode->i_ino); | ||
269 | filp->f_pos += PAGE_CACHE_SIZE - offset; | ||
270 | return PTR_ERR(page); | ||
271 | } | ||
272 | kaddr = page_address(page); | ||
273 | if (unlikely(need_revalidate)) { | ||
274 | if (offset) { | ||
275 | offset = exofs_validate_entry(kaddr, offset, | ||
276 | chunk_mask); | ||
277 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | ||
278 | } | ||
279 | filp->f_version = inode->i_version; | ||
280 | need_revalidate = 0; | ||
281 | } | ||
282 | de = (struct exofs_dir_entry *)(kaddr + offset); | ||
283 | limit = kaddr + exofs_last_byte(inode, n) - | ||
284 | EXOFS_DIR_REC_LEN(1); | ||
285 | for (; (char *)de <= limit; de = exofs_next_entry(de)) { | ||
286 | if (de->rec_len == 0) { | ||
287 | EXOFS_ERR("ERROR: " | ||
288 | "zero-length directory entry"); | ||
289 | exofs_put_page(page); | ||
290 | return -EIO; | ||
291 | } | ||
292 | if (de->inode_no) { | ||
293 | int over; | ||
294 | unsigned char d_type = DT_UNKNOWN; | ||
295 | |||
296 | if (types && de->file_type < EXOFS_FT_MAX) | ||
297 | d_type = types[de->file_type]; | ||
298 | |||
299 | offset = (char *)de - kaddr; | ||
300 | over = filldir(dirent, de->name, de->name_len, | ||
301 | (n<<PAGE_CACHE_SHIFT) | offset, | ||
302 | le64_to_cpu(de->inode_no), | ||
303 | d_type); | ||
304 | if (over) { | ||
305 | exofs_put_page(page); | ||
306 | return 0; | ||
307 | } | ||
308 | } | ||
309 | filp->f_pos += le16_to_cpu(de->rec_len); | ||
310 | } | ||
311 | exofs_put_page(page); | ||
312 | } | ||
313 | |||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | struct exofs_dir_entry *exofs_find_entry(struct inode *dir, | ||
318 | struct dentry *dentry, struct page **res_page) | ||
319 | { | ||
320 | const unsigned char *name = dentry->d_name.name; | ||
321 | int namelen = dentry->d_name.len; | ||
322 | unsigned reclen = EXOFS_DIR_REC_LEN(namelen); | ||
323 | unsigned long start, n; | ||
324 | unsigned long npages = dir_pages(dir); | ||
325 | struct page *page = NULL; | ||
326 | struct exofs_i_info *oi = exofs_i(dir); | ||
327 | struct exofs_dir_entry *de; | ||
328 | |||
329 | if (npages == 0) | ||
330 | goto out; | ||
331 | |||
332 | *res_page = NULL; | ||
333 | |||
334 | start = oi->i_dir_start_lookup; | ||
335 | if (start >= npages) | ||
336 | start = 0; | ||
337 | n = start; | ||
338 | do { | ||
339 | char *kaddr; | ||
340 | page = exofs_get_page(dir, n); | ||
341 | if (!IS_ERR(page)) { | ||
342 | kaddr = page_address(page); | ||
343 | de = (struct exofs_dir_entry *) kaddr; | ||
344 | kaddr += exofs_last_byte(dir, n) - reclen; | ||
345 | while ((char *) de <= kaddr) { | ||
346 | if (de->rec_len == 0) { | ||
347 | EXOFS_ERR( | ||
348 | "ERROR: exofs_find_entry: " | ||
349 | "zero-length directory entry"); | ||
350 | exofs_put_page(page); | ||
351 | goto out; | ||
352 | } | ||
353 | if (exofs_match(namelen, name, de)) | ||
354 | goto found; | ||
355 | de = exofs_next_entry(de); | ||
356 | } | ||
357 | exofs_put_page(page); | ||
358 | } | ||
359 | if (++n >= npages) | ||
360 | n = 0; | ||
361 | } while (n != start); | ||
362 | out: | ||
363 | return NULL; | ||
364 | |||
365 | found: | ||
366 | *res_page = page; | ||
367 | oi->i_dir_start_lookup = n; | ||
368 | return de; | ||
369 | } | ||
370 | |||
371 | struct exofs_dir_entry *exofs_dotdot(struct inode *dir, struct page **p) | ||
372 | { | ||
373 | struct page *page = exofs_get_page(dir, 0); | ||
374 | struct exofs_dir_entry *de = NULL; | ||
375 | |||
376 | if (!IS_ERR(page)) { | ||
377 | de = exofs_next_entry( | ||
378 | (struct exofs_dir_entry *)page_address(page)); | ||
379 | *p = page; | ||
380 | } | ||
381 | return de; | ||
382 | } | ||
383 | |||
384 | ino_t exofs_parent_ino(struct dentry *child) | ||
385 | { | ||
386 | struct page *page; | ||
387 | struct exofs_dir_entry *de; | ||
388 | ino_t ino; | ||
389 | |||
390 | de = exofs_dotdot(child->d_inode, &page); | ||
391 | if (!de) | ||
392 | return 0; | ||
393 | |||
394 | ino = le64_to_cpu(de->inode_no); | ||
395 | exofs_put_page(page); | ||
396 | return ino; | ||
397 | } | ||
398 | |||
399 | ino_t exofs_inode_by_name(struct inode *dir, struct dentry *dentry) | ||
400 | { | ||
401 | ino_t res = 0; | ||
402 | struct exofs_dir_entry *de; | ||
403 | struct page *page; | ||
404 | |||
405 | de = exofs_find_entry(dir, dentry, &page); | ||
406 | if (de) { | ||
407 | res = le64_to_cpu(de->inode_no); | ||
408 | exofs_put_page(page); | ||
409 | } | ||
410 | return res; | ||
411 | } | ||
412 | |||
413 | int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de, | ||
414 | struct page *page, struct inode *inode) | ||
415 | { | ||
416 | loff_t pos = page_offset(page) + | ||
417 | (char *) de - (char *) page_address(page); | ||
418 | unsigned len = le16_to_cpu(de->rec_len); | ||
419 | int err; | ||
420 | |||
421 | lock_page(page); | ||
422 | err = exofs_write_begin(NULL, page->mapping, pos, len, | ||
423 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
424 | if (err) | ||
425 | EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n", | ||
426 | err); | ||
427 | |||
428 | de->inode_no = cpu_to_le64(inode->i_ino); | ||
429 | exofs_set_de_type(de, inode); | ||
430 | if (likely(!err)) | ||
431 | err = exofs_commit_chunk(page, pos, len); | ||
432 | exofs_put_page(page); | ||
433 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
434 | mark_inode_dirty(dir); | ||
435 | return err; | ||
436 | } | ||
437 | |||
438 | int exofs_add_link(struct dentry *dentry, struct inode *inode) | ||
439 | { | ||
440 | struct inode *dir = dentry->d_parent->d_inode; | ||
441 | const unsigned char *name = dentry->d_name.name; | ||
442 | int namelen = dentry->d_name.len; | ||
443 | unsigned chunk_size = exofs_chunk_size(dir); | ||
444 | unsigned reclen = EXOFS_DIR_REC_LEN(namelen); | ||
445 | unsigned short rec_len, name_len; | ||
446 | struct page *page = NULL; | ||
447 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
448 | struct exofs_dir_entry *de; | ||
449 | unsigned long npages = dir_pages(dir); | ||
450 | unsigned long n; | ||
451 | char *kaddr; | ||
452 | loff_t pos; | ||
453 | int err; | ||
454 | |||
455 | for (n = 0; n <= npages; n++) { | ||
456 | char *dir_end; | ||
457 | |||
458 | page = exofs_get_page(dir, n); | ||
459 | err = PTR_ERR(page); | ||
460 | if (IS_ERR(page)) | ||
461 | goto out; | ||
462 | lock_page(page); | ||
463 | kaddr = page_address(page); | ||
464 | dir_end = kaddr + exofs_last_byte(dir, n); | ||
465 | de = (struct exofs_dir_entry *)kaddr; | ||
466 | kaddr += PAGE_CACHE_SIZE - reclen; | ||
467 | while ((char *)de <= kaddr) { | ||
468 | if ((char *)de == dir_end) { | ||
469 | name_len = 0; | ||
470 | rec_len = chunk_size; | ||
471 | de->rec_len = cpu_to_le16(chunk_size); | ||
472 | de->inode_no = 0; | ||
473 | goto got_it; | ||
474 | } | ||
475 | if (de->rec_len == 0) { | ||
476 | EXOFS_ERR("ERROR: exofs_add_link: " | ||
477 | "zero-length directory entry"); | ||
478 | err = -EIO; | ||
479 | goto out_unlock; | ||
480 | } | ||
481 | err = -EEXIST; | ||
482 | if (exofs_match(namelen, name, de)) | ||
483 | goto out_unlock; | ||
484 | name_len = EXOFS_DIR_REC_LEN(de->name_len); | ||
485 | rec_len = le16_to_cpu(de->rec_len); | ||
486 | if (!de->inode_no && rec_len >= reclen) | ||
487 | goto got_it; | ||
488 | if (rec_len >= name_len + reclen) | ||
489 | goto got_it; | ||
490 | de = (struct exofs_dir_entry *) ((char *) de + rec_len); | ||
491 | } | ||
492 | unlock_page(page); | ||
493 | exofs_put_page(page); | ||
494 | } | ||
495 | |||
496 | EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode); | ||
497 | return -EINVAL; | ||
498 | |||
499 | got_it: | ||
500 | pos = page_offset(page) + | ||
501 | (char *)de - (char *)page_address(page); | ||
502 | err = exofs_write_begin(NULL, page->mapping, pos, rec_len, 0, | ||
503 | &page, NULL); | ||
504 | if (err) | ||
505 | goto out_unlock; | ||
506 | if (de->inode_no) { | ||
507 | struct exofs_dir_entry *de1 = | ||
508 | (struct exofs_dir_entry *)((char *)de + name_len); | ||
509 | de1->rec_len = cpu_to_le16(rec_len - name_len); | ||
510 | de->rec_len = cpu_to_le16(name_len); | ||
511 | de = de1; | ||
512 | } | ||
513 | de->name_len = namelen; | ||
514 | memcpy(de->name, name, namelen); | ||
515 | de->inode_no = cpu_to_le64(inode->i_ino); | ||
516 | exofs_set_de_type(de, inode); | ||
517 | err = exofs_commit_chunk(page, pos, rec_len); | ||
518 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
519 | mark_inode_dirty(dir); | ||
520 | sbi->s_numfiles++; | ||
521 | |||
522 | out_put: | ||
523 | exofs_put_page(page); | ||
524 | out: | ||
525 | return err; | ||
526 | out_unlock: | ||
527 | unlock_page(page); | ||
528 | goto out_put; | ||
529 | } | ||
530 | |||
531 | int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page) | ||
532 | { | ||
533 | struct address_space *mapping = page->mapping; | ||
534 | struct inode *inode = mapping->host; | ||
535 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
536 | char *kaddr = page_address(page); | ||
537 | unsigned from = ((char *)dir - kaddr) & ~(exofs_chunk_size(inode)-1); | ||
538 | unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); | ||
539 | loff_t pos; | ||
540 | struct exofs_dir_entry *pde = NULL; | ||
541 | struct exofs_dir_entry *de = (struct exofs_dir_entry *) (kaddr + from); | ||
542 | int err; | ||
543 | |||
544 | while (de < dir) { | ||
545 | if (de->rec_len == 0) { | ||
546 | EXOFS_ERR("ERROR: exofs_delete_entry:" | ||
547 | "zero-length directory entry"); | ||
548 | err = -EIO; | ||
549 | goto out; | ||
550 | } | ||
551 | pde = de; | ||
552 | de = exofs_next_entry(de); | ||
553 | } | ||
554 | if (pde) | ||
555 | from = (char *)pde - (char *)page_address(page); | ||
556 | pos = page_offset(page) + from; | ||
557 | lock_page(page); | ||
558 | err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0, | ||
559 | &page, NULL); | ||
560 | if (err) | ||
561 | EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n", | ||
562 | err); | ||
563 | if (pde) | ||
564 | pde->rec_len = cpu_to_le16(to - from); | ||
565 | dir->inode_no = 0; | ||
566 | if (likely(!err)) | ||
567 | err = exofs_commit_chunk(page, pos, to - from); | ||
568 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | ||
569 | mark_inode_dirty(inode); | ||
570 | sbi->s_numfiles--; | ||
571 | out: | ||
572 | exofs_put_page(page); | ||
573 | return err; | ||
574 | } | ||
575 | |||
576 | /* kept aligned on 4 bytes */ | ||
577 | #define THIS_DIR ".\0\0" | ||
578 | #define PARENT_DIR "..\0" | ||
579 | |||
580 | int exofs_make_empty(struct inode *inode, struct inode *parent) | ||
581 | { | ||
582 | struct address_space *mapping = inode->i_mapping; | ||
583 | struct page *page = grab_cache_page(mapping, 0); | ||
584 | unsigned chunk_size = exofs_chunk_size(inode); | ||
585 | struct exofs_dir_entry *de; | ||
586 | int err; | ||
587 | void *kaddr; | ||
588 | |||
589 | if (!page) | ||
590 | return -ENOMEM; | ||
591 | |||
592 | err = exofs_write_begin(NULL, page->mapping, 0, chunk_size, 0, | ||
593 | &page, NULL); | ||
594 | if (err) { | ||
595 | unlock_page(page); | ||
596 | goto fail; | ||
597 | } | ||
598 | |||
599 | kaddr = kmap_atomic(page, KM_USER0); | ||
600 | de = (struct exofs_dir_entry *)kaddr; | ||
601 | de->name_len = 1; | ||
602 | de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); | ||
603 | memcpy(de->name, THIS_DIR, sizeof(THIS_DIR)); | ||
604 | de->inode_no = cpu_to_le64(inode->i_ino); | ||
605 | exofs_set_de_type(de, inode); | ||
606 | |||
607 | de = (struct exofs_dir_entry *)(kaddr + EXOFS_DIR_REC_LEN(1)); | ||
608 | de->name_len = 2; | ||
609 | de->rec_len = cpu_to_le16(chunk_size - EXOFS_DIR_REC_LEN(1)); | ||
610 | de->inode_no = cpu_to_le64(parent->i_ino); | ||
611 | memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); | ||
612 | exofs_set_de_type(de, inode); | ||
613 | kunmap_atomic(page, KM_USER0); | ||
614 | err = exofs_commit_chunk(page, 0, chunk_size); | ||
615 | fail: | ||
616 | page_cache_release(page); | ||
617 | return err; | ||
618 | } | ||
619 | |||
620 | int exofs_empty_dir(struct inode *inode) | ||
621 | { | ||
622 | struct page *page = NULL; | ||
623 | unsigned long i, npages = dir_pages(inode); | ||
624 | |||
625 | for (i = 0; i < npages; i++) { | ||
626 | char *kaddr; | ||
627 | struct exofs_dir_entry *de; | ||
628 | page = exofs_get_page(inode, i); | ||
629 | |||
630 | if (IS_ERR(page)) | ||
631 | continue; | ||
632 | |||
633 | kaddr = page_address(page); | ||
634 | de = (struct exofs_dir_entry *)kaddr; | ||
635 | kaddr += exofs_last_byte(inode, i) - EXOFS_DIR_REC_LEN(1); | ||
636 | |||
637 | while ((char *)de <= kaddr) { | ||
638 | if (de->rec_len == 0) { | ||
639 | EXOFS_ERR("ERROR: exofs_empty_dir: " | ||
640 | "zero-length directory entry" | ||
641 | "kaddr=%p, de=%p\n", kaddr, de); | ||
642 | goto not_empty; | ||
643 | } | ||
644 | if (de->inode_no != 0) { | ||
645 | /* check for . and .. */ | ||
646 | if (de->name[0] != '.') | ||
647 | goto not_empty; | ||
648 | if (de->name_len > 2) | ||
649 | goto not_empty; | ||
650 | if (de->name_len < 2) { | ||
651 | if (le64_to_cpu(de->inode_no) != | ||
652 | inode->i_ino) | ||
653 | goto not_empty; | ||
654 | } else if (de->name[1] != '.') | ||
655 | goto not_empty; | ||
656 | } | ||
657 | de = exofs_next_entry(de); | ||
658 | } | ||
659 | exofs_put_page(page); | ||
660 | } | ||
661 | return 1; | ||
662 | |||
663 | not_empty: | ||
664 | exofs_put_page(page); | ||
665 | return 0; | ||
666 | } | ||
667 | |||
/*
 * File operations for exofs directories: seeking and read() use the
 * generic helpers, directory listing goes through exofs_readdir().
 */
const struct file_operations exofs_dir_operations = {
	.llseek = generic_file_llseek,
	.read = generic_read_dir,
	.readdir = exofs_readdir,
};
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h new file mode 100644 index 000000000000..0fd4c7859679 --- /dev/null +++ b/fs/exofs/exofs.h | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/fs.h> | ||
37 | #include <linux/time.h> | ||
38 | #include "common.h" | ||
39 | |||
40 | #ifndef __EXOFS_H__ | ||
41 | #define __EXOFS_H__ | ||
42 | |||
/* Log an error through printk at KERN_ERR level, prefixed with "exofs: ". */
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)

/*
 * Debug message. With CONFIG_EXOFS_DEBUG it is printed at KERN_NOTICE level
 * together with the calling function and line; otherwise the printk sits
 * behind "if (0)", so it is never executed but the arguments are still seen
 * by the compiler.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define EXOFS_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "exofs @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define EXOFS_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif
52 | |||
/*
 * u64 has problems with printk; this casts the value to unsigned long long
 * so that it can be printed with %llu / %llx. The whole expansion is
 * parenthesised so the cast cannot bind to surrounding operators.
 */
#define _LLU(x) ((unsigned long long)(x))
55 | |||
/*
 * our extension to the in-memory superblock
 * (reached through the super_block's s_fs_info pointer)
 */
struct exofs_sb_info {
	struct osd_dev *s_dev;		/* returned by get_osd_dev */
	osd_id s_pid;			/* partition ID of file system */
	int s_timeout;			/* timeout for OSD operations */
	uint64_t s_nextid;		/* highest object ID used */
	uint32_t s_numfiles;		/* number of files on fs */
	spinlock_t s_next_gen_lock;	/* spinlock for gen # update */
	u32 s_next_generation;		/* next gen # to use */
	atomic_t s_curr_pending;	/* number of pending commands */
	uint8_t s_cred[OSD_CAP_LEN];	/* all-powerful credential */
};
70 | |||
/*
 * our extension to the in-memory inode
 * (the VFS inode is embedded as the vfs_inode member; exofs_i() maps a
 * struct inode pointer back to the enclosing exofs_i_info)
 */
struct exofs_i_info {
	unsigned long i_flags;		/* various atomic flags */
	uint32_t i_data[EXOFS_IDATA];	/* short symlink names and device #s */
	uint32_t i_dir_start_lookup;	/* which page to start lookup */
	wait_queue_head_t i_wq;		/* wait queue for inode */
	uint64_t i_commit_size;		/* the object's written length */
	uint8_t i_cred[OSD_CAP_LEN];	/* all-powerful credential */
	struct inode vfs_inode;		/* normal in-memory inode */
};
83 | |||
/*
 * our inode flags (bit numbers within exofs_i_info.i_flags)
 */
#define OBJ_2BCREATED 0 /* object will be created soon */
#define OBJ_CREATED 1 /* object has been created on the osd */

/* Return non-zero if the OBJ_2BCREATED bit is set in @oi->i_flags. */
static inline int obj_2bcreated(struct exofs_i_info *oi)
{
	return test_bit(OBJ_2BCREATED, &oi->i_flags);
}

/* Set the OBJ_2BCREATED bit in @oi->i_flags. */
static inline void set_obj_2bcreated(struct exofs_i_info *oi)
{
	set_bit(OBJ_2BCREATED, &oi->i_flags);
}

/* Return non-zero if the OBJ_CREATED bit is set in @oi->i_flags. */
static inline int obj_created(struct exofs_i_info *oi)
{
	return test_bit(OBJ_CREATED, &oi->i_flags);
}

/* Set the OBJ_CREATED bit in @oi->i_flags. */
static inline void set_obj_created(struct exofs_i_info *oi)
{
	set_bit(OBJ_CREATED, &oi->i_flags);
}
109 | |||
int __exofs_wait_obj_created(struct exofs_i_info *oi);

/*
 * Fast path: return 0 at once when the OBJ_CREATED flag is already set;
 * otherwise defer to __exofs_wait_obj_created() and return its result.
 */
static inline int wait_obj_created(struct exofs_i_info *oi)
{
	if (unlikely(!obj_created(oi)))
		return __exofs_wait_obj_created(oi);

	return 0;
}
118 | |||
/*
 * get to our inode from the vfs inode
 *
 * @inode must be the vfs_inode member of a struct exofs_i_info; the
 * enclosing structure is recovered with container_of().
 */
static inline struct exofs_i_info *exofs_i(struct inode *inode)
{
	return container_of(inode, struct exofs_i_info, vfs_inode);
}
126 | |||
127 | /* | ||
128 | * Maximum count of links to a file | ||
129 | */ | ||
130 | #define EXOFS_LINK_MAX 32000 | ||
131 | |||
132 | /************************* | ||
133 | * function declarations * | ||
134 | *************************/ | ||
135 | /* inode.c */ | ||
136 | void exofs_truncate(struct inode *inode); | ||
137 | int exofs_setattr(struct dentry *, struct iattr *); | ||
138 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
139 | loff_t pos, unsigned len, unsigned flags, | ||
140 | struct page **pagep, void **fsdata); | ||
141 | extern struct inode *exofs_iget(struct super_block *, unsigned long); | ||
142 | struct inode *exofs_new_inode(struct inode *, int); | ||
143 | extern int exofs_write_inode(struct inode *, int); | ||
144 | extern void exofs_delete_inode(struct inode *); | ||
145 | |||
146 | /* dir.c: */ | ||
147 | int exofs_add_link(struct dentry *, struct inode *); | ||
148 | ino_t exofs_inode_by_name(struct inode *, struct dentry *); | ||
149 | int exofs_delete_entry(struct exofs_dir_entry *, struct page *); | ||
150 | int exofs_make_empty(struct inode *, struct inode *); | ||
151 | struct exofs_dir_entry *exofs_find_entry(struct inode *, struct dentry *, | ||
152 | struct page **); | ||
153 | int exofs_empty_dir(struct inode *); | ||
154 | struct exofs_dir_entry *exofs_dotdot(struct inode *, struct page **); | ||
155 | ino_t exofs_parent_ino(struct dentry *child); | ||
156 | int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, | ||
157 | struct inode *); | ||
158 | |||
159 | /********************* | ||
160 | * operation vectors * | ||
161 | *********************/ | ||
162 | /* dir.c: */ | ||
163 | extern const struct file_operations exofs_dir_operations; | ||
164 | |||
165 | /* file.c */ | ||
166 | extern const struct inode_operations exofs_file_inode_operations; | ||
167 | extern const struct file_operations exofs_file_operations; | ||
168 | |||
169 | /* inode.c */ | ||
170 | extern const struct address_space_operations exofs_aops; | ||
171 | |||
172 | /* namei.c */ | ||
173 | extern const struct inode_operations exofs_dir_inode_operations; | ||
174 | extern const struct inode_operations exofs_special_inode_operations; | ||
175 | |||
176 | /* symlink.c */ | ||
177 | extern const struct inode_operations exofs_symlink_inode_operations; | ||
178 | extern const struct inode_operations exofs_fast_symlink_inode_operations; | ||
179 | |||
180 | #endif | ||
diff --git a/fs/exofs/file.c b/fs/exofs/file.c new file mode 100644 index 000000000000..6ed7fe484752 --- /dev/null +++ b/fs/exofs/file.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/buffer_head.h> | ||
37 | |||
38 | #include "exofs.h" | ||
39 | |||
/* .release handler: nothing to tear down, always reports success. */
static int exofs_release_file(struct inode *inode, struct file *filp)
{
	return 0;
}
44 | |||
45 | static int exofs_file_fsync(struct file *filp, struct dentry *dentry, | ||
46 | int datasync) | ||
47 | { | ||
48 | int ret; | ||
49 | struct address_space *mapping = filp->f_mapping; | ||
50 | |||
51 | ret = filemap_write_and_wait(mapping); | ||
52 | if (ret) | ||
53 | return ret; | ||
54 | |||
55 | /*Note: file_fsync below also calles sync_blockdev, which is a no-op | ||
56 | * for exofs, but other then that it does sync_inode and | ||
57 | * sync_superblock which is what we need here. | ||
58 | */ | ||
59 | return file_fsync(filp, dentry, datasync); | ||
60 | } | ||
61 | |||
62 | static int exofs_flush(struct file *file, fl_owner_t id) | ||
63 | { | ||
64 | exofs_file_fsync(file, file->f_path.dentry, 1); | ||
65 | /* TODO: Flush the OSD target */ | ||
66 | return 0; | ||
67 | } | ||
68 | |||
/*
 * File operations for exofs files: I/O, mmap, open and splice use the
 * generic VFS helpers; release, fsync and flush are exofs specific.
 */
const struct file_operations exofs_file_operations = {
	.llseek = generic_file_llseek,
	.read = do_sync_read,
	.write = do_sync_write,
	.aio_read = generic_file_aio_read,
	.aio_write = generic_file_aio_write,
	.mmap = generic_file_mmap,
	.open = generic_file_open,
	.release = exofs_release_file,
	.fsync = exofs_file_fsync,
	.flush = exofs_flush,
	.splice_read = generic_file_splice_read,
	.splice_write = generic_file_splice_write,
};
83 | |||
/* Inode operations for exofs files: truncate and attribute changes. */
const struct inode_operations exofs_file_inode_operations = {
	.truncate = exofs_truncate,
	.setattr = exofs_setattr,
};
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c new file mode 100644 index 000000000000..ba8d9fab4693 --- /dev/null +++ b/fs/exofs/inode.c | |||
@@ -0,0 +1,1303 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/writeback.h> | ||
37 | #include <linux/buffer_head.h> | ||
38 | #include <scsi/scsi_device.h> | ||
39 | |||
40 | #include "exofs.h" | ||
41 | |||
42 | #ifdef CONFIG_EXOFS_DEBUG | ||
43 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | ||
44 | #endif | ||
45 | |||
/*
 * Accumulates the pages of one inode into a single bio so that they can be
 * submitted as one request. Initialised by _pcol_init(), re-armed by
 * _pcol_reset().
 */
struct page_collect {
	struct exofs_sb_info *sbi;	/* super-block info of inode's fs */
	struct request_queue *req_q;	/* queue used when adding bio pages */
	struct inode *inode;		/* inode the pages belong to */
	unsigned expected_pages;	/* sizing hint for the bio allocation */

	struct bio *bio;		/* bio being filled, NULL if none */
	unsigned nr_pages;		/* pages added to bio so far */
	unsigned long length;		/* bytes added to bio so far */
	loff_t pg_first; /* keep 64bit also in 32-arches */
};
57 | |||
58 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | ||
59 | struct inode *inode) | ||
60 | { | ||
61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
62 | struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; | ||
63 | |||
64 | pcol->sbi = sbi; | ||
65 | pcol->req_q = req_q; | ||
66 | pcol->inode = inode; | ||
67 | pcol->expected_pages = expected_pages; | ||
68 | |||
69 | pcol->bio = NULL; | ||
70 | pcol->nr_pages = 0; | ||
71 | pcol->length = 0; | ||
72 | pcol->pg_first = -1; | ||
73 | |||
74 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
75 | expected_pages); | ||
76 | } | ||
77 | |||
78 | static void _pcol_reset(struct page_collect *pcol) | ||
79 | { | ||
80 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); | ||
81 | |||
82 | pcol->bio = NULL; | ||
83 | pcol->nr_pages = 0; | ||
84 | pcol->length = 0; | ||
85 | pcol->pg_first = -1; | ||
86 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | ||
87 | pcol->inode->i_ino, pcol->expected_pages); | ||
88 | |||
89 | /* this is probably the end of the loop but in writes | ||
90 | * it might not end here. don't be left with nothing | ||
91 | */ | ||
92 | if (!pcol->expected_pages) | ||
93 | pcol->expected_pages = 128; | ||
94 | } | ||
95 | |||
96 | static int pcol_try_alloc(struct page_collect *pcol) | ||
97 | { | ||
98 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | ||
99 | |||
100 | for (; pages; pages >>= 1) { | ||
101 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | ||
102 | if (likely(pcol->bio)) | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | ||
107 | pcol->expected_pages); | ||
108 | return -ENOMEM; | ||
109 | } | ||
110 | |||
111 | static void pcol_free(struct page_collect *pcol) | ||
112 | { | ||
113 | bio_put(pcol->bio); | ||
114 | pcol->bio = NULL; | ||
115 | } | ||
116 | |||
117 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | ||
118 | unsigned len) | ||
119 | { | ||
120 | int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); | ||
121 | if (unlikely(len != added_len)) | ||
122 | return -ENOMEM; | ||
123 | |||
124 | ++pcol->nr_pages; | ||
125 | pcol->length += len; | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static int update_read_page(struct page *page, int ret) | ||
130 | { | ||
131 | if (ret == 0) { | ||
132 | /* Everything is OK */ | ||
133 | SetPageUptodate(page); | ||
134 | if (PageError(page)) | ||
135 | ClearPageError(page); | ||
136 | } else if (ret == -EFAULT) { | ||
137 | /* In this case we were trying to read something that wasn't on | ||
138 | * disk yet - return a page full of zeroes. This should be OK, | ||
139 | * because the object should be empty (if there was a write | ||
140 | * before this read, the read would be waiting with the page | ||
141 | * locked */ | ||
142 | clear_highpage(page); | ||
143 | |||
144 | SetPageUptodate(page); | ||
145 | if (PageError(page)) | ||
146 | ClearPageError(page); | ||
147 | ret = 0; /* recovered error */ | ||
148 | EXOFS_DBGMSG("recovered read error\n"); | ||
149 | } else /* Error */ | ||
150 | SetPageError(page); | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | static void update_write_page(struct page *page, int ret) | ||
156 | { | ||
157 | if (ret) { | ||
158 | mapping_set_error(page->mapping, ret); | ||
159 | SetPageError(page); | ||
160 | } | ||
161 | end_page_writeback(page); | ||
162 | } | ||
163 | |||
164 | /* Called at the end of reads, to optionally unlock pages and update their | ||
165 | * status. | ||
166 | */ | ||
167 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | ||
168 | bool do_unlock) | ||
169 | { | ||
170 | struct bio_vec *bvec; | ||
171 | int i; | ||
172 | u64 resid; | ||
173 | u64 good_bytes; | ||
174 | u64 length = 0; | ||
175 | int ret = exofs_check_ok_resid(or, &resid, NULL); | ||
176 | |||
177 | osd_end_request(or); | ||
178 | |||
179 | if (likely(!ret)) | ||
180 | good_bytes = pcol->length; | ||
181 | else if (!resid) | ||
182 | good_bytes = 0; | ||
183 | else | ||
184 | good_bytes = pcol->length - resid; | ||
185 | |||
186 | EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" | ||
187 | " length=0x%lx nr_pages=%u\n", | ||
188 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
189 | pcol->nr_pages); | ||
190 | |||
191 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
192 | struct page *page = bvec->bv_page; | ||
193 | struct inode *inode = page->mapping->host; | ||
194 | int page_stat; | ||
195 | |||
196 | if (inode != pcol->inode) | ||
197 | continue; /* osd might add more pages at end */ | ||
198 | |||
199 | if (likely(length < good_bytes)) | ||
200 | page_stat = 0; | ||
201 | else | ||
202 | page_stat = ret; | ||
203 | |||
204 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | ||
205 | inode->i_ino, page->index, | ||
206 | page_stat ? "bad_bytes" : "good_bytes"); | ||
207 | |||
208 | ret = update_read_page(page, page_stat); | ||
209 | if (do_unlock) | ||
210 | unlock_page(page); | ||
211 | length += bvec->bv_len; | ||
212 | } | ||
213 | |||
214 | pcol_free(pcol); | ||
215 | EXOFS_DBGMSG("readpages_done END\n"); | ||
216 | return ret; | ||
217 | } | ||
218 | |||
219 | /* callback of async reads */ | ||
220 | static void readpages_done(struct osd_request *or, void *p) | ||
221 | { | ||
222 | struct page_collect *pcol = p; | ||
223 | |||
224 | __readpages_done(or, pcol, true); | ||
225 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
226 | kfree(p); | ||
227 | } | ||
228 | |||
229 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | ||
230 | { | ||
231 | struct bio_vec *bvec; | ||
232 | int i; | ||
233 | |||
234 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
235 | struct page *page = bvec->bv_page; | ||
236 | |||
237 | if (rw == READ) | ||
238 | update_read_page(page, ret); | ||
239 | else | ||
240 | update_write_page(page, ret); | ||
241 | |||
242 | unlock_page(page); | ||
243 | } | ||
244 | pcol_free(pcol); | ||
245 | } | ||
246 | |||
247 | static int read_exec(struct page_collect *pcol, bool is_sync) | ||
248 | { | ||
249 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
250 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
251 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
252 | struct osd_request *or = NULL; | ||
253 | struct page_collect *pcol_copy = NULL; | ||
254 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
255 | int ret; | ||
256 | |||
257 | if (!pcol->bio) | ||
258 | return 0; | ||
259 | |||
260 | /* see comment in _readpage() about sync reads */ | ||
261 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | ||
262 | |||
263 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
264 | if (unlikely(!or)) { | ||
265 | ret = -ENOMEM; | ||
266 | goto err; | ||
267 | } | ||
268 | |||
269 | osd_req_read(or, &obj, pcol->bio, i_start); | ||
270 | |||
271 | if (is_sync) { | ||
272 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | ||
273 | return __readpages_done(or, pcol, false); | ||
274 | } | ||
275 | |||
276 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
277 | if (!pcol_copy) { | ||
278 | ret = -ENOMEM; | ||
279 | goto err; | ||
280 | } | ||
281 | |||
282 | *pcol_copy = *pcol; | ||
283 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | ||
284 | if (unlikely(ret)) | ||
285 | goto err; | ||
286 | |||
287 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
288 | |||
289 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | ||
290 | obj.id, _LLU(i_start), pcol->length); | ||
291 | |||
292 | /* pages ownership was passed to pcol_copy */ | ||
293 | _pcol_reset(pcol); | ||
294 | return 0; | ||
295 | |||
296 | err: | ||
297 | if (!is_sync) | ||
298 | _unlock_pcol_pages(pcol, ret, READ); | ||
299 | kfree(pcol_copy); | ||
300 | if (or) | ||
301 | osd_end_request(or); | ||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | /* readpage_strip is called either directly from readpage() or by the VFS from | ||
306 | * within read_cache_pages(), to add one more page to be read. It will try to | ||
307 | * collect as many contiguous pages as posible. If a discontinuity is | ||
308 | * encountered, or it runs out of resources, it will submit the previous segment | ||
309 | * and will start a new collection. Eventually caller must submit the last | ||
310 | * segment if present. | ||
311 | */ | ||
312 | static int readpage_strip(void *data, struct page *page) | ||
313 | { | ||
314 | struct page_collect *pcol = data; | ||
315 | struct inode *inode = pcol->inode; | ||
316 | struct exofs_i_info *oi = exofs_i(inode); | ||
317 | loff_t i_size = i_size_read(inode); | ||
318 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
319 | size_t len; | ||
320 | int ret; | ||
321 | |||
322 | /* FIXME: Just for debugging, will be removed */ | ||
323 | if (PageUptodate(page)) | ||
324 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, | ||
325 | page->index); | ||
326 | |||
327 | if (page->index < end_index) | ||
328 | len = PAGE_CACHE_SIZE; | ||
329 | else if (page->index == end_index) | ||
330 | len = i_size & ~PAGE_CACHE_MASK; | ||
331 | else | ||
332 | len = 0; | ||
333 | |||
334 | if (!len || !obj_created(oi)) { | ||
335 | /* this will be out of bounds, or doesn't exist yet. | ||
336 | * Current page is cleared and the request is split | ||
337 | */ | ||
338 | clear_highpage(page); | ||
339 | |||
340 | SetPageUptodate(page); | ||
341 | if (PageError(page)) | ||
342 | ClearPageError(page); | ||
343 | |||
344 | unlock_page(page); | ||
345 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | ||
346 | " splitting\n", inode->i_ino, page->index); | ||
347 | |||
348 | return read_exec(pcol, false); | ||
349 | } | ||
350 | |||
351 | try_again: | ||
352 | |||
353 | if (unlikely(pcol->pg_first == -1)) { | ||
354 | pcol->pg_first = page->index; | ||
355 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
356 | page->index)) { | ||
357 | /* Discontinuity detected, split the request */ | ||
358 | ret = read_exec(pcol, false); | ||
359 | if (unlikely(ret)) | ||
360 | goto fail; | ||
361 | goto try_again; | ||
362 | } | ||
363 | |||
364 | if (!pcol->bio) { | ||
365 | ret = pcol_try_alloc(pcol); | ||
366 | if (unlikely(ret)) | ||
367 | goto fail; | ||
368 | } | ||
369 | |||
370 | if (len != PAGE_CACHE_SIZE) | ||
371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | ||
372 | |||
373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
374 | inode->i_ino, page->index, len); | ||
375 | |||
376 | ret = pcol_add_page(pcol, page, len); | ||
377 | if (ret) { | ||
378 | EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | ||
379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | ||
380 | page, len, pcol->nr_pages, pcol->length); | ||
381 | |||
382 | /* split the request, and start again with current page */ | ||
383 | ret = read_exec(pcol, false); | ||
384 | if (unlikely(ret)) | ||
385 | goto fail; | ||
386 | |||
387 | goto try_again; | ||
388 | } | ||
389 | |||
390 | return 0; | ||
391 | |||
392 | fail: | ||
393 | /* SetPageError(page); ??? */ | ||
394 | unlock_page(page); | ||
395 | return ret; | ||
396 | } | ||
397 | |||
398 | static int exofs_readpages(struct file *file, struct address_space *mapping, | ||
399 | struct list_head *pages, unsigned nr_pages) | ||
400 | { | ||
401 | struct page_collect pcol; | ||
402 | int ret; | ||
403 | |||
404 | _pcol_init(&pcol, nr_pages, mapping->host); | ||
405 | |||
406 | ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); | ||
407 | if (ret) { | ||
408 | EXOFS_ERR("read_cache_pages => %d\n", ret); | ||
409 | return ret; | ||
410 | } | ||
411 | |||
412 | return read_exec(&pcol, false); | ||
413 | } | ||
414 | |||
415 | static int _readpage(struct page *page, bool is_sync) | ||
416 | { | ||
417 | struct page_collect pcol; | ||
418 | int ret; | ||
419 | |||
420 | _pcol_init(&pcol, 1, page->mapping->host); | ||
421 | |||
422 | /* readpage_strip might call read_exec(,async) inside at several places | ||
423 | * but this is safe for is_async=0 since read_exec will not do anything | ||
424 | * when we have a single page. | ||
425 | */ | ||
426 | ret = readpage_strip(&pcol, page); | ||
427 | if (ret) { | ||
428 | EXOFS_ERR("_readpage => %d\n", ret); | ||
429 | return ret; | ||
430 | } | ||
431 | |||
432 | return read_exec(&pcol, is_sync); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * We don't need the file | ||
437 | */ | ||
438 | static int exofs_readpage(struct file *file, struct page *page) | ||
439 | { | ||
440 | return _readpage(page, false); | ||
441 | } | ||
442 | |||
443 | /* Callback for osd_write. All writes are asynchronouse */ | ||
444 | static void writepages_done(struct osd_request *or, void *p) | ||
445 | { | ||
446 | struct page_collect *pcol = p; | ||
447 | struct bio_vec *bvec; | ||
448 | int i; | ||
449 | u64 resid; | ||
450 | u64 good_bytes; | ||
451 | u64 length = 0; | ||
452 | |||
453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
454 | |||
455 | osd_end_request(or); | ||
456 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
457 | |||
458 | if (likely(!ret)) | ||
459 | good_bytes = pcol->length; | ||
460 | else if (!resid) | ||
461 | good_bytes = 0; | ||
462 | else | ||
463 | good_bytes = pcol->length - resid; | ||
464 | |||
465 | EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" | ||
466 | " length=0x%lx nr_pages=%u\n", | ||
467 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
468 | pcol->nr_pages); | ||
469 | |||
470 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
471 | struct page *page = bvec->bv_page; | ||
472 | struct inode *inode = page->mapping->host; | ||
473 | int page_stat; | ||
474 | |||
475 | if (inode != pcol->inode) | ||
476 | continue; /* osd might add more pages to a bio */ | ||
477 | |||
478 | if (likely(length < good_bytes)) | ||
479 | page_stat = 0; | ||
480 | else | ||
481 | page_stat = ret; | ||
482 | |||
483 | update_write_page(page, page_stat); | ||
484 | unlock_page(page); | ||
485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | ||
486 | inode->i_ino, page->index, page_stat); | ||
487 | |||
488 | length += bvec->bv_len; | ||
489 | } | ||
490 | |||
491 | pcol_free(pcol); | ||
492 | kfree(pcol); | ||
493 | EXOFS_DBGMSG("writepages_done END\n"); | ||
494 | } | ||
495 | |||
496 | static int write_exec(struct page_collect *pcol) | ||
497 | { | ||
498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
501 | struct osd_request *or = NULL; | ||
502 | struct page_collect *pcol_copy = NULL; | ||
503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
504 | int ret; | ||
505 | |||
506 | if (!pcol->bio) | ||
507 | return 0; | ||
508 | |||
509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
510 | if (unlikely(!or)) { | ||
511 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
512 | ret = -ENOMEM; | ||
513 | goto err; | ||
514 | } | ||
515 | |||
516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
517 | if (!pcol_copy) { | ||
518 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | ||
519 | ret = -ENOMEM; | ||
520 | goto err; | ||
521 | } | ||
522 | |||
523 | *pcol_copy = *pcol; | ||
524 | |||
525 | osd_req_write(or, &obj, pcol_copy->bio, i_start); | ||
526 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | ||
527 | if (unlikely(ret)) { | ||
528 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | ||
529 | goto err; | ||
530 | } | ||
531 | |||
532 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
533 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | ||
534 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | ||
535 | pcol->length); | ||
536 | /* pages ownership was passed to pcol_copy */ | ||
537 | _pcol_reset(pcol); | ||
538 | return 0; | ||
539 | |||
540 | err: | ||
541 | _unlock_pcol_pages(pcol, ret, WRITE); | ||
542 | kfree(pcol_copy); | ||
543 | if (or) | ||
544 | osd_end_request(or); | ||
545 | return ret; | ||
546 | } | ||
547 | |||
548 | /* writepage_strip is called either directly from writepage() or by the VFS from | ||
549 | * within write_cache_pages(), to add one more page to be written to storage. | ||
550 | * It will try to collect as many contiguous pages as possible. If a | ||
551 | * discontinuity is encountered or it runs out of resources it will submit the | ||
552 | * previous segment and will start a new collection. | ||
553 | * Eventually caller must submit the last segment if present. | ||
554 | */ | ||
555 | static int writepage_strip(struct page *page, | ||
556 | struct writeback_control *wbc_unused, void *data) | ||
557 | { | ||
558 | struct page_collect *pcol = data; | ||
559 | struct inode *inode = pcol->inode; | ||
560 | struct exofs_i_info *oi = exofs_i(inode); | ||
561 | loff_t i_size = i_size_read(inode); | ||
562 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
563 | size_t len; | ||
564 | int ret; | ||
565 | |||
566 | BUG_ON(!PageLocked(page)); | ||
567 | |||
568 | ret = wait_obj_created(oi); | ||
569 | if (unlikely(ret)) | ||
570 | goto fail; | ||
571 | |||
572 | if (page->index < end_index) | ||
573 | /* in this case, the page is within the limits of the file */ | ||
574 | len = PAGE_CACHE_SIZE; | ||
575 | else { | ||
576 | len = i_size & ~PAGE_CACHE_MASK; | ||
577 | |||
578 | if (page->index > end_index || !len) { | ||
579 | /* in this case, the page is outside the limits | ||
580 | * (truncate in progress) | ||
581 | */ | ||
582 | ret = write_exec(pcol); | ||
583 | if (unlikely(ret)) | ||
584 | goto fail; | ||
585 | if (PageError(page)) | ||
586 | ClearPageError(page); | ||
587 | unlock_page(page); | ||
588 | return 0; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | try_again: | ||
593 | |||
594 | if (unlikely(pcol->pg_first == -1)) { | ||
595 | pcol->pg_first = page->index; | ||
596 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
597 | page->index)) { | ||
598 | /* Discontinuity detected, split the request */ | ||
599 | ret = write_exec(pcol); | ||
600 | if (unlikely(ret)) | ||
601 | goto fail; | ||
602 | goto try_again; | ||
603 | } | ||
604 | |||
605 | if (!pcol->bio) { | ||
606 | ret = pcol_try_alloc(pcol); | ||
607 | if (unlikely(ret)) | ||
608 | goto fail; | ||
609 | } | ||
610 | |||
611 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
612 | inode->i_ino, page->index, len); | ||
613 | |||
614 | ret = pcol_add_page(pcol, page, len); | ||
615 | if (unlikely(ret)) { | ||
616 | EXOFS_DBGMSG("Failed pcol_add_page " | ||
617 | "nr_pages=%u total_length=0x%lx\n", | ||
618 | pcol->nr_pages, pcol->length); | ||
619 | |||
620 | /* split the request, next loop will start again */ | ||
621 | ret = write_exec(pcol); | ||
622 | if (unlikely(ret)) { | ||
623 | EXOFS_DBGMSG("write_exec faild => %d", ret); | ||
624 | goto fail; | ||
625 | } | ||
626 | |||
627 | goto try_again; | ||
628 | } | ||
629 | |||
630 | BUG_ON(PageWriteback(page)); | ||
631 | set_page_writeback(page); | ||
632 | |||
633 | return 0; | ||
634 | |||
635 | fail: | ||
636 | set_bit(AS_EIO, &page->mapping->flags); | ||
637 | unlock_page(page); | ||
638 | return ret; | ||
639 | } | ||
640 | |||
641 | static int exofs_writepages(struct address_space *mapping, | ||
642 | struct writeback_control *wbc) | ||
643 | { | ||
644 | struct page_collect pcol; | ||
645 | long start, end, expected_pages; | ||
646 | int ret; | ||
647 | |||
648 | start = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
649 | end = (wbc->range_end == LLONG_MAX) ? | ||
650 | start + mapping->nrpages : | ||
651 | wbc->range_end >> PAGE_CACHE_SHIFT; | ||
652 | |||
653 | if (start || end) | ||
654 | expected_pages = min(end - start + 1, 32L); | ||
655 | else | ||
656 | expected_pages = mapping->nrpages; | ||
657 | |||
658 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | ||
659 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | ||
660 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | ||
661 | mapping->nrpages, start, end); | ||
662 | |||
663 | _pcol_init(&pcol, expected_pages, mapping->host); | ||
664 | |||
665 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); | ||
666 | if (ret) { | ||
667 | EXOFS_ERR("write_cache_pages => %d\n", ret); | ||
668 | return ret; | ||
669 | } | ||
670 | |||
671 | return write_exec(&pcol); | ||
672 | } | ||
673 | |||
674 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | ||
675 | { | ||
676 | struct page_collect pcol; | ||
677 | int ret; | ||
678 | |||
679 | _pcol_init(&pcol, 1, page->mapping->host); | ||
680 | |||
681 | ret = writepage_strip(page, NULL, &pcol); | ||
682 | if (ret) { | ||
683 | EXOFS_ERR("exofs_writepage => %d\n", ret); | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | return write_exec(&pcol); | ||
688 | } | ||
689 | |||
690 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
691 | loff_t pos, unsigned len, unsigned flags, | ||
692 | struct page **pagep, void **fsdata) | ||
693 | { | ||
694 | int ret = 0; | ||
695 | struct page *page; | ||
696 | |||
697 | page = *pagep; | ||
698 | if (page == NULL) { | ||
699 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, | ||
700 | fsdata); | ||
701 | if (ret) { | ||
702 | EXOFS_DBGMSG("simple_write_begin faild\n"); | ||
703 | return ret; | ||
704 | } | ||
705 | |||
706 | page = *pagep; | ||
707 | } | ||
708 | |||
709 | /* read modify write */ | ||
710 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { | ||
711 | ret = _readpage(page, true); | ||
712 | if (ret) { | ||
713 | /*SetPageError was done by _readpage. Is it ok?*/ | ||
714 | unlock_page(page); | ||
715 | EXOFS_DBGMSG("__readpage_filler faild\n"); | ||
716 | } | ||
717 | } | ||
718 | |||
719 | return ret; | ||
720 | } | ||
721 | |||
722 | static int exofs_write_begin_export(struct file *file, | ||
723 | struct address_space *mapping, | ||
724 | loff_t pos, unsigned len, unsigned flags, | ||
725 | struct page **pagep, void **fsdata) | ||
726 | { | ||
727 | *pagep = NULL; | ||
728 | |||
729 | return exofs_write_begin(file, mapping, pos, len, flags, pagep, | ||
730 | fsdata); | ||
731 | } | ||
732 | |||
733 | const struct address_space_operations exofs_aops = { | ||
734 | .readpage = exofs_readpage, | ||
735 | .readpages = exofs_readpages, | ||
736 | .writepage = exofs_writepage, | ||
737 | .writepages = exofs_writepages, | ||
738 | .write_begin = exofs_write_begin_export, | ||
739 | .write_end = simple_write_end, | ||
740 | }; | ||
741 | |||
742 | /****************************************************************************** | ||
743 | * INODE OPERATIONS | ||
744 | *****************************************************************************/ | ||
745 | |||
746 | /* | ||
747 | * Test whether an inode is a fast symlink. | ||
748 | */ | ||
749 | static inline int exofs_inode_is_fast_symlink(struct inode *inode) | ||
750 | { | ||
751 | struct exofs_i_info *oi = exofs_i(inode); | ||
752 | |||
753 | return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * get_block_t - Fill in a buffer_head | ||
758 | * An OSD takes care of block allocation so we just fake an allocation by | ||
759 | * putting in the inode's sector_t in the buffer_head. | ||
760 | * TODO: What about the case of create==0 and @iblock does not exist in the | ||
761 | * object? | ||
762 | */ | ||
763 | static int exofs_get_block(struct inode *inode, sector_t iblock, | ||
764 | struct buffer_head *bh_result, int create) | ||
765 | { | ||
766 | map_bh(bh_result, inode->i_sb, iblock); | ||
767 | return 0; | ||
768 | } | ||
769 | |||
770 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | ||
771 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | ||
772 | |||
773 | /* | ||
774 | * Truncate a file to the specified size - all we have to do is set the size | ||
775 | * attribute. We make sure the object exists first. | ||
776 | */ | ||
777 | void exofs_truncate(struct inode *inode) | ||
778 | { | ||
779 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
780 | struct exofs_i_info *oi = exofs_i(inode); | ||
781 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
782 | struct osd_request *or; | ||
783 | struct osd_attr attr; | ||
784 | loff_t isize = i_size_read(inode); | ||
785 | __be64 newsize; | ||
786 | int ret; | ||
787 | |||
788 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | ||
789 | || S_ISLNK(inode->i_mode))) | ||
790 | return; | ||
791 | if (exofs_inode_is_fast_symlink(inode)) | ||
792 | return; | ||
793 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
794 | return; | ||
795 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
796 | |||
797 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
798 | |||
799 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
800 | if (unlikely(!or)) { | ||
801 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
802 | goto fail; | ||
803 | } | ||
804 | |||
805 | osd_req_set_attributes(or, &obj); | ||
806 | |||
807 | newsize = cpu_to_be64((u64)isize); | ||
808 | attr = g_attr_logical_length; | ||
809 | attr.val_ptr = &newsize; | ||
810 | osd_req_add_set_attr_list(or, &attr, 1); | ||
811 | |||
812 | /* if we are about to truncate an object, and it hasn't been | ||
813 | * created yet, wait | ||
814 | */ | ||
815 | if (unlikely(wait_obj_created(oi))) | ||
816 | goto fail; | ||
817 | |||
818 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
819 | osd_end_request(or); | ||
820 | if (ret) | ||
821 | goto fail; | ||
822 | |||
823 | out: | ||
824 | mark_inode_dirty(inode); | ||
825 | return; | ||
826 | fail: | ||
827 | make_bad_inode(inode); | ||
828 | goto out; | ||
829 | } | ||
830 | |||
831 | /* | ||
832 | * Set inode attributes - just call generic functions. | ||
833 | */ | ||
834 | int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
835 | { | ||
836 | struct inode *inode = dentry->d_inode; | ||
837 | int error; | ||
838 | |||
839 | error = inode_change_ok(inode, iattr); | ||
840 | if (error) | ||
841 | return error; | ||
842 | |||
843 | error = inode_setattr(inode, iattr); | ||
844 | return error; | ||
845 | } | ||
846 | |||
847 | /* | ||
848 | * Read an inode from the OSD, and return it as is. We also return the size | ||
849 | * attribute in the 'sanity' argument if we got compiled with debugging turned | ||
850 | * on. | ||
851 | */ | ||
852 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | ||
853 | struct exofs_fcb *inode, uint64_t *sanity) | ||
854 | { | ||
855 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
856 | struct osd_request *or; | ||
857 | struct osd_attr attr; | ||
858 | struct osd_obj_id obj = {sbi->s_pid, | ||
859 | oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | ||
860 | int ret; | ||
861 | |||
862 | exofs_make_credential(oi->i_cred, &obj); | ||
863 | |||
864 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
865 | if (unlikely(!or)) { | ||
866 | EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | ||
867 | return -ENOMEM; | ||
868 | } | ||
869 | osd_req_get_attributes(or, &obj); | ||
870 | |||
871 | /* we need the inode attribute */ | ||
872 | osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | ||
873 | |||
874 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
875 | /* we get the size attributes to do a sanity check */ | ||
876 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | ||
877 | #endif | ||
878 | |||
879 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
880 | if (ret) | ||
881 | goto out; | ||
882 | |||
883 | attr = g_attr_inode_data; | ||
884 | ret = extract_attr_from_req(or, &attr); | ||
885 | if (ret) { | ||
886 | EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | ||
887 | goto out; | ||
888 | } | ||
889 | |||
890 | WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | ||
891 | memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | ||
892 | |||
893 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
894 | attr = g_attr_logical_length; | ||
895 | ret = extract_attr_from_req(or, &attr); | ||
896 | if (ret) { | ||
897 | EXOFS_ERR("ERROR: extract attr from or failed\n"); | ||
898 | goto out; | ||
899 | } | ||
900 | *sanity = get_unaligned_be64(attr.val_ptr); | ||
901 | #endif | ||
902 | |||
903 | out: | ||
904 | osd_end_request(or); | ||
905 | return ret; | ||
906 | } | ||
907 | |||
908 | /* | ||
909 | * Fill in an inode read from the OSD and set it up for use | ||
910 | */ | ||
911 | struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | ||
912 | { | ||
913 | struct exofs_i_info *oi; | ||
914 | struct exofs_fcb fcb; | ||
915 | struct inode *inode; | ||
916 | uint64_t uninitialized_var(sanity); | ||
917 | int ret; | ||
918 | |||
919 | inode = iget_locked(sb, ino); | ||
920 | if (!inode) | ||
921 | return ERR_PTR(-ENOMEM); | ||
922 | if (!(inode->i_state & I_NEW)) | ||
923 | return inode; | ||
924 | oi = exofs_i(inode); | ||
925 | |||
926 | /* read the inode from the osd */ | ||
927 | ret = exofs_get_inode(sb, oi, &fcb, &sanity); | ||
928 | if (ret) | ||
929 | goto bad_inode; | ||
930 | |||
931 | init_waitqueue_head(&oi->i_wq); | ||
932 | set_obj_created(oi); | ||
933 | |||
934 | /* copy stuff from on-disk struct to in-memory struct */ | ||
935 | inode->i_mode = le16_to_cpu(fcb.i_mode); | ||
936 | inode->i_uid = le32_to_cpu(fcb.i_uid); | ||
937 | inode->i_gid = le32_to_cpu(fcb.i_gid); | ||
938 | inode->i_nlink = le16_to_cpu(fcb.i_links_count); | ||
939 | inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); | ||
940 | inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); | ||
941 | inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); | ||
942 | inode->i_ctime.tv_nsec = | ||
943 | inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; | ||
944 | oi->i_commit_size = le64_to_cpu(fcb.i_size); | ||
945 | i_size_write(inode, oi->i_commit_size); | ||
946 | inode->i_blkbits = EXOFS_BLKSHIFT; | ||
947 | inode->i_generation = le32_to_cpu(fcb.i_generation); | ||
948 | |||
949 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
950 | if ((inode->i_size != sanity) && | ||
951 | (!exofs_inode_is_fast_symlink(inode))) { | ||
952 | EXOFS_ERR("WARNING: Size of object from inode and " | ||
953 | "attributes differ (%lld != %llu)\n", | ||
954 | inode->i_size, _LLU(sanity)); | ||
955 | } | ||
956 | #endif | ||
957 | |||
958 | oi->i_dir_start_lookup = 0; | ||
959 | |||
960 | if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { | ||
961 | ret = -ESTALE; | ||
962 | goto bad_inode; | ||
963 | } | ||
964 | |||
965 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | ||
966 | if (fcb.i_data[0]) | ||
967 | inode->i_rdev = | ||
968 | old_decode_dev(le32_to_cpu(fcb.i_data[0])); | ||
969 | else | ||
970 | inode->i_rdev = | ||
971 | new_decode_dev(le32_to_cpu(fcb.i_data[1])); | ||
972 | } else { | ||
973 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); | ||
974 | } | ||
975 | |||
976 | if (S_ISREG(inode->i_mode)) { | ||
977 | inode->i_op = &exofs_file_inode_operations; | ||
978 | inode->i_fop = &exofs_file_operations; | ||
979 | inode->i_mapping->a_ops = &exofs_aops; | ||
980 | } else if (S_ISDIR(inode->i_mode)) { | ||
981 | inode->i_op = &exofs_dir_inode_operations; | ||
982 | inode->i_fop = &exofs_dir_operations; | ||
983 | inode->i_mapping->a_ops = &exofs_aops; | ||
984 | } else if (S_ISLNK(inode->i_mode)) { | ||
985 | if (exofs_inode_is_fast_symlink(inode)) | ||
986 | inode->i_op = &exofs_fast_symlink_inode_operations; | ||
987 | else { | ||
988 | inode->i_op = &exofs_symlink_inode_operations; | ||
989 | inode->i_mapping->a_ops = &exofs_aops; | ||
990 | } | ||
991 | } else { | ||
992 | inode->i_op = &exofs_special_inode_operations; | ||
993 | if (fcb.i_data[0]) | ||
994 | init_special_inode(inode, inode->i_mode, | ||
995 | old_decode_dev(le32_to_cpu(fcb.i_data[0]))); | ||
996 | else | ||
997 | init_special_inode(inode, inode->i_mode, | ||
998 | new_decode_dev(le32_to_cpu(fcb.i_data[1]))); | ||
999 | } | ||
1000 | |||
1001 | unlock_new_inode(inode); | ||
1002 | return inode; | ||
1003 | |||
1004 | bad_inode: | ||
1005 | iget_failed(inode); | ||
1006 | return ERR_PTR(ret); | ||
1007 | } | ||
1008 | |||
1009 | int __exofs_wait_obj_created(struct exofs_i_info *oi) | ||
1010 | { | ||
1011 | if (!obj_created(oi)) { | ||
1012 | BUG_ON(!obj_2bcreated(oi)); | ||
1013 | wait_event(oi->i_wq, obj_created(oi)); | ||
1014 | } | ||
1015 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; | ||
1016 | } | ||
1017 | /* | ||
1018 | * Callback function from exofs_new_inode(). The important thing is that we | ||
1019 | * set the obj_created flag so that other methods know that the object exists on | ||
1020 | * the OSD. | ||
1021 | */ | ||
1022 | static void create_done(struct osd_request *or, void *p) | ||
1023 | { | ||
1024 | struct inode *inode = p; | ||
1025 | struct exofs_i_info *oi = exofs_i(inode); | ||
1026 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
1027 | int ret; | ||
1028 | |||
1029 | ret = exofs_check_ok(or); | ||
1030 | osd_end_request(or); | ||
1031 | atomic_dec(&sbi->s_curr_pending); | ||
1032 | |||
1033 | if (unlikely(ret)) { | ||
1034 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | ||
1035 | _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | ||
1036 | make_bad_inode(inode); | ||
1037 | } else | ||
1038 | set_obj_created(oi); | ||
1039 | |||
1040 | atomic_dec(&inode->i_count); | ||
1041 | wake_up(&oi->i_wq); | ||
1042 | } | ||
1043 | |||
1044 | /* | ||
1045 | * Set up a new inode and create an object for it on the OSD | ||
1046 | */ | ||
1047 | struct inode *exofs_new_inode(struct inode *dir, int mode) | ||
1048 | { | ||
1049 | struct super_block *sb; | ||
1050 | struct inode *inode; | ||
1051 | struct exofs_i_info *oi; | ||
1052 | struct exofs_sb_info *sbi; | ||
1053 | struct osd_request *or; | ||
1054 | struct osd_obj_id obj; | ||
1055 | int ret; | ||
1056 | |||
1057 | sb = dir->i_sb; | ||
1058 | inode = new_inode(sb); | ||
1059 | if (!inode) | ||
1060 | return ERR_PTR(-ENOMEM); | ||
1061 | |||
1062 | oi = exofs_i(inode); | ||
1063 | |||
1064 | init_waitqueue_head(&oi->i_wq); | ||
1065 | set_obj_2bcreated(oi); | ||
1066 | |||
1067 | sbi = sb->s_fs_info; | ||
1068 | |||
1069 | sb->s_dirt = 1; | ||
1070 | inode->i_uid = current->cred->fsuid; | ||
1071 | if (dir->i_mode & S_ISGID) { | ||
1072 | inode->i_gid = dir->i_gid; | ||
1073 | if (S_ISDIR(mode)) | ||
1074 | mode |= S_ISGID; | ||
1075 | } else { | ||
1076 | inode->i_gid = current->cred->fsgid; | ||
1077 | } | ||
1078 | inode->i_mode = mode; | ||
1079 | |||
1080 | inode->i_ino = sbi->s_nextid++; | ||
1081 | inode->i_blkbits = EXOFS_BLKSHIFT; | ||
1082 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
1083 | oi->i_commit_size = inode->i_size = 0; | ||
1084 | spin_lock(&sbi->s_next_gen_lock); | ||
1085 | inode->i_generation = sbi->s_next_generation++; | ||
1086 | spin_unlock(&sbi->s_next_gen_lock); | ||
1087 | insert_inode_hash(inode); | ||
1088 | |||
1089 | mark_inode_dirty(inode); | ||
1090 | |||
1091 | obj.partition = sbi->s_pid; | ||
1092 | obj.id = inode->i_ino + EXOFS_OBJ_OFF; | ||
1093 | exofs_make_credential(oi->i_cred, &obj); | ||
1094 | |||
1095 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1096 | if (unlikely(!or)) { | ||
1097 | EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | ||
1098 | return ERR_PTR(-ENOMEM); | ||
1099 | } | ||
1100 | |||
1101 | osd_req_create_object(or, &obj); | ||
1102 | |||
1103 | /* increment the refcount so that the inode will still be around when we | ||
1104 | * reach the callback | ||
1105 | */ | ||
1106 | atomic_inc(&inode->i_count); | ||
1107 | |||
1108 | ret = exofs_async_op(or, create_done, inode, oi->i_cred); | ||
1109 | if (ret) { | ||
1110 | atomic_dec(&inode->i_count); | ||
1111 | osd_end_request(or); | ||
1112 | return ERR_PTR(-EIO); | ||
1113 | } | ||
1114 | atomic_inc(&sbi->s_curr_pending); | ||
1115 | |||
1116 | return inode; | ||
1117 | } | ||
1118 | |||
1119 | /* | ||
1120 | * struct to pass two arguments to update_inode's callback | ||
1121 | */ | ||
1122 | struct updatei_args { | ||
1123 | struct exofs_sb_info *sbi; | ||
1124 | struct exofs_fcb fcb; | ||
1125 | }; | ||
1126 | |||
1127 | /* | ||
1128 | * Callback function from exofs_update_inode(). | ||
1129 | */ | ||
1130 | static void updatei_done(struct osd_request *or, void *p) | ||
1131 | { | ||
1132 | struct updatei_args *args = p; | ||
1133 | |||
1134 | osd_end_request(or); | ||
1135 | |||
1136 | atomic_dec(&args->sbi->s_curr_pending); | ||
1137 | |||
1138 | kfree(args); | ||
1139 | } | ||
1140 | |||
1141 | /* | ||
1142 | * Write the inode to the OSD. Just fill up the struct, and set the attribute | ||
1143 | * synchronously or asynchronously depending on the do_sync flag. | ||
1144 | */ | ||
1145 | static int exofs_update_inode(struct inode *inode, int do_sync) | ||
1146 | { | ||
1147 | struct exofs_i_info *oi = exofs_i(inode); | ||
1148 | struct super_block *sb = inode->i_sb; | ||
1149 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
1150 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
1151 | struct osd_request *or; | ||
1152 | struct osd_attr attr; | ||
1153 | struct exofs_fcb *fcb; | ||
1154 | struct updatei_args *args; | ||
1155 | int ret; | ||
1156 | |||
1157 | args = kzalloc(sizeof(*args), GFP_KERNEL); | ||
1158 | if (!args) | ||
1159 | return -ENOMEM; | ||
1160 | |||
1161 | fcb = &args->fcb; | ||
1162 | |||
1163 | fcb->i_mode = cpu_to_le16(inode->i_mode); | ||
1164 | fcb->i_uid = cpu_to_le32(inode->i_uid); | ||
1165 | fcb->i_gid = cpu_to_le32(inode->i_gid); | ||
1166 | fcb->i_links_count = cpu_to_le16(inode->i_nlink); | ||
1167 | fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | ||
1168 | fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | ||
1169 | fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | ||
1170 | oi->i_commit_size = i_size_read(inode); | ||
1171 | fcb->i_size = cpu_to_le64(oi->i_commit_size); | ||
1172 | fcb->i_generation = cpu_to_le32(inode->i_generation); | ||
1173 | |||
1174 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | ||
1175 | if (old_valid_dev(inode->i_rdev)) { | ||
1176 | fcb->i_data[0] = | ||
1177 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | ||
1178 | fcb->i_data[1] = 0; | ||
1179 | } else { | ||
1180 | fcb->i_data[0] = 0; | ||
1181 | fcb->i_data[1] = | ||
1182 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | ||
1183 | fcb->i_data[2] = 0; | ||
1184 | } | ||
1185 | } else | ||
1186 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | ||
1187 | |||
1188 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1189 | if (unlikely(!or)) { | ||
1190 | EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | ||
1191 | ret = -ENOMEM; | ||
1192 | goto free_args; | ||
1193 | } | ||
1194 | |||
1195 | osd_req_set_attributes(or, &obj); | ||
1196 | |||
1197 | attr = g_attr_inode_data; | ||
1198 | attr.val_ptr = fcb; | ||
1199 | osd_req_add_set_attr_list(or, &attr, 1); | ||
1200 | |||
1201 | if (!obj_created(oi)) { | ||
1202 | EXOFS_DBGMSG("!obj_created\n"); | ||
1203 | BUG_ON(!obj_2bcreated(oi)); | ||
1204 | wait_event(oi->i_wq, obj_created(oi)); | ||
1205 | EXOFS_DBGMSG("wait_event done\n"); | ||
1206 | } | ||
1207 | |||
1208 | if (do_sync) { | ||
1209 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
1210 | osd_end_request(or); | ||
1211 | goto free_args; | ||
1212 | } else { | ||
1213 | args->sbi = sbi; | ||
1214 | |||
1215 | ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | ||
1216 | if (ret) { | ||
1217 | osd_end_request(or); | ||
1218 | goto free_args; | ||
1219 | } | ||
1220 | atomic_inc(&sbi->s_curr_pending); | ||
1221 | goto out; /* deallocation in updatei_done */ | ||
1222 | } | ||
1223 | |||
1224 | free_args: | ||
1225 | kfree(args); | ||
1226 | out: | ||
1227 | EXOFS_DBGMSG("ret=>%d\n", ret); | ||
1228 | return ret; | ||
1229 | } | ||
1230 | |||
/*
 * Write @inode to the OSD through exofs_update_inode(); @wait is forwarded
 * unchanged as that function's do_sync flag.
 */
int exofs_write_inode(struct inode *inode, int wait)
{
	const int do_sync = wait;

	return exofs_update_inode(inode, do_sync);
}
1235 | |||
1236 | /* | ||
1237 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | ||
1238 | * do. | ||
1239 | */ | ||
1240 | static void delete_done(struct osd_request *or, void *p) | ||
1241 | { | ||
1242 | struct exofs_sb_info *sbi; | ||
1243 | osd_end_request(or); | ||
1244 | sbi = p; | ||
1245 | atomic_dec(&sbi->s_curr_pending); | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * Called when the refcount of an inode reaches zero. We remove the object | ||
1250 | * from the OSD here. We make sure the object was created before we try and | ||
1251 | * delete it. | ||
1252 | */ | ||
1253 | void exofs_delete_inode(struct inode *inode) | ||
1254 | { | ||
1255 | struct exofs_i_info *oi = exofs_i(inode); | ||
1256 | struct super_block *sb = inode->i_sb; | ||
1257 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
1258 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
1259 | struct osd_request *or; | ||
1260 | int ret; | ||
1261 | |||
1262 | truncate_inode_pages(&inode->i_data, 0); | ||
1263 | |||
1264 | if (is_bad_inode(inode)) | ||
1265 | goto no_delete; | ||
1266 | |||
1267 | mark_inode_dirty(inode); | ||
1268 | exofs_update_inode(inode, inode_needs_sync(inode)); | ||
1269 | |||
1270 | inode->i_size = 0; | ||
1271 | if (inode->i_blocks) | ||
1272 | exofs_truncate(inode); | ||
1273 | |||
1274 | clear_inode(inode); | ||
1275 | |||
1276 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1277 | if (unlikely(!or)) { | ||
1278 | EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | ||
1279 | return; | ||
1280 | } | ||
1281 | |||
1282 | osd_req_remove_object(or, &obj); | ||
1283 | |||
1284 | /* if we are deleting an obj that hasn't been created yet, wait */ | ||
1285 | if (!obj_created(oi)) { | ||
1286 | BUG_ON(!obj_2bcreated(oi)); | ||
1287 | wait_event(oi->i_wq, obj_created(oi)); | ||
1288 | } | ||
1289 | |||
1290 | ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | ||
1291 | if (ret) { | ||
1292 | EXOFS_ERR( | ||
1293 | "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | ||
1294 | osd_end_request(or); | ||
1295 | return; | ||
1296 | } | ||
1297 | atomic_inc(&sbi->s_curr_pending); | ||
1298 | |||
1299 | return; | ||
1300 | |||
1301 | no_delete: | ||
1302 | clear_inode(inode); | ||
1303 | } | ||
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c new file mode 100644 index 000000000000..77fdd765e76d --- /dev/null +++ b/fs/exofs/namei.c | |||
@@ -0,0 +1,342 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include "exofs.h" | ||
37 | |||
/*
 * Link a freshly prepared non-directory @inode under @dentry.
 *
 * On success the dentry is instantiated and 0 is returned.  On failure the
 * link count taken by the caller is dropped, the inode reference is put and
 * the error from exofs_add_link() is returned.
 */
static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = exofs_add_link(dentry, inode);

	if (err) {
		inode_dec_link_count(inode);
		iput(inode);
		return err;
	}

	d_instantiate(dentry, inode);
	return 0;
}
49 | |||
50 | static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, | ||
51 | struct nameidata *nd) | ||
52 | { | ||
53 | struct inode *inode; | ||
54 | ino_t ino; | ||
55 | |||
56 | if (dentry->d_name.len > EXOFS_NAME_LEN) | ||
57 | return ERR_PTR(-ENAMETOOLONG); | ||
58 | |||
59 | ino = exofs_inode_by_name(dir, dentry); | ||
60 | inode = NULL; | ||
61 | if (ino) { | ||
62 | inode = exofs_iget(dir->i_sb, ino); | ||
63 | if (IS_ERR(inode)) | ||
64 | return ERR_CAST(inode); | ||
65 | } | ||
66 | return d_splice_alias(inode, dentry); | ||
67 | } | ||
68 | |||
69 | static int exofs_create(struct inode *dir, struct dentry *dentry, int mode, | ||
70 | struct nameidata *nd) | ||
71 | { | ||
72 | struct inode *inode = exofs_new_inode(dir, mode); | ||
73 | int err = PTR_ERR(inode); | ||
74 | if (!IS_ERR(inode)) { | ||
75 | inode->i_op = &exofs_file_inode_operations; | ||
76 | inode->i_fop = &exofs_file_operations; | ||
77 | inode->i_mapping->a_ops = &exofs_aops; | ||
78 | mark_inode_dirty(inode); | ||
79 | err = exofs_add_nondir(dentry, inode); | ||
80 | } | ||
81 | return err; | ||
82 | } | ||
83 | |||
84 | static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode, | ||
85 | dev_t rdev) | ||
86 | { | ||
87 | struct inode *inode; | ||
88 | int err; | ||
89 | |||
90 | if (!new_valid_dev(rdev)) | ||
91 | return -EINVAL; | ||
92 | |||
93 | inode = exofs_new_inode(dir, mode); | ||
94 | err = PTR_ERR(inode); | ||
95 | if (!IS_ERR(inode)) { | ||
96 | init_special_inode(inode, inode->i_mode, rdev); | ||
97 | mark_inode_dirty(inode); | ||
98 | err = exofs_add_nondir(dentry, inode); | ||
99 | } | ||
100 | return err; | ||
101 | } | ||
102 | |||
103 | static int exofs_symlink(struct inode *dir, struct dentry *dentry, | ||
104 | const char *symname) | ||
105 | { | ||
106 | struct super_block *sb = dir->i_sb; | ||
107 | int err = -ENAMETOOLONG; | ||
108 | unsigned l = strlen(symname)+1; | ||
109 | struct inode *inode; | ||
110 | struct exofs_i_info *oi; | ||
111 | |||
112 | if (l > sb->s_blocksize) | ||
113 | goto out; | ||
114 | |||
115 | inode = exofs_new_inode(dir, S_IFLNK | S_IRWXUGO); | ||
116 | err = PTR_ERR(inode); | ||
117 | if (IS_ERR(inode)) | ||
118 | goto out; | ||
119 | |||
120 | oi = exofs_i(inode); | ||
121 | if (l > sizeof(oi->i_data)) { | ||
122 | /* slow symlink */ | ||
123 | inode->i_op = &exofs_symlink_inode_operations; | ||
124 | inode->i_mapping->a_ops = &exofs_aops; | ||
125 | memset(oi->i_data, 0, sizeof(oi->i_data)); | ||
126 | |||
127 | err = page_symlink(inode, symname, l); | ||
128 | if (err) | ||
129 | goto out_fail; | ||
130 | } else { | ||
131 | /* fast symlink */ | ||
132 | inode->i_op = &exofs_fast_symlink_inode_operations; | ||
133 | memcpy(oi->i_data, symname, l); | ||
134 | inode->i_size = l-1; | ||
135 | } | ||
136 | mark_inode_dirty(inode); | ||
137 | |||
138 | err = exofs_add_nondir(dentry, inode); | ||
139 | out: | ||
140 | return err; | ||
141 | |||
142 | out_fail: | ||
143 | inode_dec_link_count(inode); | ||
144 | iput(inode); | ||
145 | goto out; | ||
146 | } | ||
147 | |||
148 | static int exofs_link(struct dentry *old_dentry, struct inode *dir, | ||
149 | struct dentry *dentry) | ||
150 | { | ||
151 | struct inode *inode = old_dentry->d_inode; | ||
152 | |||
153 | if (inode->i_nlink >= EXOFS_LINK_MAX) | ||
154 | return -EMLINK; | ||
155 | |||
156 | inode->i_ctime = CURRENT_TIME; | ||
157 | inode_inc_link_count(inode); | ||
158 | atomic_inc(&inode->i_count); | ||
159 | |||
160 | return exofs_add_nondir(dentry, inode); | ||
161 | } | ||
162 | |||
163 | static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
164 | { | ||
165 | struct inode *inode; | ||
166 | int err = -EMLINK; | ||
167 | |||
168 | if (dir->i_nlink >= EXOFS_LINK_MAX) | ||
169 | goto out; | ||
170 | |||
171 | inode_inc_link_count(dir); | ||
172 | |||
173 | inode = exofs_new_inode(dir, S_IFDIR | mode); | ||
174 | err = PTR_ERR(inode); | ||
175 | if (IS_ERR(inode)) | ||
176 | goto out_dir; | ||
177 | |||
178 | inode->i_op = &exofs_dir_inode_operations; | ||
179 | inode->i_fop = &exofs_dir_operations; | ||
180 | inode->i_mapping->a_ops = &exofs_aops; | ||
181 | |||
182 | inode_inc_link_count(inode); | ||
183 | |||
184 | err = exofs_make_empty(inode, dir); | ||
185 | if (err) | ||
186 | goto out_fail; | ||
187 | |||
188 | err = exofs_add_link(dentry, inode); | ||
189 | if (err) | ||
190 | goto out_fail; | ||
191 | |||
192 | d_instantiate(dentry, inode); | ||
193 | out: | ||
194 | return err; | ||
195 | |||
196 | out_fail: | ||
197 | inode_dec_link_count(inode); | ||
198 | inode_dec_link_count(inode); | ||
199 | iput(inode); | ||
200 | out_dir: | ||
201 | inode_dec_link_count(dir); | ||
202 | goto out; | ||
203 | } | ||
204 | |||
205 | static int exofs_unlink(struct inode *dir, struct dentry *dentry) | ||
206 | { | ||
207 | struct inode *inode = dentry->d_inode; | ||
208 | struct exofs_dir_entry *de; | ||
209 | struct page *page; | ||
210 | int err = -ENOENT; | ||
211 | |||
212 | de = exofs_find_entry(dir, dentry, &page); | ||
213 | if (!de) | ||
214 | goto out; | ||
215 | |||
216 | err = exofs_delete_entry(de, page); | ||
217 | if (err) | ||
218 | goto out; | ||
219 | |||
220 | inode->i_ctime = dir->i_ctime; | ||
221 | inode_dec_link_count(inode); | ||
222 | err = 0; | ||
223 | out: | ||
224 | return err; | ||
225 | } | ||
226 | |||
227 | static int exofs_rmdir(struct inode *dir, struct dentry *dentry) | ||
228 | { | ||
229 | struct inode *inode = dentry->d_inode; | ||
230 | int err = -ENOTEMPTY; | ||
231 | |||
232 | if (exofs_empty_dir(inode)) { | ||
233 | err = exofs_unlink(dir, dentry); | ||
234 | if (!err) { | ||
235 | inode->i_size = 0; | ||
236 | inode_dec_link_count(inode); | ||
237 | inode_dec_link_count(dir); | ||
238 | } | ||
239 | } | ||
240 | return err; | ||
241 | } | ||
242 | |||
243 | static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
244 | struct inode *new_dir, struct dentry *new_dentry) | ||
245 | { | ||
246 | struct inode *old_inode = old_dentry->d_inode; | ||
247 | struct inode *new_inode = new_dentry->d_inode; | ||
248 | struct page *dir_page = NULL; | ||
249 | struct exofs_dir_entry *dir_de = NULL; | ||
250 | struct page *old_page; | ||
251 | struct exofs_dir_entry *old_de; | ||
252 | int err = -ENOENT; | ||
253 | |||
254 | old_de = exofs_find_entry(old_dir, old_dentry, &old_page); | ||
255 | if (!old_de) | ||
256 | goto out; | ||
257 | |||
258 | if (S_ISDIR(old_inode->i_mode)) { | ||
259 | err = -EIO; | ||
260 | dir_de = exofs_dotdot(old_inode, &dir_page); | ||
261 | if (!dir_de) | ||
262 | goto out_old; | ||
263 | } | ||
264 | |||
265 | if (new_inode) { | ||
266 | struct page *new_page; | ||
267 | struct exofs_dir_entry *new_de; | ||
268 | |||
269 | err = -ENOTEMPTY; | ||
270 | if (dir_de && !exofs_empty_dir(new_inode)) | ||
271 | goto out_dir; | ||
272 | |||
273 | err = -ENOENT; | ||
274 | new_de = exofs_find_entry(new_dir, new_dentry, &new_page); | ||
275 | if (!new_de) | ||
276 | goto out_dir; | ||
277 | inode_inc_link_count(old_inode); | ||
278 | err = exofs_set_link(new_dir, new_de, new_page, old_inode); | ||
279 | new_inode->i_ctime = CURRENT_TIME; | ||
280 | if (dir_de) | ||
281 | drop_nlink(new_inode); | ||
282 | inode_dec_link_count(new_inode); | ||
283 | if (err) | ||
284 | goto out_dir; | ||
285 | } else { | ||
286 | if (dir_de) { | ||
287 | err = -EMLINK; | ||
288 | if (new_dir->i_nlink >= EXOFS_LINK_MAX) | ||
289 | goto out_dir; | ||
290 | } | ||
291 | inode_inc_link_count(old_inode); | ||
292 | err = exofs_add_link(new_dentry, old_inode); | ||
293 | if (err) { | ||
294 | inode_dec_link_count(old_inode); | ||
295 | goto out_dir; | ||
296 | } | ||
297 | if (dir_de) | ||
298 | inode_inc_link_count(new_dir); | ||
299 | } | ||
300 | |||
301 | old_inode->i_ctime = CURRENT_TIME; | ||
302 | |||
303 | exofs_delete_entry(old_de, old_page); | ||
304 | inode_dec_link_count(old_inode); | ||
305 | |||
306 | if (dir_de) { | ||
307 | err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); | ||
308 | inode_dec_link_count(old_dir); | ||
309 | if (err) | ||
310 | goto out_dir; | ||
311 | } | ||
312 | return 0; | ||
313 | |||
314 | |||
315 | out_dir: | ||
316 | if (dir_de) { | ||
317 | kunmap(dir_page); | ||
318 | page_cache_release(dir_page); | ||
319 | } | ||
320 | out_old: | ||
321 | kunmap(old_page); | ||
322 | page_cache_release(old_page); | ||
323 | out: | ||
324 | return err; | ||
325 | } | ||
326 | |||
327 | const struct inode_operations exofs_dir_inode_operations = { | ||
328 | .create = exofs_create, | ||
329 | .lookup = exofs_lookup, | ||
330 | .link = exofs_link, | ||
331 | .unlink = exofs_unlink, | ||
332 | .symlink = exofs_symlink, | ||
333 | .mkdir = exofs_mkdir, | ||
334 | .rmdir = exofs_rmdir, | ||
335 | .mknod = exofs_mknod, | ||
336 | .rename = exofs_rename, | ||
337 | .setattr = exofs_setattr, | ||
338 | }; | ||
339 | |||
340 | const struct inode_operations exofs_special_inode_operations = { | ||
341 | .setattr = exofs_setattr, | ||
342 | }; | ||
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c new file mode 100644 index 000000000000..b249ae97fb15 --- /dev/null +++ b/fs/exofs/osd.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * This file is part of exofs. | ||
10 | * | ||
11 | * exofs is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
14 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
15 | * version of GPL for exofs is version 2. | ||
16 | * | ||
17 | * exofs is distributed in the hope that it will be useful, | ||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | * GNU General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with exofs; if not, write to the Free Software | ||
24 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
25 | */ | ||
26 | |||
27 | #include <scsi/scsi_device.h> | ||
28 | #include <scsi/osd_sense.h> | ||
29 | |||
30 | #include "exofs.h" | ||
31 | |||
32 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | ||
33 | { | ||
34 | struct osd_sense_info osi; | ||
35 | int ret = osd_req_decode_sense(or, &osi); | ||
36 | |||
37 | if (ret) { /* translate to Linux codes */ | ||
38 | if (osi.additional_code == scsi_invalid_field_in_cdb) { | ||
39 | if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | ||
40 | ret = -EFAULT; | ||
41 | if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | ||
42 | ret = -ENOENT; | ||
43 | else | ||
44 | ret = -EINVAL; | ||
45 | } else if (osi.additional_code == osd_quota_error) | ||
46 | ret = -ENOSPC; | ||
47 | else | ||
48 | ret = -EIO; | ||
49 | } | ||
50 | |||
51 | /* FIXME: should be include in osd_sense_info */ | ||
52 | if (in_resid) | ||
53 | *in_resid = or->in.req ? or->in.req->data_len : 0; | ||
54 | |||
55 | if (out_resid) | ||
56 | *out_resid = or->out.req ? or->out.req->data_len : 0; | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | ||
62 | { | ||
63 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * Perform a synchronous OSD operation. | ||
68 | */ | ||
69 | int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
70 | { | ||
71 | int ret; | ||
72 | |||
73 | or->timeout = timeout; | ||
74 | ret = osd_finalize_request(or, 0, credential, NULL); | ||
75 | if (ret) { | ||
76 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
77 | return ret; | ||
78 | } | ||
79 | |||
80 | ret = osd_execute_request(or); | ||
81 | |||
82 | if (ret) | ||
83 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
84 | /* osd_req_decode_sense(or, ret); */ | ||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Perform an asynchronous OSD operation. | ||
90 | */ | ||
91 | int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
92 | void *caller_context, u8 *cred) | ||
93 | { | ||
94 | int ret; | ||
95 | |||
96 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
97 | if (ret) { | ||
98 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | ret = osd_execute_request_async(or, async_done, caller_context); | ||
103 | |||
104 | if (ret) | ||
105 | EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | ||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | ||
110 | { | ||
111 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
112 | void *iter = NULL; | ||
113 | int nelem; | ||
114 | |||
115 | do { | ||
116 | nelem = 1; | ||
117 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | ||
118 | if ((cur_attr.attr_page == attr->attr_page) && | ||
119 | (cur_attr.attr_id == attr->attr_id)) { | ||
120 | attr->len = cur_attr.len; | ||
121 | attr->val_ptr = cur_attr.val_ptr; | ||
122 | return 0; | ||
123 | } | ||
124 | } while (iter); | ||
125 | |||
126 | return -EIO; | ||
127 | } | ||
128 | |||
129 | int osd_req_read_kern(struct osd_request *or, | ||
130 | const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) | ||
131 | { | ||
132 | struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; | ||
133 | struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); | ||
134 | |||
135 | if (!bio) | ||
136 | return -ENOMEM; | ||
137 | |||
138 | osd_req_read(or, obj, bio, offset); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | int osd_req_write_kern(struct osd_request *or, | ||
143 | const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) | ||
144 | { | ||
145 | struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; | ||
146 | struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); | ||
147 | |||
148 | if (!bio) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | osd_req_write(or, obj, bio, offset); | ||
152 | return 0; | ||
153 | } | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c new file mode 100644 index 000000000000..9f1985e857e2 --- /dev/null +++ b/fs/exofs/super.c | |||
@@ -0,0 +1,584 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/string.h> | ||
37 | #include <linux/parser.h> | ||
38 | #include <linux/vfs.h> | ||
39 | #include <linux/random.h> | ||
40 | #include <linux/exportfs.h> | ||
41 | |||
42 | #include "exofs.h" | ||
43 | |||
44 | /****************************************************************************** | ||
45 | * MOUNT OPTIONS | ||
46 | *****************************************************************************/ | ||
47 | |||
48 | /* | ||
49 | * struct to hold what we get from mount options | ||
50 | */ | ||
51 | struct exofs_mountopt { | ||
52 | const char *dev_name; | ||
53 | uint64_t pid; | ||
54 | int timeout; | ||
55 | }; | ||
56 | |||
/*
 * exofs-specific mount-time option tokens.  Opt_err is what the tokens
 * table maps every unrecognised option to.
 */
enum {
	Opt_pid,
	Opt_to,
	Opt_mkfs,
	Opt_format,
	Opt_err
};
61 | |||
62 | /* | ||
63 | * Our mount-time options. These should ideally be 64-bit unsigned, but the | ||
64 | * kernel's parsing functions do not currently support that. 32-bit should be | ||
65 | * sufficient for most applications now. | ||
66 | */ | ||
67 | static match_table_t tokens = { | ||
68 | {Opt_pid, "pid=%u"}, | ||
69 | {Opt_to, "to=%u"}, | ||
70 | {Opt_err, NULL} | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * The main option parsing method. Also makes sure that all of the mandatory | ||
75 | * mount options were set. | ||
76 | */ | ||
77 | static int parse_options(char *options, struct exofs_mountopt *opts) | ||
78 | { | ||
79 | char *p; | ||
80 | substring_t args[MAX_OPT_ARGS]; | ||
81 | int option; | ||
82 | bool s_pid = false; | ||
83 | |||
84 | EXOFS_DBGMSG("parse_options %s\n", options); | ||
85 | /* defaults */ | ||
86 | memset(opts, 0, sizeof(*opts)); | ||
87 | opts->timeout = BLK_DEFAULT_SG_TIMEOUT; | ||
88 | |||
89 | while ((p = strsep(&options, ",")) != NULL) { | ||
90 | int token; | ||
91 | char str[32]; | ||
92 | |||
93 | if (!*p) | ||
94 | continue; | ||
95 | |||
96 | token = match_token(p, tokens, args); | ||
97 | switch (token) { | ||
98 | case Opt_pid: | ||
99 | if (0 == match_strlcpy(str, &args[0], sizeof(str))) | ||
100 | return -EINVAL; | ||
101 | opts->pid = simple_strtoull(str, NULL, 0); | ||
102 | if (opts->pid < EXOFS_MIN_PID) { | ||
103 | EXOFS_ERR("Partition ID must be >= %u", | ||
104 | EXOFS_MIN_PID); | ||
105 | return -EINVAL; | ||
106 | } | ||
107 | s_pid = 1; | ||
108 | break; | ||
109 | case Opt_to: | ||
110 | if (match_int(&args[0], &option)) | ||
111 | return -EINVAL; | ||
112 | if (option <= 0) { | ||
113 | EXOFS_ERR("Timout must be > 0"); | ||
114 | return -EINVAL; | ||
115 | } | ||
116 | opts->timeout = option * HZ; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (!s_pid) { | ||
122 | EXOFS_ERR("Need to specify the following options:\n"); | ||
123 | EXOFS_ERR(" -o pid=pid_no_to_use\n"); | ||
124 | return -EINVAL; | ||
125 | } | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | /****************************************************************************** | ||
131 | * INODE CACHE | ||
132 | *****************************************************************************/ | ||
133 | |||
134 | /* | ||
135 | * Our inode cache. Isn't it pretty? | ||
136 | */ | ||
137 | static struct kmem_cache *exofs_inode_cachep; | ||
138 | |||
139 | /* | ||
140 | * Allocate an inode in the cache | ||
141 | */ | ||
142 | static struct inode *exofs_alloc_inode(struct super_block *sb) | ||
143 | { | ||
144 | struct exofs_i_info *oi; | ||
145 | |||
146 | oi = kmem_cache_alloc(exofs_inode_cachep, GFP_KERNEL); | ||
147 | if (!oi) | ||
148 | return NULL; | ||
149 | |||
150 | oi->vfs_inode.i_version = 1; | ||
151 | return &oi->vfs_inode; | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * Remove an inode from the cache | ||
156 | */ | ||
157 | static void exofs_destroy_inode(struct inode *inode) | ||
158 | { | ||
159 | kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * Initialize the inode | ||
164 | */ | ||
165 | static void exofs_init_once(void *foo) | ||
166 | { | ||
167 | struct exofs_i_info *oi = foo; | ||
168 | |||
169 | inode_init_once(&oi->vfs_inode); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Create and initialize the inode cache | ||
174 | */ | ||
175 | static int init_inodecache(void) | ||
176 | { | ||
177 | exofs_inode_cachep = kmem_cache_create("exofs_inode_cache", | ||
178 | sizeof(struct exofs_i_info), 0, | ||
179 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
180 | exofs_init_once); | ||
181 | if (exofs_inode_cachep == NULL) | ||
182 | return -ENOMEM; | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Destroy the inode cache | ||
188 | */ | ||
189 | static void destroy_inodecache(void) | ||
190 | { | ||
191 | kmem_cache_destroy(exofs_inode_cachep); | ||
192 | } | ||
193 | |||
194 | /****************************************************************************** | ||
195 | * SUPERBLOCK FUNCTIONS | ||
196 | *****************************************************************************/ | ||
197 | static const struct super_operations exofs_sops; | ||
198 | static const struct export_operations exofs_export_ops; | ||
199 | |||
200 | /* | ||
201 | * Write the superblock to the OSD | ||
202 | */ | ||
203 | static void exofs_write_super(struct super_block *sb) | ||
204 | { | ||
205 | struct exofs_sb_info *sbi; | ||
206 | struct exofs_fscb *fscb; | ||
207 | struct osd_request *or; | ||
208 | struct osd_obj_id obj; | ||
209 | int ret; | ||
210 | |||
211 | fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); | ||
212 | if (!fscb) { | ||
213 | EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | lock_kernel(); | ||
218 | sbi = sb->s_fs_info; | ||
219 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | ||
220 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); | ||
221 | fscb->s_magic = cpu_to_le16(sb->s_magic); | ||
222 | fscb->s_newfs = 0; | ||
223 | |||
224 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
225 | if (unlikely(!or)) { | ||
226 | EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); | ||
227 | goto out; | ||
228 | } | ||
229 | |||
230 | obj.partition = sbi->s_pid; | ||
231 | obj.id = EXOFS_SUPER_ID; | ||
232 | ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb)); | ||
233 | if (unlikely(ret)) { | ||
234 | EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); | ||
235 | goto out; | ||
236 | } | ||
237 | |||
238 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
239 | if (unlikely(ret)) { | ||
240 | EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); | ||
241 | goto out; | ||
242 | } | ||
243 | sb->s_dirt = 0; | ||
244 | |||
245 | out: | ||
246 | if (or) | ||
247 | osd_end_request(or); | ||
248 | unlock_kernel(); | ||
249 | kfree(fscb); | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * This function is called when the vfs is freeing the superblock. We just | ||
254 | * need to free our own part. | ||
255 | */ | ||
256 | static void exofs_put_super(struct super_block *sb) | ||
257 | { | ||
258 | int num_pend; | ||
259 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
260 | |||
261 | /* make sure there are no pending commands */ | ||
262 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; | ||
263 | num_pend = atomic_read(&sbi->s_curr_pending)) { | ||
264 | wait_queue_head_t wq; | ||
265 | init_waitqueue_head(&wq); | ||
266 | wait_event_timeout(wq, | ||
267 | (atomic_read(&sbi->s_curr_pending) == 0), | ||
268 | msecs_to_jiffies(100)); | ||
269 | } | ||
270 | |||
271 | osduld_put_device(sbi->s_dev); | ||
272 | kfree(sb->s_fs_info); | ||
273 | sb->s_fs_info = NULL; | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Read the superblock from the OSD and fill in the fields | ||
278 | */ | ||
279 | static int exofs_fill_super(struct super_block *sb, void *data, int silent) | ||
280 | { | ||
281 | struct inode *root; | ||
282 | struct exofs_mountopt *opts = data; | ||
283 | struct exofs_sb_info *sbi; /*extended info */ | ||
284 | struct exofs_fscb fscb; /*on-disk superblock info */ | ||
285 | struct osd_request *or = NULL; | ||
286 | struct osd_obj_id obj; | ||
287 | int ret; | ||
288 | |||
289 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | ||
290 | if (!sbi) | ||
291 | return -ENOMEM; | ||
292 | sb->s_fs_info = sbi; | ||
293 | |||
294 | /* use mount options to fill superblock */ | ||
295 | sbi->s_dev = osduld_path_lookup(opts->dev_name); | ||
296 | if (IS_ERR(sbi->s_dev)) { | ||
297 | ret = PTR_ERR(sbi->s_dev); | ||
298 | sbi->s_dev = NULL; | ||
299 | goto free_sbi; | ||
300 | } | ||
301 | |||
302 | sbi->s_pid = opts->pid; | ||
303 | sbi->s_timeout = opts->timeout; | ||
304 | |||
305 | /* fill in some other data by hand */ | ||
306 | memset(sb->s_id, 0, sizeof(sb->s_id)); | ||
307 | strcpy(sb->s_id, "exofs"); | ||
308 | sb->s_blocksize = EXOFS_BLKSIZE; | ||
309 | sb->s_blocksize_bits = EXOFS_BLKSHIFT; | ||
310 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
311 | atomic_set(&sbi->s_curr_pending, 0); | ||
312 | sb->s_bdev = NULL; | ||
313 | sb->s_dev = 0; | ||
314 | |||
315 | /* read data from on-disk superblock object */ | ||
316 | obj.partition = sbi->s_pid; | ||
317 | obj.id = EXOFS_SUPER_ID; | ||
318 | exofs_make_credential(sbi->s_cred, &obj); | ||
319 | |||
320 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
321 | if (unlikely(!or)) { | ||
322 | if (!silent) | ||
323 | EXOFS_ERR( | ||
324 | "exofs_fill_super: osd_start_request failed.\n"); | ||
325 | ret = -ENOMEM; | ||
326 | goto free_sbi; | ||
327 | } | ||
328 | ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb)); | ||
329 | if (unlikely(ret)) { | ||
330 | if (!silent) | ||
331 | EXOFS_ERR( | ||
332 | "exofs_fill_super: osd_req_read_kern failed.\n"); | ||
333 | ret = -ENOMEM; | ||
334 | goto free_sbi; | ||
335 | } | ||
336 | |||
337 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
338 | if (unlikely(ret)) { | ||
339 | if (!silent) | ||
340 | EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n"); | ||
341 | ret = -EIO; | ||
342 | goto free_sbi; | ||
343 | } | ||
344 | |||
345 | sb->s_magic = le16_to_cpu(fscb.s_magic); | ||
346 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); | ||
347 | sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); | ||
348 | |||
349 | /* make sure what we read from the object store is correct */ | ||
350 | if (sb->s_magic != EXOFS_SUPER_MAGIC) { | ||
351 | if (!silent) | ||
352 | EXOFS_ERR("ERROR: Bad magic value\n"); | ||
353 | ret = -EINVAL; | ||
354 | goto free_sbi; | ||
355 | } | ||
356 | |||
357 | /* start generation numbers from a random point */ | ||
358 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | ||
359 | spin_lock_init(&sbi->s_next_gen_lock); | ||
360 | |||
361 | /* set up operation vectors */ | ||
362 | sb->s_op = &exofs_sops; | ||
363 | sb->s_export_op = &exofs_export_ops; | ||
364 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); | ||
365 | if (IS_ERR(root)) { | ||
366 | EXOFS_ERR("ERROR: exofs_iget failed\n"); | ||
367 | ret = PTR_ERR(root); | ||
368 | goto free_sbi; | ||
369 | } | ||
370 | sb->s_root = d_alloc_root(root); | ||
371 | if (!sb->s_root) { | ||
372 | iput(root); | ||
373 | EXOFS_ERR("ERROR: get root inode failed\n"); | ||
374 | ret = -ENOMEM; | ||
375 | goto free_sbi; | ||
376 | } | ||
377 | |||
378 | if (!S_ISDIR(root->i_mode)) { | ||
379 | dput(sb->s_root); | ||
380 | sb->s_root = NULL; | ||
381 | EXOFS_ERR("ERROR: corrupt root inode (mode = %hd)\n", | ||
382 | root->i_mode); | ||
383 | ret = -EINVAL; | ||
384 | goto free_sbi; | ||
385 | } | ||
386 | |||
387 | ret = 0; | ||
388 | out: | ||
389 | if (or) | ||
390 | osd_end_request(or); | ||
391 | return ret; | ||
392 | |||
393 | free_sbi: | ||
394 | osduld_put_device(sbi->s_dev); /* NULL safe */ | ||
395 | kfree(sbi); | ||
396 | goto out; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Set up the superblock (calls exofs_fill_super eventually) | ||
401 | */ | ||
402 | static int exofs_get_sb(struct file_system_type *type, | ||
403 | int flags, const char *dev_name, | ||
404 | void *data, struct vfsmount *mnt) | ||
405 | { | ||
406 | struct exofs_mountopt opts; | ||
407 | int ret; | ||
408 | |||
409 | ret = parse_options(data, &opts); | ||
410 | if (ret) | ||
411 | return ret; | ||
412 | |||
413 | opts.dev_name = dev_name; | ||
414 | return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Return information about the file system state in the buffer. This is used | ||
419 | * by the 'df' command, for example. | ||
420 | */ | ||
421 | static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
422 | { | ||
423 | struct super_block *sb = dentry->d_sb; | ||
424 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
425 | struct osd_obj_id obj = {sbi->s_pid, 0}; | ||
426 | struct osd_attr attrs[] = { | ||
427 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, | ||
428 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), | ||
429 | ATTR_DEF(OSD_APAGE_PARTITION_INFORMATION, | ||
430 | OSD_ATTR_PI_USED_CAPACITY, sizeof(__be64)), | ||
431 | }; | ||
432 | uint64_t capacity = ULLONG_MAX; | ||
433 | uint64_t used = ULLONG_MAX; | ||
434 | struct osd_request *or; | ||
435 | uint8_t cred_a[OSD_CAP_LEN]; | ||
436 | int ret; | ||
437 | |||
438 | /* get used/capacity attributes */ | ||
439 | exofs_make_credential(cred_a, &obj); | ||
440 | |||
441 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
442 | if (unlikely(!or)) { | ||
443 | EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n"); | ||
444 | return -ENOMEM; | ||
445 | } | ||
446 | |||
447 | osd_req_get_attributes(or, &obj); | ||
448 | osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); | ||
449 | ret = exofs_sync_op(or, sbi->s_timeout, cred_a); | ||
450 | if (unlikely(ret)) | ||
451 | goto out; | ||
452 | |||
453 | ret = extract_attr_from_req(or, &attrs[0]); | ||
454 | if (likely(!ret)) | ||
455 | capacity = get_unaligned_be64(attrs[0].val_ptr); | ||
456 | else | ||
457 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); | ||
458 | |||
459 | ret = extract_attr_from_req(or, &attrs[1]); | ||
460 | if (likely(!ret)) | ||
461 | used = get_unaligned_be64(attrs[1].val_ptr); | ||
462 | else | ||
463 | EXOFS_DBGMSG("exofs_statfs: get used-space failed.\n"); | ||
464 | |||
465 | /* fill in the stats buffer */ | ||
466 | buf->f_type = EXOFS_SUPER_MAGIC; | ||
467 | buf->f_bsize = EXOFS_BLKSIZE; | ||
468 | buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); | ||
469 | buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); | ||
470 | buf->f_bavail = buf->f_bfree; | ||
471 | buf->f_files = sbi->s_numfiles; | ||
472 | buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; | ||
473 | buf->f_namelen = EXOFS_NAME_LEN; | ||
474 | |||
475 | out: | ||
476 | osd_end_request(or); | ||
477 | return ret; | ||
478 | } | ||
479 | |||
480 | static const struct super_operations exofs_sops = { | ||
481 | .alloc_inode = exofs_alloc_inode, | ||
482 | .destroy_inode = exofs_destroy_inode, | ||
483 | .write_inode = exofs_write_inode, | ||
484 | .delete_inode = exofs_delete_inode, | ||
485 | .put_super = exofs_put_super, | ||
486 | .write_super = exofs_write_super, | ||
487 | .statfs = exofs_statfs, | ||
488 | }; | ||
489 | |||
490 | /****************************************************************************** | ||
491 | * EXPORT OPERATIONS | ||
492 | *****************************************************************************/ | ||
493 | |||
494 | struct dentry *exofs_get_parent(struct dentry *child) | ||
495 | { | ||
496 | unsigned long ino = exofs_parent_ino(child); | ||
497 | |||
498 | if (!ino) | ||
499 | return NULL; | ||
500 | |||
501 | return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino)); | ||
502 | } | ||
503 | |||
504 | static struct inode *exofs_nfs_get_inode(struct super_block *sb, | ||
505 | u64 ino, u32 generation) | ||
506 | { | ||
507 | struct inode *inode; | ||
508 | |||
509 | inode = exofs_iget(sb, ino); | ||
510 | if (IS_ERR(inode)) | ||
511 | return ERR_CAST(inode); | ||
512 | if (generation && inode->i_generation != generation) { | ||
513 | /* we didn't find the right inode.. */ | ||
514 | iput(inode); | ||
515 | return ERR_PTR(-ESTALE); | ||
516 | } | ||
517 | return inode; | ||
518 | } | ||
519 | |||
520 | static struct dentry *exofs_fh_to_dentry(struct super_block *sb, | ||
521 | struct fid *fid, int fh_len, int fh_type) | ||
522 | { | ||
523 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
524 | exofs_nfs_get_inode); | ||
525 | } | ||
526 | |||
527 | static struct dentry *exofs_fh_to_parent(struct super_block *sb, | ||
528 | struct fid *fid, int fh_len, int fh_type) | ||
529 | { | ||
530 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
531 | exofs_nfs_get_inode); | ||
532 | } | ||
533 | |||
534 | static const struct export_operations exofs_export_ops = { | ||
535 | .fh_to_dentry = exofs_fh_to_dentry, | ||
536 | .fh_to_parent = exofs_fh_to_parent, | ||
537 | .get_parent = exofs_get_parent, | ||
538 | }; | ||
539 | |||
540 | /****************************************************************************** | ||
541 | * INSMOD/RMMOD | ||
542 | *****************************************************************************/ | ||
543 | |||
544 | /* | ||
545 | * struct that describes this file system | ||
546 | */ | ||
547 | static struct file_system_type exofs_type = { | ||
548 | .owner = THIS_MODULE, | ||
549 | .name = "exofs", | ||
550 | .get_sb = exofs_get_sb, | ||
551 | .kill_sb = generic_shutdown_super, | ||
552 | }; | ||
553 | |||
554 | static int __init init_exofs(void) | ||
555 | { | ||
556 | int err; | ||
557 | |||
558 | err = init_inodecache(); | ||
559 | if (err) | ||
560 | goto out; | ||
561 | |||
562 | err = register_filesystem(&exofs_type); | ||
563 | if (err) | ||
564 | goto out_d; | ||
565 | |||
566 | return 0; | ||
567 | out_d: | ||
568 | destroy_inodecache(); | ||
569 | out: | ||
570 | return err; | ||
571 | } | ||
572 | |||
573 | static void __exit exit_exofs(void) | ||
574 | { | ||
575 | unregister_filesystem(&exofs_type); | ||
576 | destroy_inodecache(); | ||
577 | } | ||
578 | |||
579 | MODULE_AUTHOR("Avishay Traeger <avishay@gmail.com>"); | ||
580 | MODULE_DESCRIPTION("exofs"); | ||
581 | MODULE_LICENSE("GPL"); | ||
582 | |||
583 | module_init(init_exofs) | ||
584 | module_exit(exit_exofs) | ||
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c new file mode 100644 index 000000000000..36e2d7bc7f7b --- /dev/null +++ b/fs/exofs/symlink.c | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/namei.h> | ||
37 | |||
38 | #include "exofs.h" | ||
39 | |||
40 | static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
41 | { | ||
42 | struct exofs_i_info *oi = exofs_i(dentry->d_inode); | ||
43 | |||
44 | nd_set_link(nd, (char *)oi->i_data); | ||
45 | return NULL; | ||
46 | } | ||
47 | |||
48 | const struct inode_operations exofs_symlink_inode_operations = { | ||
49 | .readlink = generic_readlink, | ||
50 | .follow_link = page_follow_link_light, | ||
51 | .put_link = page_put_link, | ||
52 | }; | ||
53 | |||
54 | const struct inode_operations exofs_fast_symlink_inode_operations = { | ||
55 | .readlink = generic_readlink, | ||
56 | .follow_link = exofs_follow_link, | ||
57 | }; | ||