diff options
Diffstat (limited to 'fs/exofs')
| -rw-r--r-- | fs/exofs/Kbuild | 2 | ||||
| -rw-r--r-- | fs/exofs/common.h | 81 | ||||
| -rw-r--r-- | fs/exofs/exofs.h | 97 | ||||
| -rw-r--r-- | fs/exofs/inode.c | 409 | ||||
| -rw-r--r-- | fs/exofs/ios.c | 421 | ||||
| -rw-r--r-- | fs/exofs/osd.c | 125 | ||||
| -rw-r--r-- | fs/exofs/pnfs.h | 51 | ||||
| -rw-r--r-- | fs/exofs/super.c | 353 |
8 files changed, 1093 insertions, 446 deletions
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index cc2d22db119c..2d0f757fda3e 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild | |||
| @@ -12,5 +12,5 @@ | |||
| 12 | # Kbuild - Gets included from the Kernels Makefile and build system | 12 | # Kbuild - Gets included from the Kernels Makefile and build system |
| 13 | # | 13 | # |
| 14 | 14 | ||
| 15 | exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o | 15 | exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o |
| 16 | obj-$(CONFIG_EXOFS_FS) += exofs.o | 16 | obj-$(CONFIG_EXOFS_FS) += exofs.o |
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index c6718e4817fe..b1b178e61718 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ | 49 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ |
| 50 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ | 50 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ |
| 51 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ | 51 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ |
| 52 | #define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */ | ||
| 52 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ | 53 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ |
| 53 | 54 | ||
| 54 | /* exofs Application specific page/attribute */ | 55 | /* exofs Application specific page/attribute */ |
| @@ -78,17 +79,67 @@ enum { | |||
| 78 | #define EXOFS_SUPER_MAGIC 0x5DF5 | 79 | #define EXOFS_SUPER_MAGIC 0x5DF5 |
| 79 | 80 | ||
| 80 | /* | 81 | /* |
| 81 | * The file system control block - stored in an object's data (mainly, the one | 82 | * The file system control block - stored in object EXOFS_SUPER_ID's data. |
| 82 | * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored | 83 | * This is where the in-memory superblock is stored on disk. |
| 83 | * on disk. Right now it just has a magic value, which is basically a sanity | ||
| 84 | * check on our ability to communicate with the object store. | ||
| 85 | */ | 84 | */ |
| 85 | enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; | ||
| 86 | struct exofs_fscb { | 86 | struct exofs_fscb { |
| 87 | __le64 s_nextid; /* Highest object ID used */ | 87 | __le64 s_nextid; /* Highest object ID used */ |
| 88 | __le32 s_numfiles; /* Number of files on fs */ | 88 | __le64 s_numfiles; /* Number of files on fs */ |
| 89 | __le32 s_version; /* == EXOFS_FSCB_VER */ | ||
| 89 | __le16 s_magic; /* Magic signature */ | 90 | __le16 s_magic; /* Magic signature */ |
| 90 | __le16 s_newfs; /* Non-zero if this is a new fs */ | 91 | __le16 s_newfs; /* Non-zero if this is a new fs */ |
| 91 | }; | 92 | |
| 93 | /* From here on it's a static part, only written by mkexofs */ | ||
| 94 | __le64 s_dev_table_oid; /* Reserved, not used */ | ||
| 95 | __le64 s_dev_table_count; /* == 0 means no dev_table */ | ||
| 96 | } __packed; | ||
| 97 | |||
| 98 | /* | ||
| 99 | * Describes the raid used in the FS. It is part of the device table. | ||
| 100 | * This here is taken from the pNFS-objects definition. In exofs we | ||
| 101 | * use one raid policy through-out the filesystem. (NOTE: the funny | ||
| 102 | * alignment at beginning. We take care of it at exofs_device_table.) | ||
| 103 | */ | ||
| 104 | struct exofs_dt_data_map { | ||
| 105 | __le32 cb_num_comps; | ||
| 106 | __le64 cb_stripe_unit; | ||
| 107 | __le32 cb_group_width; | ||
| 108 | __le32 cb_group_depth; | ||
| 109 | __le32 cb_mirror_cnt; | ||
| 110 | __le32 cb_raid_algorithm; | ||
| 111 | } __packed; | ||
| 112 | |||
| 113 | /* | ||
| 114 | * This is an osd device information descriptor. It is a single entry in | ||
| 115 | * the exofs device table. It describes an osd target lun which | ||
| 116 | * contains data belonging to this FS. (Same partition_id on all devices) | ||
| 117 | */ | ||
| 118 | struct exofs_dt_device_info { | ||
| 119 | __le32 systemid_len; | ||
| 120 | u8 systemid[OSD_SYSTEMID_LEN]; | ||
| 121 | __le64 long_name_offset; /* If !0 then offset-in-file */ | ||
| 122 | __le32 osdname_len; /* */ | ||
| 123 | u8 osdname[44]; /* Embedded, usually an ASCII uuid */ | ||
| 124 | } __packed; | ||
| 125 | |||
| 126 | /* | ||
| 127 | * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data. | ||
| 128 | * It contains the raid used for this multi-device FS and an array of | ||
| 129 | * participating devices. | ||
| 130 | */ | ||
| 131 | struct exofs_device_table { | ||
| 132 | __le32 dt_version; /* == EXOFS_DT_VER */ | ||
| 133 | struct exofs_dt_data_map dt_data_map; /* Raid policy to use */ | ||
| 134 | |||
| 135 | /* Reserved space for future use. Total including this: | ||
| 136 | * (8 * sizeof(le64)) | ||
| 137 | */ | ||
| 138 | __le64 __Resurved[4]; | ||
| 139 | |||
| 140 | __le64 dt_num_devices; /* Array size */ | ||
| 141 | struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */ | ||
| 142 | } __packed; | ||
| 92 | 143 | ||
| 93 | /**************************************************************************** | 144 | /**************************************************************************** |
| 94 | * inode-related things | 145 | * inode-related things |
| @@ -155,22 +206,4 @@ enum { | |||
| 155 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ | 206 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ |
| 156 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) | 207 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) |
| 157 | 208 | ||
| 158 | /************************* | ||
| 159 | * function declarations * | ||
| 160 | *************************/ | ||
| 161 | /* osd.c */ | ||
| 162 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
| 163 | const struct osd_obj_id *obj); | ||
| 164 | |||
| 165 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); | ||
| 166 | static inline int exofs_check_ok(struct osd_request *or) | ||
| 167 | { | ||
| 168 | return exofs_check_ok_resid(or, NULL, NULL); | ||
| 169 | } | ||
| 170 | int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); | ||
| 171 | int exofs_async_op(struct osd_request *or, | ||
| 172 | osd_req_done_fn *async_done, void *caller_context, u8 *cred); | ||
| 173 | |||
| 174 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | ||
| 175 | |||
| 176 | #endif /*ifndef __EXOFS_COM_H__*/ | 209 | #endif /*ifndef __EXOFS_COM_H__*/ |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 5ec72e020b22..c35fd4623986 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
| @@ -30,13 +30,17 @@ | |||
| 30 | * along with exofs; if not, write to the Free Software | 30 | * along with exofs; if not, write to the Free Software |
| 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 32 | */ | 32 | */ |
| 33 | #ifndef __EXOFS_H__ | ||
| 34 | #define __EXOFS_H__ | ||
| 33 | 35 | ||
| 34 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
| 35 | #include <linux/time.h> | 37 | #include <linux/time.h> |
| 36 | #include "common.h" | 38 | #include "common.h" |
| 37 | 39 | ||
| 38 | #ifndef __EXOFS_H__ | 40 | /* FIXME: Remove once pnfs hits mainline |
| 39 | #define __EXOFS_H__ | 41 | * #include <linux/exportfs/pnfs_osd_xdr.h> |
| 42 | */ | ||
| 43 | #include "pnfs.h" | ||
| 40 | 44 | ||
| 41 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) | 45 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) |
| 42 | 46 | ||
| @@ -55,7 +59,7 @@ | |||
| 55 | * our extension to the in-memory superblock | 59 | * our extension to the in-memory superblock |
| 56 | */ | 60 | */ |
| 57 | struct exofs_sb_info { | 61 | struct exofs_sb_info { |
| 58 | struct osd_dev *s_dev; /* returned by get_osd_dev */ | 62 | struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ |
| 59 | osd_id s_pid; /* partition ID of file system*/ | 63 | osd_id s_pid; /* partition ID of file system*/ |
| 60 | int s_timeout; /* timeout for OSD operations */ | 64 | int s_timeout; /* timeout for OSD operations */ |
| 61 | uint64_t s_nextid; /* highest object ID used */ | 65 | uint64_t s_nextid; /* highest object ID used */ |
| @@ -63,7 +67,11 @@ struct exofs_sb_info { | |||
| 63 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ | 67 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ |
| 64 | u32 s_next_generation; /* next gen # to use */ | 68 | u32 s_next_generation; /* next gen # to use */ |
| 65 | atomic_t s_curr_pending; /* number of pending commands */ | 69 | atomic_t s_curr_pending; /* number of pending commands */ |
| 66 | uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ | 70 | uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ |
| 71 | |||
| 72 | struct pnfs_osd_data_map data_map; /* Default raid to use */ | ||
| 73 | unsigned s_numdevs; /* Num of devices in array */ | ||
| 74 | struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */ | ||
| 67 | }; | 75 | }; |
| 68 | 76 | ||
| 69 | /* | 77 | /* |
| @@ -79,6 +87,50 @@ struct exofs_i_info { | |||
| 79 | struct inode vfs_inode; /* normal in-memory inode */ | 87 | struct inode vfs_inode; /* normal in-memory inode */ |
| 80 | }; | 88 | }; |
| 81 | 89 | ||
| 90 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | ||
| 91 | { | ||
| 92 | return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; | ||
| 93 | } | ||
| 94 | |||
| 95 | struct exofs_io_state; | ||
| 96 | typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); | ||
| 97 | |||
| 98 | struct exofs_io_state { | ||
| 99 | struct kref kref; | ||
| 100 | |||
| 101 | void *private; | ||
| 102 | exofs_io_done_fn done; | ||
| 103 | |||
| 104 | struct exofs_sb_info *sbi; | ||
| 105 | struct osd_obj_id obj; | ||
| 106 | u8 *cred; | ||
| 107 | |||
| 108 | /* Global read/write IO*/ | ||
| 109 | loff_t offset; | ||
| 110 | unsigned long length; | ||
| 111 | void *kern_buff; | ||
| 112 | struct bio *bio; | ||
| 113 | |||
| 114 | /* Attributes */ | ||
| 115 | unsigned in_attr_len; | ||
| 116 | struct osd_attr *in_attr; | ||
| 117 | unsigned out_attr_len; | ||
| 118 | struct osd_attr *out_attr; | ||
| 119 | |||
| 120 | /* Variable array of size numdevs */ | ||
| 121 | unsigned numdevs; | ||
| 122 | struct exofs_per_dev_state { | ||
| 123 | struct osd_request *or; | ||
| 124 | struct bio *bio; | ||
| 125 | } per_dev[]; | ||
| 126 | }; | ||
| 127 | |||
| 128 | static inline unsigned exofs_io_state_size(unsigned numdevs) | ||
| 129 | { | ||
| 130 | return sizeof(struct exofs_io_state) + | ||
| 131 | sizeof(struct exofs_per_dev_state) * numdevs; | ||
| 132 | } | ||
| 133 | |||
| 82 | /* | 134 | /* |
| 83 | * our inode flags | 135 | * our inode flags |
| 84 | */ | 136 | */ |
| @@ -130,6 +182,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
| 130 | /************************* | 182 | /************************* |
| 131 | * function declarations * | 183 | * function declarations * |
| 132 | *************************/ | 184 | *************************/ |
| 185 | |||
| 186 | /* ios.c */ | ||
| 187 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
| 188 | const struct osd_obj_id *obj); | ||
| 189 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
| 190 | u64 offset, void *p, unsigned length); | ||
| 191 | |||
| 192 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); | ||
| 193 | void exofs_put_io_state(struct exofs_io_state *ios); | ||
| 194 | |||
| 195 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid); | ||
| 196 | |||
| 197 | int exofs_sbi_create(struct exofs_io_state *ios); | ||
| 198 | int exofs_sbi_remove(struct exofs_io_state *ios); | ||
| 199 | int exofs_sbi_write(struct exofs_io_state *ios); | ||
| 200 | int exofs_sbi_read(struct exofs_io_state *ios); | ||
| 201 | |||
| 202 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); | ||
| 203 | |||
| 204 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); | ||
| 205 | static inline int exofs_oi_write(struct exofs_i_info *oi, | ||
| 206 | struct exofs_io_state *ios) | ||
| 207 | { | ||
| 208 | ios->obj.id = exofs_oi_objno(oi); | ||
| 209 | ios->cred = oi->i_cred; | ||
| 210 | return exofs_sbi_write(ios); | ||
| 211 | } | ||
| 212 | |||
| 213 | static inline int exofs_oi_read(struct exofs_i_info *oi, | ||
| 214 | struct exofs_io_state *ios) | ||
| 215 | { | ||
| 216 | ios->obj.id = exofs_oi_objno(oi); | ||
| 217 | ios->cred = oi->i_cred; | ||
| 218 | return exofs_sbi_read(ios); | ||
| 219 | } | ||
| 220 | |||
| 133 | /* inode.c */ | 221 | /* inode.c */ |
| 134 | void exofs_truncate(struct inode *inode); | 222 | void exofs_truncate(struct inode *inode); |
| 135 | int exofs_setattr(struct dentry *, struct iattr *); | 223 | int exofs_setattr(struct dentry *, struct iattr *); |
| @@ -169,6 +257,7 @@ extern const struct file_operations exofs_file_operations; | |||
| 169 | 257 | ||
| 170 | /* inode.c */ | 258 | /* inode.c */ |
| 171 | extern const struct address_space_operations exofs_aops; | 259 | extern const struct address_space_operations exofs_aops; |
| 260 | extern const struct osd_attr g_attr_logical_length; | ||
| 172 | 261 | ||
| 173 | /* namei.c */ | 262 | /* namei.c */ |
| 174 | extern const struct inode_operations exofs_dir_inode_operations; | 263 | extern const struct inode_operations exofs_dir_inode_operations; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 6c10f7476699..698a8636d39c 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -37,15 +37,18 @@ | |||
| 37 | 37 | ||
| 38 | #include "exofs.h" | 38 | #include "exofs.h" |
| 39 | 39 | ||
| 40 | #ifdef CONFIG_EXOFS_DEBUG | 40 | #define EXOFS_DBGMSG2(M...) do {} while (0) |
| 41 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | 41 | |
| 42 | #endif | 42 | enum { BIO_MAX_PAGES_KMALLOC = |
| 43 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
| 44 | }; | ||
| 43 | 45 | ||
| 44 | struct page_collect { | 46 | struct page_collect { |
| 45 | struct exofs_sb_info *sbi; | 47 | struct exofs_sb_info *sbi; |
| 46 | struct request_queue *req_q; | 48 | struct request_queue *req_q; |
| 47 | struct inode *inode; | 49 | struct inode *inode; |
| 48 | unsigned expected_pages; | 50 | unsigned expected_pages; |
| 51 | struct exofs_io_state *ios; | ||
| 49 | 52 | ||
| 50 | struct bio *bio; | 53 | struct bio *bio; |
| 51 | unsigned nr_pages; | 54 | unsigned nr_pages; |
| @@ -54,22 +57,23 @@ struct page_collect { | |||
| 54 | }; | 57 | }; |
| 55 | 58 | ||
| 56 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 59 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
| 57 | struct inode *inode) | 60 | struct inode *inode) |
| 58 | { | 61 | { |
| 59 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 62 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
| 60 | 63 | ||
| 61 | pcol->sbi = sbi; | 64 | pcol->sbi = sbi; |
| 62 | pcol->req_q = osd_request_queue(sbi->s_dev); | 65 | /* Create master bios on first Q, later on cloning, each clone will be |
| 66 | * allocated on its destination Q | ||
| 67 | */ | ||
| 68 | pcol->req_q = osd_request_queue(sbi->s_ods[0]); | ||
| 63 | pcol->inode = inode; | 69 | pcol->inode = inode; |
| 64 | pcol->expected_pages = expected_pages; | 70 | pcol->expected_pages = expected_pages; |
| 65 | 71 | ||
| 72 | pcol->ios = NULL; | ||
| 66 | pcol->bio = NULL; | 73 | pcol->bio = NULL; |
| 67 | pcol->nr_pages = 0; | 74 | pcol->nr_pages = 0; |
| 68 | pcol->length = 0; | 75 | pcol->length = 0; |
| 69 | pcol->pg_first = -1; | 76 | pcol->pg_first = -1; |
| 70 | |||
| 71 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
| 72 | expected_pages); | ||
| 73 | } | 77 | } |
| 74 | 78 | ||
| 75 | static void _pcol_reset(struct page_collect *pcol) | 79 | static void _pcol_reset(struct page_collect *pcol) |
| @@ -80,35 +84,49 @@ static void _pcol_reset(struct page_collect *pcol) | |||
| 80 | pcol->nr_pages = 0; | 84 | pcol->nr_pages = 0; |
| 81 | pcol->length = 0; | 85 | pcol->length = 0; |
| 82 | pcol->pg_first = -1; | 86 | pcol->pg_first = -1; |
| 83 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | 87 | pcol->ios = NULL; |
| 84 | pcol->inode->i_ino, pcol->expected_pages); | ||
| 85 | 88 | ||
| 86 | /* this is probably the end of the loop but in writes | 89 | /* this is probably the end of the loop but in writes |
| 87 | * it might not end here. don't be left with nothing | 90 | * it might not end here. don't be left with nothing |
| 88 | */ | 91 | */ |
| 89 | if (!pcol->expected_pages) | 92 | if (!pcol->expected_pages) |
| 90 | pcol->expected_pages = 128; | 93 | pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; |
| 91 | } | 94 | } |
| 92 | 95 | ||
| 93 | static int pcol_try_alloc(struct page_collect *pcol) | 96 | static int pcol_try_alloc(struct page_collect *pcol) |
| 94 | { | 97 | { |
| 95 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | 98 | int pages = min_t(unsigned, pcol->expected_pages, |
| 99 | BIO_MAX_PAGES_KMALLOC); | ||
| 100 | |||
| 101 | if (!pcol->ios) { /* First time allocate io_state */ | ||
| 102 | int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); | ||
| 103 | |||
| 104 | if (ret) | ||
| 105 | return ret; | ||
| 106 | } | ||
| 96 | 107 | ||
| 97 | for (; pages; pages >>= 1) { | 108 | for (; pages; pages >>= 1) { |
| 98 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | 109 | pcol->bio = bio_kmalloc(GFP_KERNEL, pages); |
| 99 | if (likely(pcol->bio)) | 110 | if (likely(pcol->bio)) |
| 100 | return 0; | 111 | return 0; |
| 101 | } | 112 | } |
| 102 | 113 | ||
| 103 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | 114 | EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", |
| 104 | pcol->expected_pages); | 115 | pcol->expected_pages); |
| 105 | return -ENOMEM; | 116 | return -ENOMEM; |
| 106 | } | 117 | } |
| 107 | 118 | ||
| 108 | static void pcol_free(struct page_collect *pcol) | 119 | static void pcol_free(struct page_collect *pcol) |
| 109 | { | 120 | { |
| 110 | bio_put(pcol->bio); | 121 | if (pcol->bio) { |
| 111 | pcol->bio = NULL; | 122 | bio_put(pcol->bio); |
| 123 | pcol->bio = NULL; | ||
| 124 | } | ||
| 125 | |||
| 126 | if (pcol->ios) { | ||
| 127 | exofs_put_io_state(pcol->ios); | ||
| 128 | pcol->ios = NULL; | ||
| 129 | } | ||
| 112 | } | 130 | } |
| 113 | 131 | ||
| 114 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | 132 | static int pcol_add_page(struct page_collect *pcol, struct page *page, |
| @@ -161,22 +179,17 @@ static void update_write_page(struct page *page, int ret) | |||
| 161 | /* Called at the end of reads, to optionally unlock pages and update their | 179 | /* Called at the end of reads, to optionally unlock pages and update their |
| 162 | * status. | 180 | * status. |
| 163 | */ | 181 | */ |
| 164 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | 182 | static int __readpages_done(struct page_collect *pcol, bool do_unlock) |
| 165 | bool do_unlock) | ||
| 166 | { | 183 | { |
| 167 | struct bio_vec *bvec; | 184 | struct bio_vec *bvec; |
| 168 | int i; | 185 | int i; |
| 169 | u64 resid; | 186 | u64 resid; |
| 170 | u64 good_bytes; | 187 | u64 good_bytes; |
| 171 | u64 length = 0; | 188 | u64 length = 0; |
| 172 | int ret = exofs_check_ok_resid(or, &resid, NULL); | 189 | int ret = exofs_check_io(pcol->ios, &resid); |
| 173 | |||
| 174 | osd_end_request(or); | ||
| 175 | 190 | ||
| 176 | if (likely(!ret)) | 191 | if (likely(!ret)) |
| 177 | good_bytes = pcol->length; | 192 | good_bytes = pcol->length; |
| 178 | else if (!resid) | ||
| 179 | good_bytes = 0; | ||
| 180 | else | 193 | else |
| 181 | good_bytes = pcol->length - resid; | 194 | good_bytes = pcol->length - resid; |
| 182 | 195 | ||
| @@ -198,7 +211,7 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |||
| 198 | else | 211 | else |
| 199 | page_stat = ret; | 212 | page_stat = ret; |
| 200 | 213 | ||
| 201 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | 214 | EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", |
| 202 | inode->i_ino, page->index, | 215 | inode->i_ino, page->index, |
| 203 | page_stat ? "bad_bytes" : "good_bytes"); | 216 | page_stat ? "bad_bytes" : "good_bytes"); |
| 204 | 217 | ||
| @@ -214,13 +227,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |||
| 214 | } | 227 | } |
| 215 | 228 | ||
| 216 | /* callback of async reads */ | 229 | /* callback of async reads */ |
| 217 | static void readpages_done(struct osd_request *or, void *p) | 230 | static void readpages_done(struct exofs_io_state *ios, void *p) |
| 218 | { | 231 | { |
| 219 | struct page_collect *pcol = p; | 232 | struct page_collect *pcol = p; |
| 220 | 233 | ||
| 221 | __readpages_done(or, pcol, true); | 234 | __readpages_done(pcol, true); |
| 222 | atomic_dec(&pcol->sbi->s_curr_pending); | 235 | atomic_dec(&pcol->sbi->s_curr_pending); |
| 223 | kfree(p); | 236 | kfree(pcol); |
| 224 | } | 237 | } |
| 225 | 238 | ||
| 226 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | 239 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) |
| @@ -238,17 +251,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | |||
| 238 | 251 | ||
| 239 | unlock_page(page); | 252 | unlock_page(page); |
| 240 | } | 253 | } |
| 241 | pcol_free(pcol); | ||
| 242 | } | 254 | } |
| 243 | 255 | ||
| 244 | static int read_exec(struct page_collect *pcol, bool is_sync) | 256 | static int read_exec(struct page_collect *pcol, bool is_sync) |
| 245 | { | 257 | { |
| 246 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 258 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
| 247 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 259 | struct exofs_io_state *ios = pcol->ios; |
| 248 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
| 249 | struct osd_request *or = NULL; | ||
| 250 | struct page_collect *pcol_copy = NULL; | 260 | struct page_collect *pcol_copy = NULL; |
| 251 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
| 252 | int ret; | 261 | int ret; |
| 253 | 262 | ||
| 254 | if (!pcol->bio) | 263 | if (!pcol->bio) |
| @@ -257,17 +266,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
| 257 | /* see comment in _readpage() about sync reads */ | 266 | /* see comment in _readpage() about sync reads */ |
| 258 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | 267 | WARN_ON(is_sync && (pcol->nr_pages != 1)); |
| 259 | 268 | ||
| 260 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | 269 | ios->bio = pcol->bio; |
| 261 | if (unlikely(!or)) { | 270 | ios->length = pcol->length; |
| 262 | ret = -ENOMEM; | 271 | ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; |
| 263 | goto err; | ||
| 264 | } | ||
| 265 | |||
| 266 | osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); | ||
| 267 | 272 | ||
| 268 | if (is_sync) { | 273 | if (is_sync) { |
| 269 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | 274 | exofs_oi_read(oi, pcol->ios); |
| 270 | return __readpages_done(or, pcol, false); | 275 | return __readpages_done(pcol, false); |
| 271 | } | 276 | } |
| 272 | 277 | ||
| 273 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 278 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
| @@ -277,14 +282,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
| 277 | } | 282 | } |
| 278 | 283 | ||
| 279 | *pcol_copy = *pcol; | 284 | *pcol_copy = *pcol; |
| 280 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | 285 | ios->done = readpages_done; |
| 286 | ios->private = pcol_copy; | ||
| 287 | ret = exofs_oi_read(oi, ios); | ||
| 281 | if (unlikely(ret)) | 288 | if (unlikely(ret)) |
| 282 | goto err; | 289 | goto err; |
| 283 | 290 | ||
| 284 | atomic_inc(&pcol->sbi->s_curr_pending); | 291 | atomic_inc(&pcol->sbi->s_curr_pending); |
| 285 | 292 | ||
| 286 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | 293 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", |
| 287 | obj.id, _LLU(i_start), pcol->length); | 294 | ios->obj.id, _LLU(ios->offset), pcol->length); |
| 288 | 295 | ||
| 289 | /* pages ownership was passed to pcol_copy */ | 296 | /* pages ownership was passed to pcol_copy */ |
| 290 | _pcol_reset(pcol); | 297 | _pcol_reset(pcol); |
| @@ -293,12 +300,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
| 293 | err: | 300 | err: |
| 294 | if (!is_sync) | 301 | if (!is_sync) |
| 295 | _unlock_pcol_pages(pcol, ret, READ); | 302 | _unlock_pcol_pages(pcol, ret, READ); |
| 296 | else /* Pages unlocked by caller in sync mode only free bio */ | 303 | |
| 297 | pcol_free(pcol); | 304 | pcol_free(pcol); |
| 298 | 305 | ||
| 299 | kfree(pcol_copy); | 306 | kfree(pcol_copy); |
| 300 | if (or) | ||
| 301 | osd_end_request(or); | ||
| 302 | return ret; | 307 | return ret; |
| 303 | } | 308 | } |
| 304 | 309 | ||
| @@ -370,12 +375,12 @@ try_again: | |||
| 370 | if (len != PAGE_CACHE_SIZE) | 375 | if (len != PAGE_CACHE_SIZE) |
| 371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | 376 | zero_user(page, len, PAGE_CACHE_SIZE - len); |
| 372 | 377 | ||
| 373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | 378 | EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", |
| 374 | inode->i_ino, page->index, len); | 379 | inode->i_ino, page->index, len); |
| 375 | 380 | ||
| 376 | ret = pcol_add_page(pcol, page, len); | 381 | ret = pcol_add_page(pcol, page, len); |
| 377 | if (ret) { | 382 | if (ret) { |
| 378 | EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | 383 | EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " |
| 379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | 384 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", |
| 380 | page, len, pcol->nr_pages, pcol->length); | 385 | page, len, pcol->nr_pages, pcol->length); |
| 381 | 386 | ||
| @@ -419,9 +424,8 @@ static int _readpage(struct page *page, bool is_sync) | |||
| 419 | 424 | ||
| 420 | _pcol_init(&pcol, 1, page->mapping->host); | 425 | _pcol_init(&pcol, 1, page->mapping->host); |
| 421 | 426 | ||
| 422 | /* readpage_strip might call read_exec(,async) inside at several places | 427 | /* readpage_strip might call read_exec(,is_sync==false) at several |
| 423 | * but this is safe for is_async=0 since read_exec will not do anything | 428 | * places but not if we have a single page. |
| 424 | * when we have a single page. | ||
| 425 | */ | 429 | */ |
| 426 | ret = readpage_strip(&pcol, page); | 430 | ret = readpage_strip(&pcol, page); |
| 427 | if (ret) { | 431 | if (ret) { |
| @@ -440,8 +444,8 @@ static int exofs_readpage(struct file *file, struct page *page) | |||
| 440 | return _readpage(page, false); | 444 | return _readpage(page, false); |
| 441 | } | 445 | } |
| 442 | 446 | ||
| 443 | /* Callback for osd_write. All writes are asynchronouse */ | 447 | /* Callback for osd_write. All writes are asynchronous */ |
| 444 | static void writepages_done(struct osd_request *or, void *p) | 448 | static void writepages_done(struct exofs_io_state *ios, void *p) |
| 445 | { | 449 | { |
| 446 | struct page_collect *pcol = p; | 450 | struct page_collect *pcol = p; |
| 447 | struct bio_vec *bvec; | 451 | struct bio_vec *bvec; |
| @@ -449,16 +453,12 @@ static void writepages_done(struct osd_request *or, void *p) | |||
| 449 | u64 resid; | 453 | u64 resid; |
| 450 | u64 good_bytes; | 454 | u64 good_bytes; |
| 451 | u64 length = 0; | 455 | u64 length = 0; |
| 456 | int ret = exofs_check_io(ios, &resid); | ||
| 452 | 457 | ||
| 453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
| 454 | |||
| 455 | osd_end_request(or); | ||
| 456 | atomic_dec(&pcol->sbi->s_curr_pending); | 458 | atomic_dec(&pcol->sbi->s_curr_pending); |
| 457 | 459 | ||
| 458 | if (likely(!ret)) | 460 | if (likely(!ret)) |
| 459 | good_bytes = pcol->length; | 461 | good_bytes = pcol->length; |
| 460 | else if (!resid) | ||
| 461 | good_bytes = 0; | ||
| 462 | else | 462 | else |
| 463 | good_bytes = pcol->length - resid; | 463 | good_bytes = pcol->length - resid; |
| 464 | 464 | ||
| @@ -482,7 +482,7 @@ static void writepages_done(struct osd_request *or, void *p) | |||
| 482 | 482 | ||
| 483 | update_write_page(page, page_stat); | 483 | update_write_page(page, page_stat); |
| 484 | unlock_page(page); | 484 | unlock_page(page); |
| 485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | 485 | EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", |
| 486 | inode->i_ino, page->index, page_stat); | 486 | inode->i_ino, page->index, page_stat); |
| 487 | 487 | ||
| 488 | length += bvec->bv_len; | 488 | length += bvec->bv_len; |
| @@ -496,23 +496,13 @@ static void writepages_done(struct osd_request *or, void *p) | |||
| 496 | static int write_exec(struct page_collect *pcol) | 496 | static int write_exec(struct page_collect *pcol) |
| 497 | { | 497 | { |
| 498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 498 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
| 499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 499 | struct exofs_io_state *ios = pcol->ios; |
| 500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
| 501 | struct osd_request *or = NULL; | ||
| 502 | struct page_collect *pcol_copy = NULL; | 500 | struct page_collect *pcol_copy = NULL; |
| 503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
| 504 | int ret; | 501 | int ret; |
| 505 | 502 | ||
| 506 | if (!pcol->bio) | 503 | if (!pcol->bio) |
| 507 | return 0; | 504 | return 0; |
| 508 | 505 | ||
| 509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
| 510 | if (unlikely(!or)) { | ||
| 511 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
| 512 | ret = -ENOMEM; | ||
| 513 | goto err; | ||
| 514 | } | ||
| 515 | |||
| 516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 506 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
| 517 | if (!pcol_copy) { | 507 | if (!pcol_copy) { |
| 518 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | 508 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); |
| @@ -523,16 +513,22 @@ static int write_exec(struct page_collect *pcol) | |||
| 523 | *pcol_copy = *pcol; | 513 | *pcol_copy = *pcol; |
| 524 | 514 | ||
| 525 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ | 515 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ |
| 526 | osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); | 516 | |
| 527 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | 517 | ios->bio = pcol_copy->bio; |
| 518 | ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; | ||
| 519 | ios->length = pcol_copy->length; | ||
| 520 | ios->done = writepages_done; | ||
| 521 | ios->private = pcol_copy; | ||
| 522 | |||
| 523 | ret = exofs_oi_write(oi, ios); | ||
| 528 | if (unlikely(ret)) { | 524 | if (unlikely(ret)) { |
| 529 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | 525 | EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); |
| 530 | goto err; | 526 | goto err; |
| 531 | } | 527 | } |
| 532 | 528 | ||
| 533 | atomic_inc(&pcol->sbi->s_curr_pending); | 529 | atomic_inc(&pcol->sbi->s_curr_pending); |
| 534 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | 530 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", |
| 535 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | 531 | pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), |
| 536 | pcol->length); | 532 | pcol->length); |
| 537 | /* pages ownership was passed to pcol_copy */ | 533 | /* pages ownership was passed to pcol_copy */ |
| 538 | _pcol_reset(pcol); | 534 | _pcol_reset(pcol); |
| @@ -540,9 +536,9 @@ static int write_exec(struct page_collect *pcol) | |||
| 540 | 536 | ||
| 541 | err: | 537 | err: |
| 542 | _unlock_pcol_pages(pcol, ret, WRITE); | 538 | _unlock_pcol_pages(pcol, ret, WRITE); |
| 539 | pcol_free(pcol); | ||
| 543 | kfree(pcol_copy); | 540 | kfree(pcol_copy); |
| 544 | if (or) | 541 | |
| 545 | osd_end_request(or); | ||
| 546 | return ret; | 542 | return ret; |
| 547 | } | 543 | } |
| 548 | 544 | ||
| @@ -586,6 +582,9 @@ static int writepage_strip(struct page *page, | |||
| 586 | if (PageError(page)) | 582 | if (PageError(page)) |
| 587 | ClearPageError(page); | 583 | ClearPageError(page); |
| 588 | unlock_page(page); | 584 | unlock_page(page); |
| 585 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " | ||
| 586 | "outside the limits\n", | ||
| 587 | inode->i_ino, page->index); | ||
| 589 | return 0; | 588 | return 0; |
| 590 | } | 589 | } |
| 591 | } | 590 | } |
| @@ -600,6 +599,9 @@ try_again: | |||
| 600 | ret = write_exec(pcol); | 599 | ret = write_exec(pcol); |
| 601 | if (unlikely(ret)) | 600 | if (unlikely(ret)) |
| 602 | goto fail; | 601 | goto fail; |
| 602 | |||
| 603 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", | ||
| 604 | inode->i_ino, page->index); | ||
| 603 | goto try_again; | 605 | goto try_again; |
| 604 | } | 606 | } |
| 605 | 607 | ||
| @@ -609,7 +611,7 @@ try_again: | |||
| 609 | goto fail; | 611 | goto fail; |
| 610 | } | 612 | } |
| 611 | 613 | ||
| 612 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | 614 | EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", |
| 613 | inode->i_ino, page->index, len); | 615 | inode->i_ino, page->index, len); |
| 614 | 616 | ||
| 615 | ret = pcol_add_page(pcol, page, len); | 617 | ret = pcol_add_page(pcol, page, len); |
| @@ -634,6 +636,8 @@ try_again: | |||
| 634 | return 0; | 636 | return 0; |
| 635 | 637 | ||
| 636 | fail: | 638 | fail: |
| 639 | EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", | ||
| 640 | inode->i_ino, page->index, ret); | ||
| 637 | set_bit(AS_EIO, &page->mapping->flags); | 641 | set_bit(AS_EIO, &page->mapping->flags); |
| 638 | unlock_page(page); | 642 | unlock_page(page); |
| 639 | return ret; | 643 | return ret; |
| @@ -652,14 +656,17 @@ static int exofs_writepages(struct address_space *mapping, | |||
| 652 | wbc->range_end >> PAGE_CACHE_SHIFT; | 656 | wbc->range_end >> PAGE_CACHE_SHIFT; |
| 653 | 657 | ||
| 654 | if (start || end) | 658 | if (start || end) |
| 655 | expected_pages = min(end - start + 1, 32L); | 659 | expected_pages = end - start + 1; |
| 656 | else | 660 | else |
| 657 | expected_pages = mapping->nrpages; | 661 | expected_pages = mapping->nrpages; |
| 658 | 662 | ||
| 659 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | 663 | if (expected_pages < 32L) |
| 660 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | 664 | expected_pages = 32L; |
| 665 | |||
| 666 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " | ||
| 667 | "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", | ||
| 661 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | 668 | mapping->host->i_ino, wbc->range_start, wbc->range_end, |
| 662 | mapping->nrpages, start, end); | 669 | mapping->nrpages, start, end, expected_pages); |
| 663 | 670 | ||
| 664 | _pcol_init(&pcol, expected_pages, mapping->host); | 671 | _pcol_init(&pcol, expected_pages, mapping->host); |
| 665 | 672 | ||
| @@ -771,19 +778,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock, | |||
| 771 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | 778 | const struct osd_attr g_attr_logical_length = ATTR_DEF( |
| 772 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | 779 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
| 773 | 780 | ||
| 781 | static int _do_truncate(struct inode *inode) | ||
| 782 | { | ||
| 783 | struct exofs_i_info *oi = exofs_i(inode); | ||
| 784 | loff_t isize = i_size_read(inode); | ||
| 785 | int ret; | ||
| 786 | |||
| 787 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 788 | |||
| 789 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
| 790 | |||
| 791 | ret = exofs_oi_truncate(oi, (u64)isize); | ||
| 792 | EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); | ||
| 793 | return ret; | ||
| 794 | } | ||
| 795 | |||
| 774 | /* | 796 | /* |
| 775 | * Truncate a file to the specified size - all we have to do is set the size | 797 | * Truncate a file to the specified size - all we have to do is set the size |
| 776 | * attribute. We make sure the object exists first. | 798 | * attribute. We make sure the object exists first. |
| 777 | */ | 799 | */ |
| 778 | void exofs_truncate(struct inode *inode) | 800 | void exofs_truncate(struct inode *inode) |
| 779 | { | 801 | { |
| 780 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
| 781 | struct exofs_i_info *oi = exofs_i(inode); | 802 | struct exofs_i_info *oi = exofs_i(inode); |
| 782 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
| 783 | struct osd_request *or; | ||
| 784 | struct osd_attr attr; | ||
| 785 | loff_t isize = i_size_read(inode); | ||
| 786 | __be64 newsize; | ||
| 787 | int ret; | 803 | int ret; |
| 788 | 804 | ||
| 789 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 805 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
| @@ -793,22 +809,6 @@ void exofs_truncate(struct inode *inode) | |||
| 793 | return; | 809 | return; |
| 794 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 810 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
| 795 | return; | 811 | return; |
| 796 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 797 | |||
| 798 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
| 799 | |||
| 800 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
| 801 | if (unlikely(!or)) { | ||
| 802 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
| 803 | goto fail; | ||
| 804 | } | ||
| 805 | |||
| 806 | osd_req_set_attributes(or, &obj); | ||
| 807 | |||
| 808 | newsize = cpu_to_be64((u64)isize); | ||
| 809 | attr = g_attr_logical_length; | ||
| 810 | attr.val_ptr = &newsize; | ||
| 811 | osd_req_add_set_attr_list(or, &attr, 1); | ||
| 812 | 812 | ||
| 813 | /* if we are about to truncate an object, and it hasn't been | 813 | /* if we are about to truncate an object, and it hasn't been |
| 814 | * created yet, wait | 814 | * created yet, wait |
| @@ -816,8 +816,7 @@ void exofs_truncate(struct inode *inode) | |||
| 816 | if (unlikely(wait_obj_created(oi))) | 816 | if (unlikely(wait_obj_created(oi))) |
| 817 | goto fail; | 817 | goto fail; |
| 818 | 818 | ||
| 819 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 819 | ret = _do_truncate(inode); |
| 820 | osd_end_request(or); | ||
| 821 | if (ret) | 820 | if (ret) |
| 822 | goto fail; | 821 | goto fail; |
| 823 | 822 | ||
| @@ -847,65 +846,62 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
| 847 | 846 | ||
| 848 | /* | 847 | /* |
| 849 | * Read an inode from the OSD, and return it as is. We also return the size | 848 | * Read an inode from the OSD, and return it as is. We also return the size |
| 850 | * attribute in the 'sanity' argument if we got compiled with debugging turned | 849 | * attribute in the 'obj_size' argument. |
| 851 | * on. | ||
| 852 | */ | 850 | */ |
| 853 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | 851 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, |
| 854 | struct exofs_fcb *inode, uint64_t *sanity) | 852 | struct exofs_fcb *inode, uint64_t *obj_size) |
| 855 | { | 853 | { |
| 856 | struct exofs_sb_info *sbi = sb->s_fs_info; | 854 | struct exofs_sb_info *sbi = sb->s_fs_info; |
| 857 | struct osd_request *or; | 855 | struct osd_attr attrs[2]; |
| 858 | struct osd_attr attr; | 856 | struct exofs_io_state *ios; |
| 859 | struct osd_obj_id obj = {sbi->s_pid, | ||
| 860 | oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | ||
| 861 | int ret; | 857 | int ret; |
| 862 | 858 | ||
| 863 | exofs_make_credential(oi->i_cred, &obj); | 859 | *obj_size = ~0; |
| 864 | 860 | ret = exofs_get_io_state(sbi, &ios); | |
| 865 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 861 | if (unlikely(ret)) { |
| 866 | if (unlikely(!or)) { | 862 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
| 867 | EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | 863 | return ret; |
| 868 | return -ENOMEM; | ||
| 869 | } | 864 | } |
| 870 | osd_req_get_attributes(or, &obj); | ||
| 871 | 865 | ||
| 872 | /* we need the inode attribute */ | 866 | ios->obj.id = exofs_oi_objno(oi); |
| 873 | osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | 867 | exofs_make_credential(oi->i_cred, &ios->obj); |
| 868 | ios->cred = oi->i_cred; | ||
| 874 | 869 | ||
| 875 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 870 | attrs[0] = g_attr_inode_data; |
| 876 | /* we get the size attributes to do a sanity check */ | 871 | attrs[1] = g_attr_logical_length; |
| 877 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | 872 | ios->in_attr = attrs; |
| 878 | #endif | 873 | ios->in_attr_len = ARRAY_SIZE(attrs); |
| 879 | 874 | ||
| 880 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 875 | ret = exofs_sbi_read(ios); |
| 881 | if (ret) | 876 | if (ret) |
| 882 | goto out; | 877 | goto out; |
| 883 | 878 | ||
| 884 | attr = g_attr_inode_data; | 879 | ret = extract_attr_from_ios(ios, &attrs[0]); |
| 885 | ret = extract_attr_from_req(or, &attr); | ||
| 886 | if (ret) { | 880 | if (ret) { |
| 887 | EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | 881 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); |
| 888 | goto out; | 882 | goto out; |
| 889 | } | 883 | } |
| 884 | WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); | ||
| 885 | memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); | ||
| 890 | 886 | ||
| 891 | WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | 887 | ret = extract_attr_from_ios(ios, &attrs[1]); |
| 892 | memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | ||
| 893 | |||
| 894 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
| 895 | attr = g_attr_logical_length; | ||
| 896 | ret = extract_attr_from_req(or, &attr); | ||
| 897 | if (ret) { | 888 | if (ret) { |
| 898 | EXOFS_ERR("ERROR: extract attr from or failed\n"); | 889 | EXOFS_ERR("%s: extract_attr of logical_length failed\n", |
| 890 | __func__); | ||
| 899 | goto out; | 891 | goto out; |
| 900 | } | 892 | } |
| 901 | *sanity = get_unaligned_be64(attr.val_ptr); | 893 | *obj_size = get_unaligned_be64(attrs[1].val_ptr); |
| 902 | #endif | ||
| 903 | 894 | ||
| 904 | out: | 895 | out: |
| 905 | osd_end_request(or); | 896 | exofs_put_io_state(ios); |
| 906 | return ret; | 897 | return ret; |
| 907 | } | 898 | } |
| 908 | 899 | ||
| 900 | static void __oi_init(struct exofs_i_info *oi) | ||
| 901 | { | ||
| 902 | init_waitqueue_head(&oi->i_wq); | ||
| 903 | oi->i_flags = 0; | ||
| 904 | } | ||
| 909 | /* | 905 | /* |
| 910 | * Fill in an inode read from the OSD and set it up for use | 906 | * Fill in an inode read from the OSD and set it up for use |
| 911 | */ | 907 | */ |
| @@ -914,7 +910,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
| 914 | struct exofs_i_info *oi; | 910 | struct exofs_i_info *oi; |
| 915 | struct exofs_fcb fcb; | 911 | struct exofs_fcb fcb; |
| 916 | struct inode *inode; | 912 | struct inode *inode; |
| 917 | uint64_t uninitialized_var(sanity); | 913 | uint64_t obj_size; |
| 918 | int ret; | 914 | int ret; |
| 919 | 915 | ||
| 920 | inode = iget_locked(sb, ino); | 916 | inode = iget_locked(sb, ino); |
| @@ -923,13 +919,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
| 923 | if (!(inode->i_state & I_NEW)) | 919 | if (!(inode->i_state & I_NEW)) |
| 924 | return inode; | 920 | return inode; |
| 925 | oi = exofs_i(inode); | 921 | oi = exofs_i(inode); |
| 922 | __oi_init(oi); | ||
| 926 | 923 | ||
| 927 | /* read the inode from the osd */ | 924 | /* read the inode from the osd */ |
| 928 | ret = exofs_get_inode(sb, oi, &fcb, &sanity); | 925 | ret = exofs_get_inode(sb, oi, &fcb, &obj_size); |
| 929 | if (ret) | 926 | if (ret) |
| 930 | goto bad_inode; | 927 | goto bad_inode; |
| 931 | 928 | ||
| 932 | init_waitqueue_head(&oi->i_wq); | ||
| 933 | set_obj_created(oi); | 929 | set_obj_created(oi); |
| 934 | 930 | ||
| 935 | /* copy stuff from on-disk struct to in-memory struct */ | 931 | /* copy stuff from on-disk struct to in-memory struct */ |
| @@ -947,14 +943,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
| 947 | inode->i_blkbits = EXOFS_BLKSHIFT; | 943 | inode->i_blkbits = EXOFS_BLKSHIFT; |
| 948 | inode->i_generation = le32_to_cpu(fcb.i_generation); | 944 | inode->i_generation = le32_to_cpu(fcb.i_generation); |
| 949 | 945 | ||
| 950 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 946 | if ((inode->i_size != obj_size) && |
| 951 | if ((inode->i_size != sanity) && | ||
| 952 | (!exofs_inode_is_fast_symlink(inode))) { | 947 | (!exofs_inode_is_fast_symlink(inode))) { |
| 953 | EXOFS_ERR("WARNING: Size of object from inode and " | 948 | EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", |
| 954 | "attributes differ (%lld != %llu)\n", | 949 | inode->i_size, _LLU(obj_size)); |
| 955 | inode->i_size, _LLU(sanity)); | 950 | /* FIXME: call exofs_inode_recovery() */ |
| 956 | } | 951 | } |
| 957 | #endif | ||
| 958 | 952 | ||
| 959 | oi->i_dir_start_lookup = 0; | 953 | oi->i_dir_start_lookup = 0; |
| 960 | 954 | ||
| @@ -1020,23 +1014,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) | |||
| 1020 | * set the obj_created flag so that other methods know that the object exists on | 1014 | * set the obj_created flag so that other methods know that the object exists on |
| 1021 | * the OSD. | 1015 | * the OSD. |
| 1022 | */ | 1016 | */ |
| 1023 | static void create_done(struct osd_request *or, void *p) | 1017 | static void create_done(struct exofs_io_state *ios, void *p) |
| 1024 | { | 1018 | { |
| 1025 | struct inode *inode = p; | 1019 | struct inode *inode = p; |
| 1026 | struct exofs_i_info *oi = exofs_i(inode); | 1020 | struct exofs_i_info *oi = exofs_i(inode); |
| 1027 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 1021 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
| 1028 | int ret; | 1022 | int ret; |
| 1029 | 1023 | ||
| 1030 | ret = exofs_check_ok(or); | 1024 | ret = exofs_check_io(ios, NULL); |
| 1031 | osd_end_request(or); | 1025 | exofs_put_io_state(ios); |
| 1026 | |||
| 1032 | atomic_dec(&sbi->s_curr_pending); | 1027 | atomic_dec(&sbi->s_curr_pending); |
| 1033 | 1028 | ||
| 1034 | if (unlikely(ret)) { | 1029 | if (unlikely(ret)) { |
| 1035 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | 1030 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", |
| 1036 | _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | 1031 | _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); |
| 1037 | make_bad_inode(inode); | 1032 | /*TODO: When FS is corrupted creation can fail, object already |
| 1038 | } else | 1033 | * exist. Get rid of this asynchronous creation, if exist |
| 1039 | set_obj_created(oi); | 1034 | * increment the obj counter and try the next object. Until we |
| 1035 | * succeed. All these dangling objects will be made into lost | ||
| 1036 | * files by chkfs.exofs | ||
| 1037 | */ | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | set_obj_created(oi); | ||
| 1040 | 1041 | ||
| 1041 | atomic_dec(&inode->i_count); | 1042 | atomic_dec(&inode->i_count); |
| 1042 | wake_up(&oi->i_wq); | 1043 | wake_up(&oi->i_wq); |
| @@ -1051,8 +1052,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
| 1051 | struct inode *inode; | 1052 | struct inode *inode; |
| 1052 | struct exofs_i_info *oi; | 1053 | struct exofs_i_info *oi; |
| 1053 | struct exofs_sb_info *sbi; | 1054 | struct exofs_sb_info *sbi; |
| 1054 | struct osd_request *or; | 1055 | struct exofs_io_state *ios; |
| 1055 | struct osd_obj_id obj; | ||
| 1056 | int ret; | 1056 | int ret; |
| 1057 | 1057 | ||
| 1058 | sb = dir->i_sb; | 1058 | sb = dir->i_sb; |
| @@ -1061,8 +1061,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
| 1061 | return ERR_PTR(-ENOMEM); | 1061 | return ERR_PTR(-ENOMEM); |
| 1062 | 1062 | ||
| 1063 | oi = exofs_i(inode); | 1063 | oi = exofs_i(inode); |
| 1064 | __oi_init(oi); | ||
| 1064 | 1065 | ||
| 1065 | init_waitqueue_head(&oi->i_wq); | ||
| 1066 | set_obj_2bcreated(oi); | 1066 | set_obj_2bcreated(oi); |
| 1067 | 1067 | ||
| 1068 | sbi = sb->s_fs_info; | 1068 | sbi = sb->s_fs_info; |
| @@ -1089,28 +1089,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
| 1089 | 1089 | ||
| 1090 | mark_inode_dirty(inode); | 1090 | mark_inode_dirty(inode); |
| 1091 | 1091 | ||
| 1092 | obj.partition = sbi->s_pid; | 1092 | ret = exofs_get_io_state(sbi, &ios); |
| 1093 | obj.id = inode->i_ino + EXOFS_OBJ_OFF; | 1093 | if (unlikely(ret)) { |
| 1094 | exofs_make_credential(oi->i_cred, &obj); | 1094 | EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); |
| 1095 | 1095 | return ERR_PTR(ret); | |
| 1096 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
| 1097 | if (unlikely(!or)) { | ||
| 1098 | EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | ||
| 1099 | return ERR_PTR(-ENOMEM); | ||
| 1100 | } | 1096 | } |
| 1101 | 1097 | ||
| 1102 | osd_req_create_object(or, &obj); | 1098 | ios->obj.id = exofs_oi_objno(oi); |
| 1099 | exofs_make_credential(oi->i_cred, &ios->obj); | ||
| 1103 | 1100 | ||
| 1104 | /* increment the refcount so that the inode will still be around when we | 1101 | /* increment the refcount so that the inode will still be around when we |
| 1105 | * reach the callback | 1102 | * reach the callback |
| 1106 | */ | 1103 | */ |
| 1107 | atomic_inc(&inode->i_count); | 1104 | atomic_inc(&inode->i_count); |
| 1108 | 1105 | ||
| 1109 | ret = exofs_async_op(or, create_done, inode, oi->i_cred); | 1106 | ios->done = create_done; |
| 1107 | ios->private = inode; | ||
| 1108 | ios->cred = oi->i_cred; | ||
| 1109 | ret = exofs_sbi_create(ios); | ||
| 1110 | if (ret) { | 1110 | if (ret) { |
| 1111 | atomic_dec(&inode->i_count); | 1111 | atomic_dec(&inode->i_count); |
| 1112 | osd_end_request(or); | 1112 | exofs_put_io_state(ios); |
| 1113 | return ERR_PTR(-EIO); | 1113 | return ERR_PTR(ret); |
| 1114 | } | 1114 | } |
| 1115 | atomic_inc(&sbi->s_curr_pending); | 1115 | atomic_inc(&sbi->s_curr_pending); |
| 1116 | 1116 | ||
| @@ -1128,11 +1128,11 @@ struct updatei_args { | |||
| 1128 | /* | 1128 | /* |
| 1129 | * Callback function from exofs_update_inode(). | 1129 | * Callback function from exofs_update_inode(). |
| 1130 | */ | 1130 | */ |
| 1131 | static void updatei_done(struct osd_request *or, void *p) | 1131 | static void updatei_done(struct exofs_io_state *ios, void *p) |
| 1132 | { | 1132 | { |
| 1133 | struct updatei_args *args = p; | 1133 | struct updatei_args *args = p; |
| 1134 | 1134 | ||
| 1135 | osd_end_request(or); | 1135 | exofs_put_io_state(ios); |
| 1136 | 1136 | ||
| 1137 | atomic_dec(&args->sbi->s_curr_pending); | 1137 | atomic_dec(&args->sbi->s_curr_pending); |
| 1138 | 1138 | ||
| @@ -1148,8 +1148,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
| 1148 | struct exofs_i_info *oi = exofs_i(inode); | 1148 | struct exofs_i_info *oi = exofs_i(inode); |
| 1149 | struct super_block *sb = inode->i_sb; | 1149 | struct super_block *sb = inode->i_sb; |
| 1150 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1150 | struct exofs_sb_info *sbi = sb->s_fs_info; |
| 1151 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1151 | struct exofs_io_state *ios; |
| 1152 | struct osd_request *or; | ||
| 1153 | struct osd_attr attr; | 1152 | struct osd_attr attr; |
| 1154 | struct exofs_fcb *fcb; | 1153 | struct exofs_fcb *fcb; |
| 1155 | struct updatei_args *args; | 1154 | struct updatei_args *args; |
| @@ -1186,18 +1185,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
| 1186 | } else | 1185 | } else |
| 1187 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | 1186 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); |
| 1188 | 1187 | ||
| 1189 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1188 | ret = exofs_get_io_state(sbi, &ios); |
| 1190 | if (unlikely(!or)) { | 1189 | if (unlikely(ret)) { |
| 1191 | EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | 1190 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
| 1192 | ret = -ENOMEM; | ||
| 1193 | goto free_args; | 1191 | goto free_args; |
| 1194 | } | 1192 | } |
| 1195 | 1193 | ||
| 1196 | osd_req_set_attributes(or, &obj); | ||
| 1197 | |||
| 1198 | attr = g_attr_inode_data; | 1194 | attr = g_attr_inode_data; |
| 1199 | attr.val_ptr = fcb; | 1195 | attr.val_ptr = fcb; |
| 1200 | osd_req_add_set_attr_list(or, &attr, 1); | 1196 | ios->out_attr_len = 1; |
| 1197 | ios->out_attr = &attr; | ||
| 1201 | 1198 | ||
| 1202 | if (!obj_created(oi)) { | 1199 | if (!obj_created(oi)) { |
| 1203 | EXOFS_DBGMSG("!obj_created\n"); | 1200 | EXOFS_DBGMSG("!obj_created\n"); |
| @@ -1206,22 +1203,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
| 1206 | EXOFS_DBGMSG("wait_event done\n"); | 1203 | EXOFS_DBGMSG("wait_event done\n"); |
| 1207 | } | 1204 | } |
| 1208 | 1205 | ||
| 1209 | if (do_sync) { | 1206 | if (!do_sync) { |
| 1210 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
| 1211 | osd_end_request(or); | ||
| 1212 | goto free_args; | ||
| 1213 | } else { | ||
| 1214 | args->sbi = sbi; | 1207 | args->sbi = sbi; |
| 1208 | ios->done = updatei_done; | ||
| 1209 | ios->private = args; | ||
| 1210 | } | ||
| 1215 | 1211 | ||
| 1216 | ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | 1212 | ret = exofs_oi_write(oi, ios); |
| 1217 | if (ret) { | 1213 | if (!do_sync && !ret) { |
| 1218 | osd_end_request(or); | ||
| 1219 | goto free_args; | ||
| 1220 | } | ||
| 1221 | atomic_inc(&sbi->s_curr_pending); | 1214 | atomic_inc(&sbi->s_curr_pending); |
| 1222 | goto out; /* deallocation in updatei_done */ | 1215 | goto out; /* deallocation in updatei_done */ |
| 1223 | } | 1216 | } |
| 1224 | 1217 | ||
| 1218 | exofs_put_io_state(ios); | ||
| 1225 | free_args: | 1219 | free_args: |
| 1226 | kfree(args); | 1220 | kfree(args); |
| 1227 | out: | 1221 | out: |
| @@ -1238,11 +1232,12 @@ int exofs_write_inode(struct inode *inode, int wait) | |||
| 1238 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | 1232 | * Callback function from exofs_delete_inode() - don't have much cleaning up to |
| 1239 | * do. | 1233 | * do. |
| 1240 | */ | 1234 | */ |
| 1241 | static void delete_done(struct osd_request *or, void *p) | 1235 | static void delete_done(struct exofs_io_state *ios, void *p) |
| 1242 | { | 1236 | { |
| 1243 | struct exofs_sb_info *sbi; | 1237 | struct exofs_sb_info *sbi = p; |
| 1244 | osd_end_request(or); | 1238 | |
| 1245 | sbi = p; | 1239 | exofs_put_io_state(ios); |
| 1240 | |||
| 1246 | atomic_dec(&sbi->s_curr_pending); | 1241 | atomic_dec(&sbi->s_curr_pending); |
| 1247 | } | 1242 | } |
| 1248 | 1243 | ||
| @@ -1256,8 +1251,7 @@ void exofs_delete_inode(struct inode *inode) | |||
| 1256 | struct exofs_i_info *oi = exofs_i(inode); | 1251 | struct exofs_i_info *oi = exofs_i(inode); |
| 1257 | struct super_block *sb = inode->i_sb; | 1252 | struct super_block *sb = inode->i_sb; |
| 1258 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1253 | struct exofs_sb_info *sbi = sb->s_fs_info; |
| 1259 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1254 | struct exofs_io_state *ios; |
| 1260 | struct osd_request *or; | ||
| 1261 | int ret; | 1255 | int ret; |
| 1262 | 1256 | ||
| 1263 | truncate_inode_pages(&inode->i_data, 0); | 1257 | truncate_inode_pages(&inode->i_data, 0); |
| @@ -1274,25 +1268,26 @@ void exofs_delete_inode(struct inode *inode) | |||
| 1274 | 1268 | ||
| 1275 | clear_inode(inode); | 1269 | clear_inode(inode); |
| 1276 | 1270 | ||
| 1277 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1271 | ret = exofs_get_io_state(sbi, &ios); |
| 1278 | if (unlikely(!or)) { | 1272 | if (unlikely(ret)) { |
| 1279 | EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | 1273 | EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); |
| 1280 | return; | 1274 | return; |
| 1281 | } | 1275 | } |
| 1282 | 1276 | ||
| 1283 | osd_req_remove_object(or, &obj); | ||
| 1284 | |||
| 1285 | /* if we are deleting an obj that hasn't been created yet, wait */ | 1277 | /* if we are deleting an obj that hasn't been created yet, wait */ |
| 1286 | if (!obj_created(oi)) { | 1278 | if (!obj_created(oi)) { |
| 1287 | BUG_ON(!obj_2bcreated(oi)); | 1279 | BUG_ON(!obj_2bcreated(oi)); |
| 1288 | wait_event(oi->i_wq, obj_created(oi)); | 1280 | wait_event(oi->i_wq, obj_created(oi)); |
| 1289 | } | 1281 | } |
| 1290 | 1282 | ||
| 1291 | ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | 1283 | ios->obj.id = exofs_oi_objno(oi); |
| 1284 | ios->done = delete_done; | ||
| 1285 | ios->private = sbi; | ||
| 1286 | ios->cred = oi->i_cred; | ||
| 1287 | ret = exofs_sbi_remove(ios); | ||
| 1292 | if (ret) { | 1288 | if (ret) { |
| 1293 | EXOFS_ERR( | 1289 | EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); |
| 1294 | "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | 1290 | exofs_put_io_state(ios); |
| 1295 | osd_end_request(or); | ||
| 1296 | return; | 1291 | return; |
| 1297 | } | 1292 | } |
| 1298 | atomic_inc(&sbi->s_curr_pending); | 1293 | atomic_inc(&sbi->s_curr_pending); |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c new file mode 100644 index 000000000000..5bad01fa1f9f --- /dev/null +++ b/fs/exofs/ios.c | |||
| @@ -0,0 +1,421 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005, 2006 | ||
| 3 | * Avishay Traeger (avishay@gmail.com) | ||
| 4 | * Copyright (C) 2008, 2009 | ||
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 6 | * | ||
| 7 | * This file is part of exofs. | ||
| 8 | * | ||
| 9 | * exofs is free software; you can redistribute it and/or modify | ||
| 10 | * it under the terms of the GNU General Public License as published by | ||
| 11 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
| 12 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
| 13 | * version of GPL for exofs is version 2. | ||
| 14 | * | ||
| 15 | * exofs is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 18 | * GNU General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public License | ||
| 21 | * along with exofs; if not, write to the Free Software | ||
| 22 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include <scsi/scsi_device.h> | ||
| 26 | |||
| 27 | #include "exofs.h" | ||
| 28 | |||
| 29 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | ||
| 30 | { | ||
| 31 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
| 32 | } | ||
| 33 | |||
| 34 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
| 35 | u64 offset, void *p, unsigned length) | ||
| 36 | { | ||
| 37 | struct osd_request *or = osd_start_request(od, GFP_KERNEL); | ||
| 38 | /* struct osd_sense_info osi = {.key = 0};*/ | ||
| 39 | int ret; | ||
| 40 | |||
| 41 | if (unlikely(!or)) { | ||
| 42 | EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); | ||
| 43 | return -ENOMEM; | ||
| 44 | } | ||
| 45 | ret = osd_req_read_kern(or, obj, offset, p, length); | ||
| 46 | if (unlikely(ret)) { | ||
| 47 | EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); | ||
| 48 | goto out; | ||
| 49 | } | ||
| 50 | |||
| 51 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
| 52 | if (unlikely(ret)) { | ||
| 53 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
| 54 | goto out; | ||
| 55 | } | ||
| 56 | |||
| 57 | ret = osd_execute_request(or); | ||
| 58 | if (unlikely(ret)) | ||
| 59 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
| 60 | /* osd_req_decode_sense(or, ret); */ | ||
| 61 | |||
| 62 | out: | ||
| 63 | osd_end_request(or); | ||
| 64 | return ret; | ||
| 65 | } | ||
| 66 | |||
| 67 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) | ||
| 68 | { | ||
| 69 | struct exofs_io_state *ios; | ||
| 70 | |||
| 71 | /*TODO: Maybe use kmem_cach per sbi of size | ||
| 72 | * exofs_io_state_size(sbi->s_numdevs) | ||
| 73 | */ | ||
| 74 | ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); | ||
| 75 | if (unlikely(!ios)) { | ||
| 76 | *pios = NULL; | ||
| 77 | return -ENOMEM; | ||
| 78 | } | ||
| 79 | |||
| 80 | ios->sbi = sbi; | ||
| 81 | ios->obj.partition = sbi->s_pid; | ||
| 82 | *pios = ios; | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | void exofs_put_io_state(struct exofs_io_state *ios) | ||
| 87 | { | ||
| 88 | if (ios) { | ||
| 89 | unsigned i; | ||
| 90 | |||
| 91 | for (i = 0; i < ios->numdevs; i++) { | ||
| 92 | struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; | ||
| 93 | |||
| 94 | if (per_dev->or) | ||
| 95 | osd_end_request(per_dev->or); | ||
| 96 | if (per_dev->bio) | ||
| 97 | bio_put(per_dev->bio); | ||
| 98 | } | ||
| 99 | |||
| 100 | kfree(ios); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | static void _sync_done(struct exofs_io_state *ios, void *p) | ||
| 105 | { | ||
| 106 | struct completion *waiting = p; | ||
| 107 | |||
| 108 | complete(waiting); | ||
| 109 | } | ||
| 110 | |||
| 111 | static void _last_io(struct kref *kref) | ||
| 112 | { | ||
| 113 | struct exofs_io_state *ios = container_of( | ||
| 114 | kref, struct exofs_io_state, kref); | ||
| 115 | |||
| 116 | ios->done(ios, ios->private); | ||
| 117 | } | ||
| 118 | |||
| 119 | static void _done_io(struct osd_request *or, void *p) | ||
| 120 | { | ||
| 121 | struct exofs_io_state *ios = p; | ||
| 122 | |||
| 123 | kref_put(&ios->kref, _last_io); | ||
| 124 | } | ||
| 125 | |||
| 126 | static int exofs_io_execute(struct exofs_io_state *ios) | ||
| 127 | { | ||
| 128 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 129 | bool sync = (ios->done == NULL); | ||
| 130 | int i, ret; | ||
| 131 | |||
| 132 | if (sync) { | ||
| 133 | ios->done = _sync_done; | ||
| 134 | ios->private = &wait; | ||
| 135 | } | ||
| 136 | |||
| 137 | for (i = 0; i < ios->numdevs; i++) { | ||
| 138 | struct osd_request *or = ios->per_dev[i].or; | ||
| 139 | if (unlikely(!or)) | ||
| 140 | continue; | ||
| 141 | |||
| 142 | ret = osd_finalize_request(or, 0, ios->cred, NULL); | ||
| 143 | if (unlikely(ret)) { | ||
| 144 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", | ||
| 145 | ret); | ||
| 146 | return ret; | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | kref_init(&ios->kref); | ||
| 151 | |||
| 152 | for (i = 0; i < ios->numdevs; i++) { | ||
| 153 | struct osd_request *or = ios->per_dev[i].or; | ||
| 154 | if (unlikely(!or)) | ||
| 155 | continue; | ||
| 156 | |||
| 157 | kref_get(&ios->kref); | ||
| 158 | osd_execute_request_async(or, _done_io, ios); | ||
| 159 | } | ||
| 160 | |||
| 161 | kref_put(&ios->kref, _last_io); | ||
| 162 | ret = 0; | ||
| 163 | |||
| 164 | if (sync) { | ||
| 165 | wait_for_completion(&wait); | ||
| 166 | ret = exofs_check_io(ios, NULL); | ||
| 167 | } | ||
| 168 | return ret; | ||
| 169 | } | ||
| 170 | |||
| 171 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid) | ||
| 172 | { | ||
| 173 | enum osd_err_priority acumulated_osd_err = 0; | ||
| 174 | int acumulated_lin_err = 0; | ||
| 175 | int i; | ||
| 176 | |||
| 177 | for (i = 0; i < ios->numdevs; i++) { | ||
| 178 | struct osd_sense_info osi; | ||
| 179 | int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); | ||
| 180 | |||
| 181 | if (likely(!ret)) | ||
| 182 | continue; | ||
| 183 | |||
| 184 | if (unlikely(ret == -EFAULT)) { | ||
| 185 | EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); | ||
| 186 | /*FIXME: All the pages in this device range should: | ||
| 187 | * clear_highpage(page); | ||
| 188 | */ | ||
| 189 | } | ||
| 190 | |||
| 191 | if (osi.osd_err_pri >= acumulated_osd_err) { | ||
| 192 | acumulated_osd_err = osi.osd_err_pri; | ||
| 193 | acumulated_lin_err = ret; | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | /* TODO: raid specific residual calculations */ | ||
| 198 | if (resid) { | ||
| 199 | if (likely(!acumulated_lin_err)) | ||
| 200 | *resid = 0; | ||
| 201 | else | ||
| 202 | *resid = ios->length; | ||
| 203 | } | ||
| 204 | |||
| 205 | return acumulated_lin_err; | ||
| 206 | } | ||
| 207 | |||
| 208 | int exofs_sbi_create(struct exofs_io_state *ios) | ||
| 209 | { | ||
| 210 | int i, ret; | ||
| 211 | |||
| 212 | for (i = 0; i < ios->sbi->s_numdevs; i++) { | ||
| 213 | struct osd_request *or; | ||
| 214 | |||
| 215 | or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | ||
| 216 | if (unlikely(!or)) { | ||
| 217 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
| 218 | ret = -ENOMEM; | ||
| 219 | goto out; | ||
| 220 | } | ||
| 221 | ios->per_dev[i].or = or; | ||
| 222 | ios->numdevs++; | ||
| 223 | |||
| 224 | osd_req_create_object(or, &ios->obj); | ||
| 225 | } | ||
| 226 | ret = exofs_io_execute(ios); | ||
| 227 | |||
| 228 | out: | ||
| 229 | return ret; | ||
| 230 | } | ||
| 231 | |||
| 232 | int exofs_sbi_remove(struct exofs_io_state *ios) | ||
| 233 | { | ||
| 234 | int i, ret; | ||
| 235 | |||
| 236 | for (i = 0; i < ios->sbi->s_numdevs; i++) { | ||
| 237 | struct osd_request *or; | ||
| 238 | |||
| 239 | or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | ||
| 240 | if (unlikely(!or)) { | ||
| 241 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
| 242 | ret = -ENOMEM; | ||
| 243 | goto out; | ||
| 244 | } | ||
| 245 | ios->per_dev[i].or = or; | ||
| 246 | ios->numdevs++; | ||
| 247 | |||
| 248 | osd_req_remove_object(or, &ios->obj); | ||
| 249 | } | ||
| 250 | ret = exofs_io_execute(ios); | ||
| 251 | |||
| 252 | out: | ||
| 253 | return ret; | ||
| 254 | } | ||
| 255 | |||
| 256 | int exofs_sbi_write(struct exofs_io_state *ios) | ||
| 257 | { | ||
| 258 | int i, ret; | ||
| 259 | |||
| 260 | for (i = 0; i < ios->sbi->s_numdevs; i++) { | ||
| 261 | struct osd_request *or; | ||
| 262 | |||
| 263 | or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | ||
| 264 | if (unlikely(!or)) { | ||
| 265 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
| 266 | ret = -ENOMEM; | ||
| 267 | goto out; | ||
| 268 | } | ||
| 269 | ios->per_dev[i].or = or; | ||
| 270 | ios->numdevs++; | ||
| 271 | |||
| 272 | if (ios->bio) { | ||
| 273 | struct bio *bio; | ||
| 274 | |||
| 275 | if (i != 0) { | ||
| 276 | bio = bio_kmalloc(GFP_KERNEL, | ||
| 277 | ios->bio->bi_max_vecs); | ||
| 278 | if (unlikely(!bio)) { | ||
| 279 | ret = -ENOMEM; | ||
| 280 | goto out; | ||
| 281 | } | ||
| 282 | |||
| 283 | __bio_clone(bio, ios->bio); | ||
| 284 | bio->bi_bdev = NULL; | ||
| 285 | bio->bi_next = NULL; | ||
| 286 | ios->per_dev[i].bio = bio; | ||
| 287 | } else { | ||
| 288 | bio = ios->bio; | ||
| 289 | } | ||
| 290 | |||
| 291 | osd_req_write(or, &ios->obj, ios->offset, bio, | ||
| 292 | ios->length); | ||
| 293 | /* EXOFS_DBGMSG("write sync=%d\n", sync);*/ | ||
| 294 | } else if (ios->kern_buff) { | ||
| 295 | osd_req_write_kern(or, &ios->obj, ios->offset, | ||
| 296 | ios->kern_buff, ios->length); | ||
| 297 | /* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ | ||
| 298 | } else { | ||
| 299 | osd_req_set_attributes(or, &ios->obj); | ||
| 300 | /* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ | ||
| 301 | } | ||
| 302 | |||
| 303 | if (ios->out_attr) | ||
| 304 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
| 305 | ios->out_attr_len); | ||
| 306 | |||
| 307 | if (ios->in_attr) | ||
| 308 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
| 309 | ios->in_attr_len); | ||
| 310 | } | ||
| 311 | ret = exofs_io_execute(ios); | ||
| 312 | |||
| 313 | out: | ||
| 314 | return ret; | ||
| 315 | } | ||
| 316 | |||
| 317 | int exofs_sbi_read(struct exofs_io_state *ios) | ||
| 318 | { | ||
| 319 | int i, ret; | ||
| 320 | |||
| 321 | for (i = 0; i < 1; i++) { | ||
| 322 | struct osd_request *or; | ||
| 323 | unsigned first_dev = (unsigned)ios->obj.id; | ||
| 324 | |||
| 325 | first_dev %= ios->sbi->s_numdevs; | ||
| 326 | or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL); | ||
| 327 | if (unlikely(!or)) { | ||
| 328 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
| 329 | ret = -ENOMEM; | ||
| 330 | goto out; | ||
| 331 | } | ||
| 332 | ios->per_dev[i].or = or; | ||
| 333 | ios->numdevs++; | ||
| 334 | |||
| 335 | if (ios->bio) { | ||
| 336 | osd_req_read(or, &ios->obj, ios->offset, ios->bio, | ||
| 337 | ios->length); | ||
| 338 | /* EXOFS_DBGMSG("read sync=%d\n", sync);*/ | ||
| 339 | } else if (ios->kern_buff) { | ||
| 340 | osd_req_read_kern(or, &ios->obj, ios->offset, | ||
| 341 | ios->kern_buff, ios->length); | ||
| 342 | /* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ | ||
| 343 | } else { | ||
| 344 | osd_req_get_attributes(or, &ios->obj); | ||
| 345 | /* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ | ||
| 346 | } | ||
| 347 | |||
| 348 | if (ios->out_attr) | ||
| 349 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
| 350 | ios->out_attr_len); | ||
| 351 | |||
| 352 | if (ios->in_attr) | ||
| 353 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
| 354 | ios->in_attr_len); | ||
| 355 | } | ||
| 356 | ret = exofs_io_execute(ios); | ||
| 357 | |||
| 358 | out: | ||
| 359 | return ret; | ||
| 360 | } | ||
| 361 | |||
| 362 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) | ||
| 363 | { | ||
| 364 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
| 365 | void *iter = NULL; | ||
| 366 | int nelem; | ||
| 367 | |||
| 368 | do { | ||
| 369 | nelem = 1; | ||
| 370 | osd_req_decode_get_attr_list(ios->per_dev[0].or, | ||
| 371 | &cur_attr, &nelem, &iter); | ||
| 372 | if ((cur_attr.attr_page == attr->attr_page) && | ||
| 373 | (cur_attr.attr_id == attr->attr_id)) { | ||
| 374 | attr->len = cur_attr.len; | ||
| 375 | attr->val_ptr = cur_attr.val_ptr; | ||
| 376 | return 0; | ||
| 377 | } | ||
| 378 | } while (iter); | ||
| 379 | |||
| 380 | return -EIO; | ||
| 381 | } | ||
| 382 | |||
| 383 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | ||
| 384 | { | ||
| 385 | struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; | ||
| 386 | struct exofs_io_state *ios; | ||
| 387 | struct osd_attr attr; | ||
| 388 | __be64 newsize; | ||
| 389 | int i, ret; | ||
| 390 | |||
| 391 | if (exofs_get_io_state(sbi, &ios)) | ||
| 392 | return -ENOMEM; | ||
| 393 | |||
| 394 | ios->obj.id = exofs_oi_objno(oi); | ||
| 395 | ios->cred = oi->i_cred; | ||
| 396 | |||
| 397 | newsize = cpu_to_be64(size); | ||
| 398 | attr = g_attr_logical_length; | ||
| 399 | attr.val_ptr = &newsize; | ||
| 400 | |||
| 401 | for (i = 0; i < sbi->s_numdevs; i++) { | ||
| 402 | struct osd_request *or; | ||
| 403 | |||
| 404 | or = osd_start_request(sbi->s_ods[i], GFP_KERNEL); | ||
| 405 | if (unlikely(!or)) { | ||
| 406 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
| 407 | ret = -ENOMEM; | ||
| 408 | goto out; | ||
| 409 | } | ||
| 410 | ios->per_dev[i].or = or; | ||
| 411 | ios->numdevs++; | ||
| 412 | |||
| 413 | osd_req_set_attributes(or, &ios->obj); | ||
| 414 | osd_req_add_set_attr_list(or, &attr, 1); | ||
| 415 | } | ||
| 416 | ret = exofs_io_execute(ios); | ||
| 417 | |||
| 418 | out: | ||
| 419 | exofs_put_io_state(ios); | ||
| 420 | return ret; | ||
| 421 | } | ||
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c deleted file mode 100644 index 4372542df284..000000000000 --- a/fs/exofs/osd.c +++ /dev/null | |||
| @@ -1,125 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005, 2006 | ||
| 3 | * Avishay Traeger (avishay@gmail.com) | ||
| 4 | * Copyright (C) 2008, 2009 | ||
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 6 | * | ||
| 7 | * This file is part of exofs. | ||
| 8 | * | ||
| 9 | * exofs is free software; you can redistribute it and/or modify | ||
| 10 | * it under the terms of the GNU General Public License as published by | ||
| 11 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
| 12 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
| 13 | * version of GPL for exofs is version 2. | ||
| 14 | * | ||
| 15 | * exofs is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 18 | * GNU General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public License | ||
| 21 | * along with exofs; if not, write to the Free Software | ||
| 22 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include <scsi/scsi_device.h> | ||
| 26 | #include <scsi/osd_sense.h> | ||
| 27 | |||
| 28 | #include "exofs.h" | ||
| 29 | |||
| 30 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | ||
| 31 | { | ||
| 32 | struct osd_sense_info osi; | ||
| 33 | int ret = osd_req_decode_sense(or, &osi); | ||
| 34 | |||
| 35 | if (ret) { /* translate to Linux codes */ | ||
| 36 | if (osi.additional_code == scsi_invalid_field_in_cdb) { | ||
| 37 | if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | ||
| 38 | ret = -EFAULT; | ||
| 39 | if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | ||
| 40 | ret = -ENOENT; | ||
| 41 | else | ||
| 42 | ret = -EINVAL; | ||
| 43 | } else if (osi.additional_code == osd_quota_error) | ||
| 44 | ret = -ENOSPC; | ||
| 45 | else | ||
| 46 | ret = -EIO; | ||
| 47 | } | ||
| 48 | |||
| 49 | /* FIXME: should be include in osd_sense_info */ | ||
| 50 | if (in_resid) | ||
| 51 | *in_resid = or->in.req ? or->in.req->resid_len : 0; | ||
| 52 | |||
| 53 | if (out_resid) | ||
| 54 | *out_resid = or->out.req ? or->out.req->resid_len : 0; | ||
| 55 | |||
| 56 | return ret; | ||
| 57 | } | ||
| 58 | |||
| 59 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | ||
| 60 | { | ||
| 61 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
| 62 | } | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Perform a synchronous OSD operation. | ||
| 66 | */ | ||
| 67 | int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
| 68 | { | ||
| 69 | int ret; | ||
| 70 | |||
| 71 | or->timeout = timeout; | ||
| 72 | ret = osd_finalize_request(or, 0, credential, NULL); | ||
| 73 | if (ret) { | ||
| 74 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
| 75 | return ret; | ||
| 76 | } | ||
| 77 | |||
| 78 | ret = osd_execute_request(or); | ||
| 79 | |||
| 80 | if (ret) | ||
| 81 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
| 82 | /* osd_req_decode_sense(or, ret); */ | ||
| 83 | return ret; | ||
| 84 | } | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Perform an asynchronous OSD operation. | ||
| 88 | */ | ||
| 89 | int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
| 90 | void *caller_context, u8 *cred) | ||
| 91 | { | ||
| 92 | int ret; | ||
| 93 | |||
| 94 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
| 95 | if (ret) { | ||
| 96 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | ||
| 97 | return ret; | ||
| 98 | } | ||
| 99 | |||
| 100 | ret = osd_execute_request_async(or, async_done, caller_context); | ||
| 101 | |||
| 102 | if (ret) | ||
| 103 | EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | ||
| 104 | return ret; | ||
| 105 | } | ||
| 106 | |||
| 107 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | ||
| 108 | { | ||
| 109 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
| 110 | void *iter = NULL; | ||
| 111 | int nelem; | ||
| 112 | |||
| 113 | do { | ||
| 114 | nelem = 1; | ||
| 115 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | ||
| 116 | if ((cur_attr.attr_page == attr->attr_page) && | ||
| 117 | (cur_attr.attr_id == attr->attr_id)) { | ||
| 118 | attr->len = cur_attr.len; | ||
| 119 | attr->val_ptr = cur_attr.val_ptr; | ||
| 120 | return 0; | ||
| 121 | } | ||
| 122 | } while (iter); | ||
| 123 | |||
| 124 | return -EIO; | ||
| 125 | } | ||
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h new file mode 100644 index 000000000000..423033addd1f --- /dev/null +++ b/fs/exofs/pnfs.h | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008, 2009 | ||
| 3 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 4 | * | ||
| 5 | * This file is part of exofs. | ||
| 6 | * | ||
| 7 | * exofs is free software; you can redistribute it and/or modify it under the | ||
| 8 | * terms of the GNU General Public License version 2 as published by the Free | ||
| 9 | * Software Foundation. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | /* FIXME: Remove this file once pnfs hits mainline */ | ||
| 14 | |||
| 15 | #ifndef __EXOFS_PNFS_H__ | ||
| 16 | #define __EXOFS_PNFS_H__ | ||
| 17 | |||
| 18 | #if defined(CONFIG_PNFS) | ||
| 19 | |||
| 20 | |||
| 21 | /* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */ | ||
| 22 | #include "../nfs/objlayout/pnfs_osd_xdr.h" | ||
| 23 | |||
| 24 | #else /* defined(CONFIG_PNFS) */ | ||
| 25 | |||
/* I/O mode constants; local definition used when CONFIG_PNFS is not set */
enum pnfs_iomode {
	IOMODE_READ	= 1,
	IOMODE_RW	= 2,
	IOMODE_ANY	= 3,
};
| 31 | |||
/*
 * Layout Structure
 *
 * RAID algorithm identifiers; the value is kept in
 * pnfs_osd_data_map.odm_raid_algorithm.
 */
enum pnfs_osd_raid_algorithm4 {
	PNFS_OSD_RAID_0		= 1,
	PNFS_OSD_RAID_4		= 2,
	PNFS_OSD_RAID_5		= 3,
	PNFS_OSD_RAID_PQ	= 4	/* Reed-Solomon P+Q */
};
| 39 | |||
| 40 | struct pnfs_osd_data_map { | ||
| 41 | u32 odm_num_comps; | ||
| 42 | u64 odm_stripe_unit; | ||
| 43 | u32 odm_group_width; | ||
| 44 | u32 odm_group_depth; | ||
| 45 | u32 odm_mirror_cnt; | ||
| 46 | u32 odm_raid_algorithm; | ||
| 47 | }; | ||
| 48 | |||
| 49 | #endif /* else defined(CONFIG_PNFS) */ | ||
| 50 | |||
| 51 | #endif /* __EXOFS_PNFS_H__ */ | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 9f500dec3b59..a1d1e77b12eb 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
| @@ -203,49 +203,45 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
| 203 | { | 203 | { |
| 204 | struct exofs_sb_info *sbi; | 204 | struct exofs_sb_info *sbi; |
| 205 | struct exofs_fscb *fscb; | 205 | struct exofs_fscb *fscb; |
| 206 | struct osd_request *or; | 206 | struct exofs_io_state *ios; |
| 207 | struct osd_obj_id obj; | ||
| 208 | int ret = -ENOMEM; | 207 | int ret = -ENOMEM; |
| 209 | 208 | ||
| 210 | fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); | ||
| 211 | if (!fscb) { | ||
| 212 | EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); | ||
| 213 | return -ENOMEM; | ||
| 214 | } | ||
| 215 | |||
| 216 | lock_super(sb); | 209 | lock_super(sb); |
| 217 | sbi = sb->s_fs_info; | 210 | sbi = sb->s_fs_info; |
| 211 | fscb = &sbi->s_fscb; | ||
| 212 | |||
| 213 | ret = exofs_get_io_state(sbi, &ios); | ||
| 214 | if (ret) | ||
| 215 | goto out; | ||
| 216 | |||
| 217 | /* Note: We only write the changing part of the fscb, i.e. up to | ||
| 218 | * the fscb->s_dev_table_oid member. There is no read-modify-write | ||
| 219 | * here. | ||
| 220 | */ | ||
| 221 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); | ||
| 222 | memset(fscb, 0, ios->length); | ||
| 218 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 223 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
| 219 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); | 224 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); |
| 220 | fscb->s_magic = cpu_to_le16(sb->s_magic); | 225 | fscb->s_magic = cpu_to_le16(sb->s_magic); |
| 221 | fscb->s_newfs = 0; | 226 | fscb->s_newfs = 0; |
| 227 | fscb->s_version = EXOFS_FSCB_VER; | ||
| 222 | 228 | ||
| 223 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 229 | ios->obj.id = EXOFS_SUPER_ID; |
| 224 | if (unlikely(!or)) { | 230 | ios->offset = 0; |
| 225 | EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); | 231 | ios->kern_buff = fscb; |
| 226 | goto out; | 232 | ios->cred = sbi->s_cred; |
| 227 | } | ||
| 228 | 233 | ||
| 229 | obj.partition = sbi->s_pid; | 234 | ret = exofs_sbi_write(ios); |
| 230 | obj.id = EXOFS_SUPER_ID; | ||
| 231 | ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb)); | ||
| 232 | if (unlikely(ret)) { | 235 | if (unlikely(ret)) { |
| 233 | EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); | 236 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); |
| 234 | goto out; | ||
| 235 | } | ||
| 236 | |||
| 237 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
| 238 | if (unlikely(ret)) { | ||
| 239 | EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); | ||
| 240 | goto out; | 237 | goto out; |
| 241 | } | 238 | } |
| 242 | sb->s_dirt = 0; | 239 | sb->s_dirt = 0; |
| 243 | 240 | ||
| 244 | out: | 241 | out: |
| 245 | if (or) | 242 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
| 246 | osd_end_request(or); | 243 | exofs_put_io_state(ios); |
| 247 | unlock_super(sb); | 244 | unlock_super(sb); |
| 248 | kfree(fscb); | ||
| 249 | return ret; | 245 | return ret; |
| 250 | } | 246 | } |
| 251 | 247 | ||
| @@ -257,6 +253,29 @@ static void exofs_write_super(struct super_block *sb) | |||
| 257 | sb->s_dirt = 0; | 253 | sb->s_dirt = 0; |
| 258 | } | 254 | } |
| 259 | 255 | ||
| 256 | static void _exofs_print_device(const char *msg, const char *dev_path, | ||
| 257 | struct osd_dev *od, u64 pid) | ||
| 258 | { | ||
| 259 | const struct osd_dev_info *odi = osduld_device_info(od); | ||
| 260 | |||
| 261 | printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n", | ||
| 262 | msg, dev_path ?: "", odi->osdname, _LLU(pid)); | ||
| 263 | } | ||
| 264 | |||
| 265 | void exofs_free_sbi(struct exofs_sb_info *sbi) | ||
| 266 | { | ||
| 267 | while (sbi->s_numdevs) { | ||
| 268 | int i = --sbi->s_numdevs; | ||
| 269 | struct osd_dev *od = sbi->s_ods[i]; | ||
| 270 | |||
| 271 | if (od) { | ||
| 272 | sbi->s_ods[i] = NULL; | ||
| 273 | osduld_put_device(od); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | kfree(sbi); | ||
| 277 | } | ||
| 278 | |||
| 260 | /* | 279 | /* |
| 261 | * This function is called when the vfs is freeing the superblock. We just | 280 | * This function is called when the vfs is freeing the superblock. We just |
| 262 | * need to free our own part. | 281 | * need to free our own part. |
| @@ -279,11 +298,182 @@ static void exofs_put_super(struct super_block *sb) | |||
| 279 | msecs_to_jiffies(100)); | 298 | msecs_to_jiffies(100)); |
| 280 | } | 299 | } |
| 281 | 300 | ||
| 282 | osduld_put_device(sbi->s_dev); | 301 | _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid); |
| 283 | kfree(sb->s_fs_info); | 302 | |
| 303 | exofs_free_sbi(sbi); | ||
| 284 | sb->s_fs_info = NULL; | 304 | sb->s_fs_info = NULL; |
| 285 | } | 305 | } |
| 286 | 306 | ||
| 307 | static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | ||
| 308 | struct exofs_device_table *dt) | ||
| 309 | { | ||
| 310 | sbi->data_map.odm_num_comps = | ||
| 311 | le32_to_cpu(dt->dt_data_map.cb_num_comps); | ||
| 312 | sbi->data_map.odm_stripe_unit = | ||
| 313 | le64_to_cpu(dt->dt_data_map.cb_stripe_unit); | ||
| 314 | sbi->data_map.odm_group_width = | ||
| 315 | le32_to_cpu(dt->dt_data_map.cb_group_width); | ||
| 316 | sbi->data_map.odm_group_depth = | ||
| 317 | le32_to_cpu(dt->dt_data_map.cb_group_depth); | ||
| 318 | sbi->data_map.odm_mirror_cnt = | ||
| 319 | le32_to_cpu(dt->dt_data_map.cb_mirror_cnt); | ||
| 320 | sbi->data_map.odm_raid_algorithm = | ||
| 321 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); | ||
| 322 | |||
| 323 | /* FIXME: Hard coded mirror only for now. if not so do not mount */ | ||
| 324 | if ((sbi->data_map.odm_num_comps != numdevs) || | ||
| 325 | (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) || | ||
| 326 | (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) || | ||
| 327 | (sbi->data_map.odm_mirror_cnt != (numdevs - 1))) | ||
| 328 | return -EINVAL; | ||
| 329 | else | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | /* @odi is valid only as long as @fscb_dev is valid */ | ||
| 334 | static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | ||
| 335 | struct osd_dev_info *odi) | ||
| 336 | { | ||
| 337 | odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); | ||
| 338 | memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); | ||
| 339 | |||
| 340 | odi->osdname_len = le32_to_cpu(dt_dev->osdname_len); | ||
| 341 | odi->osdname = dt_dev->osdname; | ||
| 342 | |||
| 343 | /* FIXME support long names. Will need a _put function */ | ||
| 344 | if (dt_dev->long_name_offset) | ||
| 345 | return -EINVAL; | ||
| 346 | |||
| 347 | /* Make sure osdname is printable! | ||
| 348 | * mkexofs should give us space for a null-terminator else the | ||
| 349 | * device-table is invalid. | ||
| 350 | */ | ||
| 351 | if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname))) | ||
| 352 | odi->osdname_len = sizeof(dt_dev->osdname) - 1; | ||
| 353 | dt_dev->osdname[odi->osdname_len] = 0; | ||
| 354 | |||
| 355 | /* If it's all zeros something is bad we read past end-of-obj */ | ||
| 356 | return !(odi->systemid_len || odi->osdname_len); | ||
| 357 | } | ||
| 358 | |||
| 359 | static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | ||
| 360 | unsigned table_count) | ||
| 361 | { | ||
| 362 | struct exofs_sb_info *sbi = *psbi; | ||
| 363 | struct osd_dev *fscb_od; | ||
| 364 | struct osd_obj_id obj = {.partition = sbi->s_pid, | ||
| 365 | .id = EXOFS_DEVTABLE_ID}; | ||
| 366 | struct exofs_device_table *dt; | ||
| 367 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + | ||
| 368 | sizeof(*dt); | ||
| 369 | unsigned numdevs, i; | ||
| 370 | int ret; | ||
| 371 | |||
| 372 | dt = kmalloc(table_bytes, GFP_KERNEL); | ||
| 373 | if (unlikely(!dt)) { | ||
| 374 | EXOFS_ERR("ERROR: allocating %x bytes for device table\n", | ||
| 375 | table_bytes); | ||
| 376 | return -ENOMEM; | ||
| 377 | } | ||
| 378 | |||
| 379 | fscb_od = sbi->s_ods[0]; | ||
| 380 | sbi->s_ods[0] = NULL; | ||
| 381 | sbi->s_numdevs = 0; | ||
| 382 | ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); | ||
| 383 | if (unlikely(ret)) { | ||
| 384 | EXOFS_ERR("ERROR: reading device table\n"); | ||
| 385 | goto out; | ||
| 386 | } | ||
| 387 | |||
| 388 | numdevs = le64_to_cpu(dt->dt_num_devices); | ||
| 389 | if (unlikely(!numdevs)) { | ||
| 390 | ret = -EINVAL; | ||
| 391 | goto out; | ||
| 392 | } | ||
| 393 | WARN_ON(table_count != numdevs); | ||
| 394 | |||
| 395 | ret = _read_and_match_data_map(sbi, numdevs, dt); | ||
| 396 | if (unlikely(ret)) | ||
| 397 | goto out; | ||
| 398 | |||
| 399 | if (likely(numdevs > 1)) { | ||
| 400 | unsigned size = numdevs * sizeof(sbi->s_ods[0]); | ||
| 401 | |||
| 402 | sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); | ||
| 403 | if (unlikely(!sbi)) { | ||
| 404 | ret = -ENOMEM; | ||
| 405 | goto out; | ||
| 406 | } | ||
| 407 | memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0])); | ||
| 408 | *psbi = sbi; | ||
| 409 | } | ||
| 410 | |||
| 411 | for (i = 0; i < numdevs; i++) { | ||
| 412 | struct exofs_fscb fscb; | ||
| 413 | struct osd_dev_info odi; | ||
| 414 | struct osd_dev *od; | ||
| 415 | |||
| 416 | if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) { | ||
| 417 | EXOFS_ERR("ERROR: Read all-zeros device entry\n"); | ||
| 418 | ret = -EINVAL; | ||
| 419 | goto out; | ||
| 420 | } | ||
| 421 | |||
| 422 | printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", | ||
| 423 | i, odi.osdname); | ||
| 424 | |||
| 425 | /* On all devices the device table is identical. The user can | ||
| 426 | * specify any one of the participating devices on the command | ||
| 427 | * line. We always keep them in device-table order. | ||
| 428 | */ | ||
| 429 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { | ||
| 430 | sbi->s_ods[i] = fscb_od; | ||
| 431 | ++sbi->s_numdevs; | ||
| 432 | fscb_od = NULL; | ||
| 433 | continue; | ||
| 434 | } | ||
| 435 | |||
| 436 | od = osduld_info_lookup(&odi); | ||
| 437 | if (unlikely(IS_ERR(od))) { | ||
| 438 | ret = PTR_ERR(od); | ||
| 439 | EXOFS_ERR("ERROR: device requested is not found " | ||
| 440 | "osd_name-%s =>%d\n", odi.osdname, ret); | ||
| 441 | goto out; | ||
| 442 | } | ||
| 443 | |||
| 444 | sbi->s_ods[i] = od; | ||
| 445 | ++sbi->s_numdevs; | ||
| 446 | |||
| 447 | /* Read the fscb of the other devices to make sure the FS | ||
| 448 | * partition is there. | ||
| 449 | */ | ||
| 450 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, | ||
| 451 | sizeof(fscb)); | ||
| 452 | if (unlikely(ret)) { | ||
| 453 | EXOFS_ERR("ERROR: Malformed participating device " | ||
| 454 | "error reading fscb osd_name-%s\n", | ||
| 455 | odi.osdname); | ||
| 456 | goto out; | ||
| 457 | } | ||
| 458 | |||
| 459 | /* TODO: verify other information is correct and FS-uuid | ||
| 460 | * matches. Benny what did you say about device table | ||
| 461 | * generation and old devices? | ||
| 462 | */ | ||
| 463 | } | ||
| 464 | |||
| 465 | out: | ||
| 466 | kfree(dt); | ||
| 467 | if (unlikely(!ret && fscb_od)) { | ||
| 468 | EXOFS_ERR( | ||
| 469 | "ERROR: Bad device-table container device not present\n"); | ||
| 470 | osduld_put_device(fscb_od); | ||
| 471 | ret = -EINVAL; | ||
| 472 | } | ||
| 473 | |||
| 474 | return ret; | ||
| 475 | } | ||
| 476 | |||
| 287 | /* | 477 | /* |
| 288 | * Read the superblock from the OSD and fill in the fields | 478 | * Read the superblock from the OSD and fill in the fields |
| 289 | */ | 479 | */ |
| @@ -292,24 +482,25 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 292 | struct inode *root; | 482 | struct inode *root; |
| 293 | struct exofs_mountopt *opts = data; | 483 | struct exofs_mountopt *opts = data; |
| 294 | struct exofs_sb_info *sbi; /*extended info */ | 484 | struct exofs_sb_info *sbi; /*extended info */ |
| 485 | struct osd_dev *od; /* Master device */ | ||
| 295 | struct exofs_fscb fscb; /*on-disk superblock info */ | 486 | struct exofs_fscb fscb; /*on-disk superblock info */ |
| 296 | struct osd_request *or = NULL; | ||
| 297 | struct osd_obj_id obj; | 487 | struct osd_obj_id obj; |
| 488 | unsigned table_count; | ||
| 298 | int ret; | 489 | int ret; |
| 299 | 490 | ||
| 300 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 491 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
| 301 | if (!sbi) | 492 | if (!sbi) |
| 302 | return -ENOMEM; | 493 | return -ENOMEM; |
| 303 | sb->s_fs_info = sbi; | ||
| 304 | 494 | ||
| 305 | /* use mount options to fill superblock */ | 495 | /* use mount options to fill superblock */ |
| 306 | sbi->s_dev = osduld_path_lookup(opts->dev_name); | 496 | od = osduld_path_lookup(opts->dev_name); |
| 307 | if (IS_ERR(sbi->s_dev)) { | 497 | if (IS_ERR(od)) { |
| 308 | ret = PTR_ERR(sbi->s_dev); | 498 | ret = PTR_ERR(od); |
| 309 | sbi->s_dev = NULL; | ||
| 310 | goto free_sbi; | 499 | goto free_sbi; |
| 311 | } | 500 | } |
| 312 | 501 | ||
| 502 | sbi->s_ods[0] = od; | ||
| 503 | sbi->s_numdevs = 1; | ||
| 313 | sbi->s_pid = opts->pid; | 504 | sbi->s_pid = opts->pid; |
| 314 | sbi->s_timeout = opts->timeout; | 505 | sbi->s_timeout = opts->timeout; |
| 315 | 506 | ||
| @@ -323,35 +514,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 323 | sb->s_bdev = NULL; | 514 | sb->s_bdev = NULL; |
| 324 | sb->s_dev = 0; | 515 | sb->s_dev = 0; |
| 325 | 516 | ||
| 326 | /* read data from on-disk superblock object */ | ||
| 327 | obj.partition = sbi->s_pid; | 517 | obj.partition = sbi->s_pid; |
| 328 | obj.id = EXOFS_SUPER_ID; | 518 | obj.id = EXOFS_SUPER_ID; |
| 329 | exofs_make_credential(sbi->s_cred, &obj); | 519 | exofs_make_credential(sbi->s_cred, &obj); |
| 330 | 520 | ||
| 331 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 521 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); |
| 332 | if (unlikely(!or)) { | 522 | if (unlikely(ret)) |
| 333 | if (!silent) | ||
| 334 | EXOFS_ERR( | ||
| 335 | "exofs_fill_super: osd_start_request failed.\n"); | ||
| 336 | ret = -ENOMEM; | ||
| 337 | goto free_sbi; | ||
| 338 | } | ||
| 339 | ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb)); | ||
| 340 | if (unlikely(ret)) { | ||
| 341 | if (!silent) | ||
| 342 | EXOFS_ERR( | ||
| 343 | "exofs_fill_super: osd_req_read_kern failed.\n"); | ||
| 344 | ret = -ENOMEM; | ||
| 345 | goto free_sbi; | ||
| 346 | } | ||
| 347 | |||
| 348 | ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | ||
| 349 | if (unlikely(ret)) { | ||
| 350 | if (!silent) | ||
| 351 | EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n"); | ||
| 352 | ret = -EIO; | ||
| 353 | goto free_sbi; | 523 | goto free_sbi; |
| 354 | } | ||
| 355 | 524 | ||
| 356 | sb->s_magic = le16_to_cpu(fscb.s_magic); | 525 | sb->s_magic = le16_to_cpu(fscb.s_magic); |
| 357 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); | 526 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); |
| @@ -364,12 +533,26 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 364 | ret = -EINVAL; | 533 | ret = -EINVAL; |
| 365 | goto free_sbi; | 534 | goto free_sbi; |
| 366 | } | 535 | } |
| 536 | if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { | ||
| 537 | EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", | ||
| 538 | EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); | ||
| 539 | ret = -EINVAL; | ||
| 540 | goto free_sbi; | ||
| 541 | } | ||
| 367 | 542 | ||
| 368 | /* start generation numbers from a random point */ | 543 | /* start generation numbers from a random point */ |
| 369 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 544 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
| 370 | spin_lock_init(&sbi->s_next_gen_lock); | 545 | spin_lock_init(&sbi->s_next_gen_lock); |
| 371 | 546 | ||
| 547 | table_count = le64_to_cpu(fscb.s_dev_table_count); | ||
| 548 | if (table_count) { | ||
| 549 | ret = exofs_read_lookup_dev_table(&sbi, table_count); | ||
| 550 | if (unlikely(ret)) | ||
| 551 | goto free_sbi; | ||
| 552 | } | ||
| 553 | |||
| 372 | /* set up operation vectors */ | 554 | /* set up operation vectors */ |
| 555 | sb->s_fs_info = sbi; | ||
| 373 | sb->s_op = &exofs_sops; | 556 | sb->s_op = &exofs_sops; |
| 374 | sb->s_export_op = &exofs_export_ops; | 557 | sb->s_export_op = &exofs_export_ops; |
| 375 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); | 558 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); |
| @@ -395,16 +578,15 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 395 | goto free_sbi; | 578 | goto free_sbi; |
| 396 | } | 579 | } |
| 397 | 580 | ||
| 398 | ret = 0; | 581 | _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0], |
| 399 | out: | 582 | sbi->s_pid); |
| 400 | if (or) | 583 | return 0; |
| 401 | osd_end_request(or); | ||
| 402 | return ret; | ||
| 403 | 584 | ||
| 404 | free_sbi: | 585 | free_sbi: |
| 405 | osduld_put_device(sbi->s_dev); /* NULL safe */ | 586 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", |
| 406 | kfree(sbi); | 587 | opts->dev_name, sbi->s_pid, ret); |
| 407 | goto out; | 588 | exofs_free_sbi(sbi); |
| 589 | return ret; | ||
| 408 | } | 590 | } |
| 409 | 591 | ||
| 410 | /* | 592 | /* |
| @@ -433,7 +615,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 433 | { | 615 | { |
| 434 | struct super_block *sb = dentry->d_sb; | 616 | struct super_block *sb = dentry->d_sb; |
| 435 | struct exofs_sb_info *sbi = sb->s_fs_info; | 617 | struct exofs_sb_info *sbi = sb->s_fs_info; |
| 436 | struct osd_obj_id obj = {sbi->s_pid, 0}; | 618 | struct exofs_io_state *ios; |
| 437 | struct osd_attr attrs[] = { | 619 | struct osd_attr attrs[] = { |
| 438 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, | 620 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, |
| 439 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), | 621 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), |
| @@ -442,32 +624,33 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 442 | }; | 624 | }; |
| 443 | uint64_t capacity = ULLONG_MAX; | 625 | uint64_t capacity = ULLONG_MAX; |
| 444 | uint64_t used = ULLONG_MAX; | 626 | uint64_t used = ULLONG_MAX; |
| 445 | struct osd_request *or; | ||
| 446 | uint8_t cred_a[OSD_CAP_LEN]; | 627 | uint8_t cred_a[OSD_CAP_LEN]; |
| 447 | int ret; | 628 | int ret; |
| 448 | 629 | ||
| 449 | /* get used/capacity attributes */ | 630 | ret = exofs_get_io_state(sbi, &ios); |
| 450 | exofs_make_credential(cred_a, &obj); | 631 | if (ret) { |
| 451 | 632 | EXOFS_DBGMSG("exofs_get_io_state failed.\n"); | |
| 452 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 633 | return ret; |
| 453 | if (unlikely(!or)) { | ||
| 454 | EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n"); | ||
| 455 | return -ENOMEM; | ||
| 456 | } | 634 | } |
| 457 | 635 | ||
| 458 | osd_req_get_attributes(or, &obj); | 636 | exofs_make_credential(cred_a, &ios->obj); |
| 459 | osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); | 637 | ios->cred = sbi->s_cred; |
| 460 | ret = exofs_sync_op(or, sbi->s_timeout, cred_a); | 638 | ios->in_attr = attrs; |
| 639 | ios->in_attr_len = ARRAY_SIZE(attrs); | ||
| 640 | |||
| 641 | ret = exofs_sbi_read(ios); | ||
| 461 | if (unlikely(ret)) | 642 | if (unlikely(ret)) |
| 462 | goto out; | 643 | goto out; |
| 463 | 644 | ||
| 464 | ret = extract_attr_from_req(or, &attrs[0]); | 645 | ret = extract_attr_from_ios(ios, &attrs[0]); |
| 465 | if (likely(!ret)) | 646 | if (likely(!ret)) { |
| 466 | capacity = get_unaligned_be64(attrs[0].val_ptr); | 647 | capacity = get_unaligned_be64(attrs[0].val_ptr); |
| 467 | else | 648 | if (unlikely(!capacity)) |
| 649 | capacity = ULLONG_MAX; | ||
| 650 | } else | ||
| 468 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); | 651 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); |
| 469 | 652 | ||
| 470 | ret = extract_attr_from_req(or, &attrs[1]); | 653 | ret = extract_attr_from_ios(ios, &attrs[1]); |
| 471 | if (likely(!ret)) | 654 | if (likely(!ret)) |
| 472 | used = get_unaligned_be64(attrs[1].val_ptr); | 655 | used = get_unaligned_be64(attrs[1].val_ptr); |
| 473 | else | 656 | else |
| @@ -476,15 +659,15 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 476 | /* fill in the stats buffer */ | 659 | /* fill in the stats buffer */ |
| 477 | buf->f_type = EXOFS_SUPER_MAGIC; | 660 | buf->f_type = EXOFS_SUPER_MAGIC; |
| 478 | buf->f_bsize = EXOFS_BLKSIZE; | 661 | buf->f_bsize = EXOFS_BLKSIZE; |
| 479 | buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); | 662 | buf->f_blocks = capacity >> 9; |
| 480 | buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); | 663 | buf->f_bfree = (capacity - used) >> 9; |
| 481 | buf->f_bavail = buf->f_bfree; | 664 | buf->f_bavail = buf->f_bfree; |
| 482 | buf->f_files = sbi->s_numfiles; | 665 | buf->f_files = sbi->s_numfiles; |
| 483 | buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; | 666 | buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; |
| 484 | buf->f_namelen = EXOFS_NAME_LEN; | 667 | buf->f_namelen = EXOFS_NAME_LEN; |
| 485 | 668 | ||
| 486 | out: | 669 | out: |
| 487 | osd_end_request(or); | 670 | exofs_put_io_state(ios); |
| 488 | return ret; | 671 | return ret; |
| 489 | } | 672 | } |
| 490 | 673 | ||
