diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-09 12:37:25 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-09 12:37:25 -0400 |
| commit | 882388056194d2d4c3f589b194b6bdcc47e677e8 (patch) | |
| tree | 71279f464d13e3fa08ea3e5f72e2b40b4592a96b | |
| parent | dce45af5c2e9e85f22578f2f8065f225f5d11764 (diff) | |
| parent | 33713cd09ccdc1e01b10d0782ae60200d4989553 (diff) | |
Merge tag 'for-linus-5.2-ofs1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux
Pull orangefs updates from Mike Marshall:
"This includes one fix and our "Orangefs through the pagecache" patch
series which greatly improves our small IO performance and helps us
pass more xfstests than before.
Fix:
- orangefs: truncate before updating size
Pagecache series:
- all the rest"
* tag 'for-linus-5.2-ofs1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux: (23 commits)
orangefs: truncate before updating size
orangefs: copy Orangefs-sized blocks into the pagecache if possible.
orangefs: pass slot index back to readpage.
orangefs: remember count when reading.
orangefs: add orangefs_revalidate_mapping
orangefs: implement writepages
orangefs: write range tracking
orangefs: avoid fsync service operation on flush
orangefs: skip inode writeout if nothing to write
orangefs: move do_readv_writev to direct_IO
orangefs: do not return successful read when the client-core disappeared
orangefs: implement writepage
orangefs: migrate to generic_file_read_iter
orangefs: service ops done for writeback are not killable
orangefs: remove orangefs_readpages
orangefs: reorganize setattr functions to track attribute changes
orangefs: let setattr write to cached inode
orangefs: set up and use backing_dev_info
orangefs: hold i_lock during inode_getattr
orangefs: update attributes rather than relying on server
...
| -rw-r--r-- | fs/orangefs/acl.c | 4 | ||||
| -rw-r--r-- | fs/orangefs/file.c | 389 | ||||
| -rw-r--r-- | fs/orangefs/inode.c | 914 | ||||
| -rw-r--r-- | fs/orangefs/namei.c | 40 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-bufmap.c | 13 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-bufmap.h | 2 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-debugfs.c | 4 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-kernel.h | 56 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-mod.c | 1 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-sysfs.c | 22 | ||||
| -rw-r--r-- | fs/orangefs/orangefs-utils.c | 179 | ||||
| -rw-r--r-- | fs/orangefs/super.c | 42 | ||||
| -rw-r--r-- | fs/orangefs/waitqueue.c | 18 | ||||
| -rw-r--r-- | fs/orangefs/xattr.c | 106 |
14 files changed, 1300 insertions, 490 deletions
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 72d2ff17d27b..eced272a3c57 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c | |||
| @@ -142,7 +142,7 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
| 142 | rc = __orangefs_set_acl(inode, acl, type); | 142 | rc = __orangefs_set_acl(inode, acl, type); |
| 143 | } else { | 143 | } else { |
| 144 | iattr.ia_valid = ATTR_MODE; | 144 | iattr.ia_valid = ATTR_MODE; |
| 145 | rc = orangefs_inode_setattr(inode, &iattr); | 145 | rc = __orangefs_setattr(inode, &iattr); |
| 146 | } | 146 | } |
| 147 | 147 | ||
| 148 | return rc; | 148 | return rc; |
| @@ -185,7 +185,7 @@ int orangefs_init_acl(struct inode *inode, struct inode *dir) | |||
| 185 | inode->i_mode = mode; | 185 | inode->i_mode = mode; |
| 186 | iattr.ia_mode = mode; | 186 | iattr.ia_mode = mode; |
| 187 | iattr.ia_valid |= ATTR_MODE; | 187 | iattr.ia_valid |= ATTR_MODE; |
| 188 | orangefs_inode_setattr(inode, &iattr); | 188 | __orangefs_setattr(inode, &iattr); |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | return error; | 191 | return error; |
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index b094d3d79354..a35c17017210 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* | 2 | /* |
| 3 | * (C) 2001 Clemson University and The University of Chicago | 3 | * (C) 2001 Clemson University and The University of Chicago |
| 4 | * Copyright 2018 Omnibond Systems, L.L.C. | ||
| 4 | * | 5 | * |
| 5 | * See COPYING in top-level directory. | 6 | * See COPYING in top-level directory. |
| 6 | */ | 7 | */ |
| @@ -44,15 +45,16 @@ static int flush_racache(struct inode *inode) | |||
| 44 | /* | 45 | /* |
| 45 | * Post and wait for the I/O upcall to finish | 46 | * Post and wait for the I/O upcall to finish |
| 46 | */ | 47 | */ |
| 47 | static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, | 48 | ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, |
| 48 | loff_t *offset, struct iov_iter *iter, | 49 | loff_t *offset, struct iov_iter *iter, size_t total_size, |
| 49 | size_t total_size, loff_t readahead_size) | 50 | loff_t readahead_size, struct orangefs_write_range *wr, int *index_return) |
| 50 | { | 51 | { |
| 51 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 52 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 52 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; | 53 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; |
| 53 | struct orangefs_kernel_op_s *new_op = NULL; | 54 | struct orangefs_kernel_op_s *new_op = NULL; |
| 54 | int buffer_index = -1; | 55 | int buffer_index = -1; |
| 55 | ssize_t ret; | 56 | ssize_t ret; |
| 57 | size_t copy_amount; | ||
| 56 | 58 | ||
| 57 | new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); | 59 | new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); |
| 58 | if (!new_op) | 60 | if (!new_op) |
| @@ -84,6 +86,10 @@ populate_shared_memory: | |||
| 84 | new_op->upcall.req.io.buf_index = buffer_index; | 86 | new_op->upcall.req.io.buf_index = buffer_index; |
| 85 | new_op->upcall.req.io.count = total_size; | 87 | new_op->upcall.req.io.count = total_size; |
| 86 | new_op->upcall.req.io.offset = *offset; | 88 | new_op->upcall.req.io.offset = *offset; |
| 89 | if (type == ORANGEFS_IO_WRITE && wr) { | ||
| 90 | new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid); | ||
| 91 | new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid); | ||
| 92 | } | ||
| 87 | 93 | ||
| 88 | gossip_debug(GOSSIP_FILE_DEBUG, | 94 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 89 | "%s(%pU): offset: %llu total_size: %zd\n", | 95 | "%s(%pU): offset: %llu total_size: %zd\n", |
| @@ -168,7 +174,10 @@ populate_shared_memory: | |||
| 168 | * trigger the write. | 174 | * trigger the write. |
| 169 | */ | 175 | */ |
| 170 | case OP_VFS_STATE_INPROGR: | 176 | case OP_VFS_STATE_INPROGR: |
| 171 | ret = total_size; | 177 | if (type == ORANGEFS_IO_READ) |
| 178 | ret = -EINTR; | ||
| 179 | else | ||
| 180 | ret = total_size; | ||
| 172 | break; | 181 | break; |
| 173 | default: | 182 | default: |
| 174 | gossip_err("%s: unexpected op state :%d:.\n", | 183 | gossip_err("%s: unexpected op state :%d:.\n", |
| @@ -204,8 +213,25 @@ populate_shared_memory: | |||
| 204 | * can further be kernel-space or user-space addresses. | 213 | * can further be kernel-space or user-space addresses. |
| 205 | * or it can be pointers to struct page's | 214 | * or it can be pointers to struct page's |
| 206 | */ | 215 | */ |
| 216 | |||
| 217 | /* | ||
| 218 | * When reading, readahead_size will only be zero when | ||
| 219 | * we're doing O_DIRECT, otherwise we got here from | ||
| 220 | * orangefs_readpage. | ||
| 221 | * | ||
| 222 | * If we got here from orangefs_readpage we want to | ||
| 223 | * copy either a page or the whole file into the io | ||
| 224 | * vector, whichever is smaller. | ||
| 225 | */ | ||
| 226 | if (readahead_size) | ||
| 227 | copy_amount = | ||
| 228 | min(new_op->downcall.resp.io.amt_complete, | ||
| 229 | (__s64)PAGE_SIZE); | ||
| 230 | else | ||
| 231 | copy_amount = new_op->downcall.resp.io.amt_complete; | ||
| 232 | |||
| 207 | ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, | 233 | ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, |
| 208 | new_op->downcall.resp.io.amt_complete); | 234 | copy_amount); |
| 209 | if (ret < 0) { | 235 | if (ret < 0) { |
| 210 | gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", | 236 | gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", |
| 211 | __func__, (long)ret); | 237 | __func__, (long)ret); |
| @@ -223,246 +249,112 @@ populate_shared_memory: | |||
| 223 | 249 | ||
| 224 | out: | 250 | out: |
| 225 | if (buffer_index >= 0) { | 251 | if (buffer_index >= 0) { |
| 226 | orangefs_bufmap_put(buffer_index); | 252 | if ((readahead_size) && (type == ORANGEFS_IO_READ)) { |
| 227 | gossip_debug(GOSSIP_FILE_DEBUG, | 253 | /* readpage */ |
| 228 | "%s(%pU): PUT buffer_index %d\n", | 254 | *index_return = buffer_index; |
| 229 | __func__, handle, buffer_index); | 255 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 256 | "%s: hold on to buffer_index :%d:\n", | ||
| 257 | __func__, buffer_index); | ||
| 258 | } else { | ||
| 259 | /* O_DIRECT */ | ||
| 260 | orangefs_bufmap_put(buffer_index); | ||
| 261 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 262 | "%s(%pU): PUT buffer_index %d\n", | ||
| 263 | __func__, handle, buffer_index); | ||
| 264 | } | ||
| 230 | buffer_index = -1; | 265 | buffer_index = -1; |
| 231 | } | 266 | } |
| 232 | op_release(new_op); | 267 | op_release(new_op); |
| 233 | return ret; | 268 | return ret; |
| 234 | } | 269 | } |
| 235 | 270 | ||
| 236 | /* | 271 | int orangefs_revalidate_mapping(struct inode *inode) |
| 237 | * Common entry point for read/write/readv/writev | ||
| 238 | * This function will dispatch it to either the direct I/O | ||
| 239 | * or buffered I/O path depending on the mount options and/or | ||
| 240 | * augmented/extended metadata attached to the file. | ||
| 241 | * Note: File extended attributes override any mount options. | ||
| 242 | */ | ||
| 243 | static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, | ||
| 244 | loff_t *offset, struct iov_iter *iter) | ||
| 245 | { | 272 | { |
| 246 | struct inode *inode = file->f_mapping->host; | ||
| 247 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 273 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 248 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; | 274 | struct address_space *mapping = inode->i_mapping; |
| 249 | size_t count = iov_iter_count(iter); | 275 | unsigned long *bitlock = &orangefs_inode->bitlock; |
| 250 | ssize_t total_count = 0; | 276 | int ret; |
| 251 | ssize_t ret = -EINVAL; | ||
| 252 | |||
| 253 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 254 | "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", | ||
| 255 | __func__, | ||
| 256 | handle, | ||
| 257 | (int)count); | ||
| 258 | |||
| 259 | if (type == ORANGEFS_IO_WRITE) { | ||
| 260 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 261 | "%s(%pU): proceeding with offset : %llu, " | ||
| 262 | "size %d\n", | ||
| 263 | __func__, | ||
| 264 | handle, | ||
| 265 | llu(*offset), | ||
| 266 | (int)count); | ||
| 267 | } | ||
| 268 | 277 | ||
| 269 | if (count == 0) { | 278 | while (1) { |
| 270 | ret = 0; | 279 | ret = wait_on_bit(bitlock, 1, TASK_KILLABLE); |
| 271 | goto out; | 280 | if (ret) |
| 281 | return ret; | ||
| 282 | spin_lock(&inode->i_lock); | ||
| 283 | if (test_bit(1, bitlock)) { | ||
| 284 | spin_unlock(&inode->i_lock); | ||
| 285 | continue; | ||
| 286 | } | ||
| 287 | if (!time_before(jiffies, orangefs_inode->mapping_time)) | ||
| 288 | break; | ||
| 289 | spin_unlock(&inode->i_lock); | ||
| 290 | return 0; | ||
| 272 | } | 291 | } |
| 273 | 292 | ||
| 274 | while (iov_iter_count(iter)) { | 293 | set_bit(1, bitlock); |
| 275 | size_t each_count = iov_iter_count(iter); | 294 | smp_wmb(); |
| 276 | size_t amt_complete; | 295 | spin_unlock(&inode->i_lock); |
| 277 | |||
| 278 | /* how much to transfer in this loop iteration */ | ||
| 279 | if (each_count > orangefs_bufmap_size_query()) | ||
| 280 | each_count = orangefs_bufmap_size_query(); | ||
| 281 | 296 | ||
| 282 | gossip_debug(GOSSIP_FILE_DEBUG, | 297 | unmap_mapping_range(mapping, 0, 0, 0); |
| 283 | "%s(%pU): size of each_count(%d)\n", | 298 | ret = filemap_write_and_wait(mapping); |
| 284 | __func__, | 299 | if (!ret) |
| 285 | handle, | 300 | ret = invalidate_inode_pages2(mapping); |
| 286 | (int)each_count); | ||
| 287 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 288 | "%s(%pU): BEFORE wait_for_io: offset is %d\n", | ||
| 289 | __func__, | ||
| 290 | handle, | ||
| 291 | (int)*offset); | ||
| 292 | |||
| 293 | ret = wait_for_direct_io(type, inode, offset, iter, | ||
| 294 | each_count, 0); | ||
| 295 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 296 | "%s(%pU): return from wait_for_io:%d\n", | ||
| 297 | __func__, | ||
| 298 | handle, | ||
| 299 | (int)ret); | ||
| 300 | 301 | ||
| 301 | if (ret < 0) | 302 | orangefs_inode->mapping_time = jiffies + |
| 302 | goto out; | 303 | orangefs_cache_timeout_msecs*HZ/1000; |
| 303 | |||
| 304 | *offset += ret; | ||
| 305 | total_count += ret; | ||
| 306 | amt_complete = ret; | ||
| 307 | 304 | ||
| 308 | gossip_debug(GOSSIP_FILE_DEBUG, | 305 | clear_bit(1, bitlock); |
| 309 | "%s(%pU): AFTER wait_for_io: offset is %d\n", | 306 | smp_mb__after_atomic(); |
| 310 | __func__, | 307 | wake_up_bit(bitlock, 1); |
| 311 | handle, | ||
| 312 | (int)*offset); | ||
| 313 | |||
| 314 | /* | ||
| 315 | * if we got a short I/O operations, | ||
| 316 | * fall out and return what we got so far | ||
| 317 | */ | ||
| 318 | if (amt_complete < each_count) | ||
| 319 | break; | ||
| 320 | } /*end while */ | ||
| 321 | |||
| 322 | out: | ||
| 323 | if (total_count > 0) | ||
| 324 | ret = total_count; | ||
| 325 | if (ret > 0) { | ||
| 326 | if (type == ORANGEFS_IO_READ) { | ||
| 327 | file_accessed(file); | ||
| 328 | } else { | ||
| 329 | file_update_time(file); | ||
| 330 | /* | ||
| 331 | * Must invalidate to ensure write loop doesn't | ||
| 332 | * prevent kernel from reading updated | ||
| 333 | * attribute. Size probably changed because of | ||
| 334 | * the write, and other clients could update | ||
| 335 | * any other attribute. | ||
| 336 | */ | ||
| 337 | orangefs_inode->getattr_time = jiffies - 1; | ||
| 338 | } | ||
| 339 | } | ||
| 340 | |||
| 341 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 342 | "%s(%pU): Value(%d) returned.\n", | ||
| 343 | __func__, | ||
| 344 | handle, | ||
| 345 | (int)ret); | ||
| 346 | 308 | ||
| 347 | return ret; | 309 | return ret; |
| 348 | } | 310 | } |
| 349 | 311 | ||
| 350 | /* | 312 | static ssize_t orangefs_file_read_iter(struct kiocb *iocb, |
| 351 | * Read data from a specified offset in a file (referenced by inode). | 313 | struct iov_iter *iter) |
| 352 | * Data may be placed either in a user or kernel buffer. | ||
| 353 | */ | ||
| 354 | ssize_t orangefs_inode_read(struct inode *inode, | ||
| 355 | struct iov_iter *iter, | ||
| 356 | loff_t *offset, | ||
| 357 | loff_t readahead_size) | ||
| 358 | { | 314 | { |
| 359 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 315 | int ret; |
| 360 | size_t count = iov_iter_count(iter); | 316 | struct orangefs_read_options *ro; |
| 361 | size_t bufmap_size; | ||
| 362 | ssize_t ret = -EINVAL; | ||
| 363 | 317 | ||
| 364 | orangefs_stats.reads++; | 318 | orangefs_stats.reads++; |
| 365 | 319 | ||
| 366 | bufmap_size = orangefs_bufmap_size_query(); | 320 | /* |
| 367 | if (count > bufmap_size) { | 321 | * Remember how they set "count" in read(2) or pread(2) or whatever - |
| 368 | gossip_debug(GOSSIP_FILE_DEBUG, | 322 | * users can use count as a knob to control orangefs io size and later |
| 369 | "%s: count is too large (%zd/%zd)!\n", | 323 | * we can try to help them fill as many pages as possible in readpage. |
| 370 | __func__, count, bufmap_size); | 324 | */ |
| 371 | return -EINVAL; | 325 | if (!iocb->ki_filp->private_data) { |
| 326 | iocb->ki_filp->private_data = kmalloc(sizeof *ro, GFP_KERNEL); | ||
| 327 | if (!iocb->ki_filp->private_data) | ||
| 328 | return(ENOMEM); | ||
| 329 | ro = iocb->ki_filp->private_data; | ||
| 330 | ro->blksiz = iter->count; | ||
| 372 | } | 331 | } |
| 373 | 332 | ||
| 374 | gossip_debug(GOSSIP_FILE_DEBUG, | 333 | down_read(&file_inode(iocb->ki_filp)->i_rwsem); |
| 375 | "%s(%pU) %zd@%llu\n", | 334 | ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
| 376 | __func__, | 335 | if (ret) |
| 377 | &orangefs_inode->refn.khandle, | 336 | goto out; |
| 378 | count, | ||
| 379 | llu(*offset)); | ||
| 380 | |||
| 381 | ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter, | ||
| 382 | count, readahead_size); | ||
| 383 | if (ret > 0) | ||
| 384 | *offset += ret; | ||
| 385 | |||
| 386 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 387 | "%s(%pU): Value(%zd) returned.\n", | ||
| 388 | __func__, | ||
| 389 | &orangefs_inode->refn.khandle, | ||
| 390 | ret); | ||
| 391 | 337 | ||
| 338 | ret = generic_file_read_iter(iocb, iter); | ||
| 339 | out: | ||
| 340 | up_read(&file_inode(iocb->ki_filp)->i_rwsem); | ||
| 392 | return ret; | 341 | return ret; |
| 393 | } | 342 | } |
| 394 | 343 | ||
| 395 | static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) | 344 | static ssize_t orangefs_file_write_iter(struct kiocb *iocb, |
| 345 | struct iov_iter *iter) | ||
| 396 | { | 346 | { |
| 397 | struct file *file = iocb->ki_filp; | 347 | int ret; |
| 398 | loff_t pos = iocb->ki_pos; | ||
| 399 | ssize_t rc = 0; | ||
| 400 | |||
| 401 | gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n"); | ||
| 402 | |||
| 403 | orangefs_stats.reads++; | ||
| 404 | |||
| 405 | rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter); | ||
| 406 | iocb->ki_pos = pos; | ||
| 407 | |||
| 408 | return rc; | ||
| 409 | } | ||
| 410 | |||
| 411 | static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) | ||
| 412 | { | ||
| 413 | struct file *file = iocb->ki_filp; | ||
| 414 | loff_t pos; | ||
| 415 | ssize_t rc; | ||
| 416 | |||
| 417 | gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n"); | ||
| 418 | |||
| 419 | inode_lock(file->f_mapping->host); | ||
| 420 | |||
| 421 | /* Make sure generic_write_checks sees an up to date inode size. */ | ||
| 422 | if (file->f_flags & O_APPEND) { | ||
| 423 | rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, | ||
| 424 | STATX_SIZE); | ||
| 425 | if (rc == -ESTALE) | ||
| 426 | rc = -EIO; | ||
| 427 | if (rc) { | ||
| 428 | gossip_err("%s: orangefs_inode_getattr failed, " | ||
| 429 | "rc:%zd:.\n", __func__, rc); | ||
| 430 | goto out; | ||
| 431 | } | ||
| 432 | } | ||
| 433 | |||
| 434 | rc = generic_write_checks(iocb, iter); | ||
| 435 | |||
| 436 | if (rc <= 0) { | ||
| 437 | gossip_err("%s: generic_write_checks failed, rc:%zd:.\n", | ||
| 438 | __func__, rc); | ||
| 439 | goto out; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | * if we are appending, generic_write_checks would have updated | ||
| 444 | * pos to the end of the file, so we will wait till now to set | ||
| 445 | * pos... | ||
| 446 | */ | ||
| 447 | pos = iocb->ki_pos; | ||
| 448 | |||
| 449 | rc = do_readv_writev(ORANGEFS_IO_WRITE, | ||
| 450 | file, | ||
| 451 | &pos, | ||
| 452 | iter); | ||
| 453 | if (rc < 0) { | ||
| 454 | gossip_err("%s: do_readv_writev failed, rc:%zd:.\n", | ||
| 455 | __func__, rc); | ||
| 456 | goto out; | ||
| 457 | } | ||
| 458 | |||
| 459 | iocb->ki_pos = pos; | ||
| 460 | orangefs_stats.writes++; | 348 | orangefs_stats.writes++; |
| 461 | 349 | ||
| 462 | out: | 350 | if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) { |
| 351 | ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); | ||
| 352 | if (ret) | ||
| 353 | return ret; | ||
| 354 | } | ||
| 463 | 355 | ||
| 464 | inode_unlock(file->f_mapping->host); | 356 | ret = generic_file_write_iter(iocb, iter); |
| 465 | return rc; | 357 | return ret; |
| 466 | } | 358 | } |
| 467 | 359 | ||
| 468 | /* | 360 | /* |
| @@ -528,14 +420,13 @@ static vm_fault_t orangefs_fault(struct vm_fault *vmf) | |||
| 528 | { | 420 | { |
| 529 | struct file *file = vmf->vma->vm_file; | 421 | struct file *file = vmf->vma->vm_file; |
| 530 | int ret; | 422 | int ret; |
| 531 | 423 | ret = orangefs_inode_getattr(file->f_mapping->host, | |
| 532 | ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, | 424 | ORANGEFS_GETATTR_SIZE); |
| 533 | STATX_SIZE); | ||
| 534 | if (ret == -ESTALE) | 425 | if (ret == -ESTALE) |
| 535 | ret = -EIO; | 426 | ret = -EIO; |
| 536 | if (ret) { | 427 | if (ret) { |
| 537 | gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", | 428 | gossip_err("%s: orangefs_inode_getattr failed, " |
| 538 | __func__, ret); | 429 | "ret:%d:.\n", __func__, ret); |
| 539 | return VM_FAULT_SIGBUS; | 430 | return VM_FAULT_SIGBUS; |
| 540 | } | 431 | } |
| 541 | return filemap_fault(vmf); | 432 | return filemap_fault(vmf); |
| @@ -544,7 +435,7 @@ static vm_fault_t orangefs_fault(struct vm_fault *vmf) | |||
| 544 | static const struct vm_operations_struct orangefs_file_vm_ops = { | 435 | static const struct vm_operations_struct orangefs_file_vm_ops = { |
| 545 | .fault = orangefs_fault, | 436 | .fault = orangefs_fault, |
| 546 | .map_pages = filemap_map_pages, | 437 | .map_pages = filemap_map_pages, |
| 547 | .page_mkwrite = filemap_page_mkwrite, | 438 | .page_mkwrite = orangefs_page_mkwrite, |
| 548 | }; | 439 | }; |
| 549 | 440 | ||
| 550 | /* | 441 | /* |
| @@ -552,15 +443,18 @@ static const struct vm_operations_struct orangefs_file_vm_ops = { | |||
| 552 | */ | 443 | */ |
| 553 | static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) | 444 | static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) |
| 554 | { | 445 | { |
| 446 | int ret; | ||
| 447 | |||
| 448 | ret = orangefs_revalidate_mapping(file_inode(file)); | ||
| 449 | if (ret) | ||
| 450 | return ret; | ||
| 451 | |||
| 555 | gossip_debug(GOSSIP_FILE_DEBUG, | 452 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 556 | "orangefs_file_mmap: called on %s\n", | 453 | "orangefs_file_mmap: called on %s\n", |
| 557 | (file ? | 454 | (file ? |
| 558 | (char *)file->f_path.dentry->d_name.name : | 455 | (char *)file->f_path.dentry->d_name.name : |
| 559 | (char *)"Unknown")); | 456 | (char *)"Unknown")); |
| 560 | 457 | ||
| 561 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) | ||
| 562 | return -EINVAL; | ||
| 563 | |||
| 564 | /* set the sequential readahead hint */ | 458 | /* set the sequential readahead hint */ |
| 565 | vma->vm_flags |= VM_SEQ_READ; | 459 | vma->vm_flags |= VM_SEQ_READ; |
| 566 | vma->vm_flags &= ~VM_RAND_READ; | 460 | vma->vm_flags &= ~VM_RAND_READ; |
| @@ -600,8 +494,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file) | |||
| 600 | gossip_debug(GOSSIP_INODE_DEBUG, | 494 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 601 | "flush_racache finished\n"); | 495 | "flush_racache finished\n"); |
| 602 | } | 496 | } |
| 603 | truncate_inode_pages(file_inode(file)->i_mapping, | 497 | |
| 604 | 0); | ||
| 605 | } | 498 | } |
| 606 | return 0; | 499 | return 0; |
| 607 | } | 500 | } |
| @@ -619,6 +512,11 @@ static int orangefs_fsync(struct file *file, | |||
| 619 | ORANGEFS_I(file_inode(file)); | 512 | ORANGEFS_I(file_inode(file)); |
| 620 | struct orangefs_kernel_op_s *new_op = NULL; | 513 | struct orangefs_kernel_op_s *new_op = NULL; |
| 621 | 514 | ||
| 515 | ret = filemap_write_and_wait_range(file_inode(file)->i_mapping, | ||
| 516 | start, end); | ||
| 517 | if (ret < 0) | ||
| 518 | return ret; | ||
| 519 | |||
| 622 | new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); | 520 | new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); |
| 623 | if (!new_op) | 521 | if (!new_op) |
| 624 | return -ENOMEM; | 522 | return -ENOMEM; |
| @@ -656,8 +554,8 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) | |||
| 656 | * NOTE: We are only interested in file size here, | 554 | * NOTE: We are only interested in file size here, |
| 657 | * so we set mask accordingly. | 555 | * so we set mask accordingly. |
| 658 | */ | 556 | */ |
| 659 | ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, | 557 | ret = orangefs_inode_getattr(file->f_mapping->host, |
| 660 | STATX_SIZE); | 558 | ORANGEFS_GETATTR_SIZE); |
| 661 | if (ret == -ESTALE) | 559 | if (ret == -ESTALE) |
| 662 | ret = -EIO; | 560 | ret = -EIO; |
| 663 | if (ret) { | 561 | if (ret) { |
| @@ -700,6 +598,42 @@ static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
| 700 | return rc; | 598 | return rc; |
| 701 | } | 599 | } |
| 702 | 600 | ||
| 601 | static int orangefs_file_open(struct inode * inode, struct file *file) | ||
| 602 | { | ||
| 603 | file->private_data = NULL; | ||
| 604 | return generic_file_open(inode, file); | ||
| 605 | } | ||
| 606 | |||
| 607 | static int orangefs_flush(struct file *file, fl_owner_t id) | ||
| 608 | { | ||
| 609 | /* | ||
| 610 | * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the | ||
| 611 | * service_operation in orangefs_fsync. | ||
| 612 | * | ||
| 613 | * Do not send fsync to OrangeFS server on a close. Do send fsync | ||
| 614 | * on an explicit fsync call. This duplicates historical OrangeFS | ||
| 615 | * behavior. | ||
| 616 | */ | ||
| 617 | struct inode *inode = file->f_mapping->host; | ||
| 618 | int r; | ||
| 619 | |||
| 620 | kfree(file->private_data); | ||
| 621 | file->private_data = NULL; | ||
| 622 | |||
| 623 | if (inode->i_state & I_DIRTY_TIME) { | ||
| 624 | spin_lock(&inode->i_lock); | ||
| 625 | inode->i_state &= ~I_DIRTY_TIME; | ||
| 626 | spin_unlock(&inode->i_lock); | ||
| 627 | mark_inode_dirty_sync(inode); | ||
| 628 | } | ||
| 629 | |||
| 630 | r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); | ||
| 631 | if (r > 0) | ||
| 632 | return 0; | ||
| 633 | else | ||
| 634 | return r; | ||
| 635 | } | ||
| 636 | |||
| 703 | /** ORANGEFS implementation of VFS file operations */ | 637 | /** ORANGEFS implementation of VFS file operations */ |
| 704 | const struct file_operations orangefs_file_operations = { | 638 | const struct file_operations orangefs_file_operations = { |
| 705 | .llseek = orangefs_file_llseek, | 639 | .llseek = orangefs_file_llseek, |
| @@ -708,7 +642,8 @@ const struct file_operations orangefs_file_operations = { | |||
| 708 | .lock = orangefs_lock, | 642 | .lock = orangefs_lock, |
| 709 | .unlocked_ioctl = orangefs_ioctl, | 643 | .unlocked_ioctl = orangefs_ioctl, |
| 710 | .mmap = orangefs_file_mmap, | 644 | .mmap = orangefs_file_mmap, |
| 711 | .open = generic_file_open, | 645 | .open = orangefs_file_open, |
| 646 | .flush = orangefs_flush, | ||
| 712 | .release = orangefs_file_release, | 647 | .release = orangefs_file_release, |
| 713 | .fsync = orangefs_fsync, | 648 | .fsync = orangefs_fsync, |
| 714 | }; | 649 | }; |
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index c3334eca18c7..0c337d8bdaab 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* | 2 | /* |
| 3 | * (C) 2001 Clemson University and The University of Chicago | 3 | * (C) 2001 Clemson University and The University of Chicago |
| 4 | * Copyright 2018 Omnibond Systems, L.L.C. | ||
| 4 | * | 5 | * |
| 5 | * See COPYING in top-level directory. | 6 | * See COPYING in top-level directory. |
| 6 | */ | 7 | */ |
| @@ -14,40 +15,312 @@ | |||
| 14 | #include "orangefs-kernel.h" | 15 | #include "orangefs-kernel.h" |
| 15 | #include "orangefs-bufmap.h" | 16 | #include "orangefs-bufmap.h" |
| 16 | 17 | ||
| 17 | static int read_one_page(struct page *page) | 18 | static int orangefs_writepage_locked(struct page *page, |
| 19 | struct writeback_control *wbc) | ||
| 18 | { | 20 | { |
| 19 | int ret; | ||
| 20 | int max_block; | ||
| 21 | ssize_t bytes_read = 0; | ||
| 22 | struct inode *inode = page->mapping->host; | 21 | struct inode *inode = page->mapping->host; |
| 23 | const __u32 blocksize = PAGE_SIZE; | 22 | struct orangefs_write_range *wr = NULL; |
| 24 | const __u32 blockbits = PAGE_SHIFT; | 23 | struct iov_iter iter; |
| 25 | struct iov_iter to; | 24 | struct bio_vec bv; |
| 26 | struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE}; | 25 | size_t len, wlen; |
| 26 | ssize_t ret; | ||
| 27 | loff_t off; | ||
| 28 | |||
| 29 | set_page_writeback(page); | ||
| 30 | |||
| 31 | len = i_size_read(inode); | ||
| 32 | if (PagePrivate(page)) { | ||
| 33 | wr = (struct orangefs_write_range *)page_private(page); | ||
| 34 | WARN_ON(wr->pos >= len); | ||
| 35 | off = wr->pos; | ||
| 36 | if (off + wr->len > len) | ||
| 37 | wlen = len - off; | ||
| 38 | else | ||
| 39 | wlen = wr->len; | ||
| 40 | } else { | ||
| 41 | WARN_ON(1); | ||
| 42 | off = page_offset(page); | ||
| 43 | if (off + PAGE_SIZE > len) | ||
| 44 | wlen = len - off; | ||
| 45 | else | ||
| 46 | wlen = PAGE_SIZE; | ||
| 47 | } | ||
| 48 | /* Should've been handled in orangefs_invalidatepage. */ | ||
| 49 | WARN_ON(off == len || off + wlen > len); | ||
| 50 | |||
| 51 | bv.bv_page = page; | ||
| 52 | bv.bv_len = wlen; | ||
| 53 | bv.bv_offset = off % PAGE_SIZE; | ||
| 54 | WARN_ON(wlen == 0); | ||
| 55 | iov_iter_bvec(&iter, WRITE, &bv, 1, wlen); | ||
| 56 | |||
| 57 | ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen, | ||
| 58 | len, wr, NULL); | ||
| 59 | if (ret < 0) { | ||
| 60 | SetPageError(page); | ||
| 61 | mapping_set_error(page->mapping, ret); | ||
| 62 | } else { | ||
| 63 | ret = 0; | ||
| 64 | } | ||
| 65 | if (wr) { | ||
| 66 | kfree(wr); | ||
| 67 | set_page_private(page, 0); | ||
| 68 | ClearPagePrivate(page); | ||
| 69 | put_page(page); | ||
| 70 | } | ||
| 71 | return ret; | ||
| 72 | } | ||
| 73 | |||
| 74 | static int orangefs_writepage(struct page *page, struct writeback_control *wbc) | ||
| 75 | { | ||
| 76 | int ret; | ||
| 77 | ret = orangefs_writepage_locked(page, wbc); | ||
| 78 | unlock_page(page); | ||
| 79 | end_page_writeback(page); | ||
| 80 | return ret; | ||
| 81 | } | ||
| 27 | 82 | ||
| 28 | iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE); | 83 | struct orangefs_writepages { |
| 84 | loff_t off; | ||
| 85 | size_t len; | ||
| 86 | kuid_t uid; | ||
| 87 | kgid_t gid; | ||
| 88 | int maxpages; | ||
| 89 | int npages; | ||
| 90 | struct page **pages; | ||
| 91 | struct bio_vec *bv; | ||
| 92 | }; | ||
| 29 | 93 | ||
| 30 | gossip_debug(GOSSIP_INODE_DEBUG, | 94 | static int orangefs_writepages_work(struct orangefs_writepages *ow, |
| 31 | "orangefs_readpage called with page %p\n", | 95 | struct writeback_control *wbc) |
| 32 | page); | 96 | { |
| 97 | struct inode *inode = ow->pages[0]->mapping->host; | ||
| 98 | struct orangefs_write_range *wrp, wr; | ||
| 99 | struct iov_iter iter; | ||
| 100 | ssize_t ret; | ||
| 101 | size_t len; | ||
| 102 | loff_t off; | ||
| 103 | int i; | ||
| 104 | |||
| 105 | len = i_size_read(inode); | ||
| 106 | |||
| 107 | for (i = 0; i < ow->npages; i++) { | ||
| 108 | set_page_writeback(ow->pages[i]); | ||
| 109 | ow->bv[i].bv_page = ow->pages[i]; | ||
| 110 | ow->bv[i].bv_len = min(page_offset(ow->pages[i]) + PAGE_SIZE, | ||
| 111 | ow->off + ow->len) - | ||
| 112 | max(ow->off, page_offset(ow->pages[i])); | ||
| 113 | if (i == 0) | ||
| 114 | ow->bv[i].bv_offset = ow->off - | ||
| 115 | page_offset(ow->pages[i]); | ||
| 116 | else | ||
| 117 | ow->bv[i].bv_offset = 0; | ||
| 118 | } | ||
| 119 | iov_iter_bvec(&iter, WRITE, ow->bv, ow->npages, ow->len); | ||
| 120 | |||
| 121 | WARN_ON(ow->off >= len); | ||
| 122 | if (ow->off + ow->len > len) | ||
| 123 | ow->len = len - ow->off; | ||
| 124 | |||
| 125 | off = ow->off; | ||
| 126 | wr.uid = ow->uid; | ||
| 127 | wr.gid = ow->gid; | ||
| 128 | ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len, | ||
| 129 | 0, &wr, NULL); | ||
| 130 | if (ret < 0) { | ||
| 131 | for (i = 0; i < ow->npages; i++) { | ||
| 132 | SetPageError(ow->pages[i]); | ||
| 133 | mapping_set_error(ow->pages[i]->mapping, ret); | ||
| 134 | if (PagePrivate(ow->pages[i])) { | ||
| 135 | wrp = (struct orangefs_write_range *) | ||
| 136 | page_private(ow->pages[i]); | ||
| 137 | ClearPagePrivate(ow->pages[i]); | ||
| 138 | put_page(ow->pages[i]); | ||
| 139 | kfree(wrp); | ||
| 140 | } | ||
| 141 | end_page_writeback(ow->pages[i]); | ||
| 142 | unlock_page(ow->pages[i]); | ||
| 143 | } | ||
| 144 | } else { | ||
| 145 | ret = 0; | ||
| 146 | for (i = 0; i < ow->npages; i++) { | ||
| 147 | if (PagePrivate(ow->pages[i])) { | ||
| 148 | wrp = (struct orangefs_write_range *) | ||
| 149 | page_private(ow->pages[i]); | ||
| 150 | ClearPagePrivate(ow->pages[i]); | ||
| 151 | put_page(ow->pages[i]); | ||
| 152 | kfree(wrp); | ||
| 153 | } | ||
| 154 | end_page_writeback(ow->pages[i]); | ||
| 155 | unlock_page(ow->pages[i]); | ||
| 156 | } | ||
| 157 | } | ||
| 158 | return ret; | ||
| 159 | } | ||
| 160 | |||
| 161 | static int orangefs_writepages_callback(struct page *page, | ||
| 162 | struct writeback_control *wbc, void *data) | ||
| 163 | { | ||
| 164 | struct orangefs_writepages *ow = data; | ||
| 165 | struct orangefs_write_range *wr; | ||
| 166 | int ret; | ||
| 167 | |||
| 168 | if (!PagePrivate(page)) { | ||
| 169 | unlock_page(page); | ||
| 170 | /* It's not private so there's nothing to write, right? */ | ||
| 171 | printk("writepages_callback not private!\n"); | ||
| 172 | BUG(); | ||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | wr = (struct orangefs_write_range *)page_private(page); | ||
| 176 | |||
| 177 | ret = -1; | ||
| 178 | if (ow->npages == 0) { | ||
| 179 | ow->off = wr->pos; | ||
| 180 | ow->len = wr->len; | ||
| 181 | ow->uid = wr->uid; | ||
| 182 | ow->gid = wr->gid; | ||
| 183 | ow->pages[ow->npages++] = page; | ||
| 184 | ret = 0; | ||
| 185 | goto done; | ||
| 186 | } | ||
| 187 | if (!uid_eq(ow->uid, wr->uid) || !gid_eq(ow->gid, wr->gid)) { | ||
| 188 | orangefs_writepages_work(ow, wbc); | ||
| 189 | ow->npages = 0; | ||
| 190 | ret = -1; | ||
| 191 | goto done; | ||
| 192 | } | ||
| 193 | if (ow->off + ow->len == wr->pos) { | ||
| 194 | ow->len += wr->len; | ||
| 195 | ow->pages[ow->npages++] = page; | ||
| 196 | ret = 0; | ||
| 197 | goto done; | ||
| 198 | } | ||
| 199 | done: | ||
| 200 | if (ret == -1) { | ||
| 201 | if (ow->npages) { | ||
| 202 | orangefs_writepages_work(ow, wbc); | ||
| 203 | ow->npages = 0; | ||
| 204 | } | ||
| 205 | ret = orangefs_writepage_locked(page, wbc); | ||
| 206 | mapping_set_error(page->mapping, ret); | ||
| 207 | unlock_page(page); | ||
| 208 | end_page_writeback(page); | ||
| 209 | } else { | ||
| 210 | if (ow->npages == ow->maxpages) { | ||
| 211 | orangefs_writepages_work(ow, wbc); | ||
| 212 | ow->npages = 0; | ||
| 213 | } | ||
| 214 | } | ||
| 215 | return ret; | ||
| 216 | } | ||
| 217 | |||
| 218 | static int orangefs_writepages(struct address_space *mapping, | ||
| 219 | struct writeback_control *wbc) | ||
| 220 | { | ||
| 221 | struct orangefs_writepages *ow; | ||
| 222 | struct blk_plug plug; | ||
| 223 | int ret; | ||
| 224 | ow = kzalloc(sizeof(struct orangefs_writepages), GFP_KERNEL); | ||
| 225 | if (!ow) | ||
| 226 | return -ENOMEM; | ||
| 227 | ow->maxpages = orangefs_bufmap_size_query()/PAGE_SIZE; | ||
| 228 | ow->pages = kcalloc(ow->maxpages, sizeof(struct page *), GFP_KERNEL); | ||
| 229 | if (!ow->pages) { | ||
| 230 | kfree(ow); | ||
| 231 | return -ENOMEM; | ||
| 232 | } | ||
| 233 | ow->bv = kcalloc(ow->maxpages, sizeof(struct bio_vec), GFP_KERNEL); | ||
| 234 | if (!ow->bv) { | ||
| 235 | kfree(ow->pages); | ||
| 236 | kfree(ow); | ||
| 237 | return -ENOMEM; | ||
| 238 | } | ||
| 239 | blk_start_plug(&plug); | ||
| 240 | ret = write_cache_pages(mapping, wbc, orangefs_writepages_callback, ow); | ||
| 241 | if (ow->npages) | ||
| 242 | ret = orangefs_writepages_work(ow, wbc); | ||
| 243 | blk_finish_plug(&plug); | ||
| 244 | kfree(ow->pages); | ||
| 245 | kfree(ow->bv); | ||
| 246 | kfree(ow); | ||
| 247 | return ret; | ||
| 248 | } | ||
| 33 | 249 | ||
| 34 | max_block = ((inode->i_size / blocksize) + 1); | 250 | static int orangefs_launder_page(struct page *); |
| 35 | 251 | ||
| 36 | if (page->index < max_block) { | 252 | static int orangefs_readpage(struct file *file, struct page *page) |
| 37 | loff_t blockptr_offset = (((loff_t) page->index) << blockbits); | 253 | { |
| 254 | struct inode *inode = page->mapping->host; | ||
| 255 | struct iov_iter iter; | ||
| 256 | struct bio_vec bv; | ||
| 257 | ssize_t ret; | ||
| 258 | loff_t off; /* offset into this page */ | ||
| 259 | pgoff_t index; /* which page */ | ||
| 260 | struct page *next_page; | ||
| 261 | char *kaddr; | ||
| 262 | struct orangefs_read_options *ro = file->private_data; | ||
| 263 | loff_t read_size; | ||
| 264 | loff_t roundedup; | ||
| 265 | int buffer_index = -1; /* orangefs shared memory slot */ | ||
| 266 | int slot_index; /* index into slot */ | ||
| 267 | int remaining; | ||
| 38 | 268 | ||
| 39 | bytes_read = orangefs_inode_read(inode, | 269 | /* |
| 40 | &to, | 270 | * If they set some miniscule size for "count" in read(2) |
| 41 | &blockptr_offset, | 271 | * (for example) then let's try to read a page, or the whole file |
| 42 | inode->i_size); | 272 | * if it is smaller than a page. Once "count" goes over a page |
| 273 | * then lets round up to the highest page size multiple that is | ||
| 274 | * less than or equal to "count" and do that much orangefs IO and | ||
| 275 | * try to fill as many pages as we can from it. | ||
| 276 | * | ||
| 277 | * "count" should be represented in ro->blksiz. | ||
| 278 | * | ||
| 279 | * inode->i_size = file size. | ||
| 280 | */ | ||
| 281 | if (ro) { | ||
| 282 | if (ro->blksiz < PAGE_SIZE) { | ||
| 283 | if (inode->i_size < PAGE_SIZE) | ||
| 284 | read_size = inode->i_size; | ||
| 285 | else | ||
| 286 | read_size = PAGE_SIZE; | ||
| 287 | } else { | ||
| 288 | roundedup = ((PAGE_SIZE - 1) & ro->blksiz) ? | ||
| 289 | ((ro->blksiz + PAGE_SIZE) & ~(PAGE_SIZE -1)) : | ||
| 290 | ro->blksiz; | ||
| 291 | if (roundedup > inode->i_size) | ||
| 292 | read_size = inode->i_size; | ||
| 293 | else | ||
| 294 | read_size = roundedup; | ||
| 295 | |||
| 296 | } | ||
| 297 | } else { | ||
| 298 | read_size = PAGE_SIZE; | ||
| 43 | } | 299 | } |
| 300 | if (!read_size) | ||
| 301 | read_size = PAGE_SIZE; | ||
| 302 | |||
| 303 | if (PageDirty(page)) | ||
| 304 | orangefs_launder_page(page); | ||
| 305 | |||
| 306 | off = page_offset(page); | ||
| 307 | index = off >> PAGE_SHIFT; | ||
| 308 | bv.bv_page = page; | ||
| 309 | bv.bv_len = PAGE_SIZE; | ||
| 310 | bv.bv_offset = 0; | ||
| 311 | iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE); | ||
| 312 | |||
| 313 | ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter, | ||
| 314 | read_size, inode->i_size, NULL, &buffer_index); | ||
| 315 | remaining = ret; | ||
| 44 | /* this will only zero remaining unread portions of the page data */ | 316 | /* this will only zero remaining unread portions of the page data */ |
| 45 | iov_iter_zero(~0U, &to); | 317 | iov_iter_zero(~0U, &iter); |
| 46 | /* takes care of potential aliasing */ | 318 | /* takes care of potential aliasing */ |
| 47 | flush_dcache_page(page); | 319 | flush_dcache_page(page); |
| 48 | if (bytes_read < 0) { | 320 | if (ret < 0) { |
| 49 | ret = bytes_read; | ||
| 50 | SetPageError(page); | 321 | SetPageError(page); |
| 322 | unlock_page(page); | ||
| 323 | goto out; | ||
| 51 | } else { | 324 | } else { |
| 52 | SetPageUptodate(page); | 325 | SetPageUptodate(page); |
| 53 | if (PageError(page)) | 326 | if (PageError(page)) |
| @@ -56,96 +329,469 @@ static int read_one_page(struct page *page) | |||
| 56 | } | 329 | } |
| 57 | /* unlock the page after the ->readpage() routine completes */ | 330 | /* unlock the page after the ->readpage() routine completes */ |
| 58 | unlock_page(page); | 331 | unlock_page(page); |
| 332 | |||
| 333 | if (remaining > PAGE_SIZE) { | ||
| 334 | slot_index = 0; | ||
| 335 | while ((remaining - PAGE_SIZE) >= PAGE_SIZE) { | ||
| 336 | remaining -= PAGE_SIZE; | ||
| 337 | /* | ||
| 338 | * It is an optimization to try and fill more than one | ||
| 339 | * page... by now we've already gotten the single | ||
| 340 | * page we were after, if stuff doesn't seem to | ||
| 341 | * be going our way at this point just return | ||
| 342 | * and hope for the best. | ||
| 343 | * | ||
| 344 | * If we look for pages and they're already there is | ||
| 345 | * one reason to give up, and if they're not there | ||
| 346 | * and we can't create them is another reason. | ||
| 347 | */ | ||
| 348 | |||
| 349 | index++; | ||
| 350 | slot_index++; | ||
| 351 | next_page = find_get_page(inode->i_mapping, index); | ||
| 352 | if (next_page) { | ||
| 353 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 354 | "%s: found next page, quitting\n", | ||
| 355 | __func__); | ||
| 356 | put_page(next_page); | ||
| 357 | goto out; | ||
| 358 | } | ||
| 359 | next_page = find_or_create_page(inode->i_mapping, | ||
| 360 | index, | ||
| 361 | GFP_KERNEL); | ||
| 362 | /* | ||
| 363 | * I've never hit this, leave it as a printk for | ||
| 364 | * now so it will be obvious. | ||
| 365 | */ | ||
| 366 | if (!next_page) { | ||
| 367 | printk("%s: can't create next page, quitting\n", | ||
| 368 | __func__); | ||
| 369 | goto out; | ||
| 370 | } | ||
| 371 | kaddr = kmap_atomic(next_page); | ||
| 372 | orangefs_bufmap_page_fill(kaddr, | ||
| 373 | buffer_index, | ||
| 374 | slot_index); | ||
| 375 | kunmap_atomic(kaddr); | ||
| 376 | SetPageUptodate(next_page); | ||
| 377 | unlock_page(next_page); | ||
| 378 | put_page(next_page); | ||
| 379 | } | ||
| 380 | } | ||
| 381 | |||
| 382 | out: | ||
| 383 | if (buffer_index != -1) | ||
| 384 | orangefs_bufmap_put(buffer_index); | ||
| 59 | return ret; | 385 | return ret; |
| 60 | } | 386 | } |
| 61 | 387 | ||
| 62 | static int orangefs_readpage(struct file *file, struct page *page) | 388 | static int orangefs_write_begin(struct file *file, |
| 389 | struct address_space *mapping, | ||
| 390 | loff_t pos, unsigned len, unsigned flags, struct page **pagep, | ||
| 391 | void **fsdata) | ||
| 63 | { | 392 | { |
| 64 | return read_one_page(page); | 393 | struct orangefs_write_range *wr; |
| 394 | struct page *page; | ||
| 395 | pgoff_t index; | ||
| 396 | int ret; | ||
| 397 | |||
| 398 | index = pos >> PAGE_SHIFT; | ||
| 399 | |||
| 400 | page = grab_cache_page_write_begin(mapping, index, flags); | ||
| 401 | if (!page) | ||
| 402 | return -ENOMEM; | ||
| 403 | |||
| 404 | *pagep = page; | ||
| 405 | |||
| 406 | if (PageDirty(page) && !PagePrivate(page)) { | ||
| 407 | /* | ||
| 408 | * Should be impossible. If it happens, launder the page | ||
| 409 | * since we don't know what's dirty. This will WARN in | ||
| 410 | * orangefs_writepage_locked. | ||
| 411 | */ | ||
| 412 | ret = orangefs_launder_page(page); | ||
| 413 | if (ret) | ||
| 414 | return ret; | ||
| 415 | } | ||
| 416 | if (PagePrivate(page)) { | ||
| 417 | struct orangefs_write_range *wr; | ||
| 418 | wr = (struct orangefs_write_range *)page_private(page); | ||
| 419 | if (wr->pos + wr->len == pos && | ||
| 420 | uid_eq(wr->uid, current_fsuid()) && | ||
| 421 | gid_eq(wr->gid, current_fsgid())) { | ||
| 422 | wr->len += len; | ||
| 423 | goto okay; | ||
| 424 | } else { | ||
| 425 | ret = orangefs_launder_page(page); | ||
| 426 | if (ret) | ||
| 427 | return ret; | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | wr = kmalloc(sizeof *wr, GFP_KERNEL); | ||
| 432 | if (!wr) | ||
| 433 | return -ENOMEM; | ||
| 434 | |||
| 435 | wr->pos = pos; | ||
| 436 | wr->len = len; | ||
| 437 | wr->uid = current_fsuid(); | ||
| 438 | wr->gid = current_fsgid(); | ||
| 439 | SetPagePrivate(page); | ||
| 440 | set_page_private(page, (unsigned long)wr); | ||
| 441 | get_page(page); | ||
| 442 | okay: | ||
| 443 | return 0; | ||
| 65 | } | 444 | } |
| 66 | 445 | ||
| 67 | static int orangefs_readpages(struct file *file, | 446 | static int orangefs_write_end(struct file *file, struct address_space *mapping, |
| 68 | struct address_space *mapping, | 447 | loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) |
| 69 | struct list_head *pages, | ||
| 70 | unsigned nr_pages) | ||
| 71 | { | 448 | { |
| 72 | int page_idx; | 449 | struct inode *inode = page->mapping->host; |
| 73 | int ret; | 450 | loff_t last_pos = pos + copied; |
| 74 | 451 | ||
| 75 | gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n"); | 452 | /* |
| 76 | 453 | * No need to use i_size_read() here, the i_size | |
| 77 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 454 | * cannot change under us because we hold the i_mutex. |
| 78 | struct page *page; | 455 | */ |
| 79 | 456 | if (last_pos > inode->i_size) | |
| 80 | page = lru_to_page(pages); | 457 | i_size_write(inode, last_pos); |
| 81 | list_del(&page->lru); | 458 | |
| 82 | if (!add_to_page_cache(page, | 459 | /* zero the stale part of the page if we did a short copy */ |
| 83 | mapping, | 460 | if (!PageUptodate(page)) { |
| 84 | page->index, | 461 | unsigned from = pos & (PAGE_SIZE - 1); |
| 85 | readahead_gfp_mask(mapping))) { | 462 | if (copied < len) { |
| 86 | ret = read_one_page(page); | 463 | zero_user(page, from + copied, len - copied); |
| 87 | gossip_debug(GOSSIP_INODE_DEBUG, | 464 | } |
| 88 | "failure adding page to cache, read_one_page returned: %d\n", | 465 | /* Set fully written pages uptodate. */ |
| 89 | ret); | 466 | if (pos == page_offset(page) && |
| 90 | } else { | 467 | (len == PAGE_SIZE || pos + len == inode->i_size)) { |
| 91 | put_page(page); | 468 | zero_user_segment(page, from + copied, PAGE_SIZE); |
| 92 | } | 469 | SetPageUptodate(page); |
| 93 | } | 470 | } |
| 94 | BUG_ON(!list_empty(pages)); | 471 | } |
| 95 | return 0; | 472 | |
| 473 | set_page_dirty(page); | ||
| 474 | unlock_page(page); | ||
| 475 | put_page(page); | ||
| 476 | |||
| 477 | mark_inode_dirty_sync(file_inode(file)); | ||
| 478 | return copied; | ||
| 96 | } | 479 | } |
| 97 | 480 | ||
| 98 | static void orangefs_invalidatepage(struct page *page, | 481 | static void orangefs_invalidatepage(struct page *page, |
| 99 | unsigned int offset, | 482 | unsigned int offset, |
| 100 | unsigned int length) | 483 | unsigned int length) |
| 101 | { | 484 | { |
| 102 | gossip_debug(GOSSIP_INODE_DEBUG, | 485 | struct orangefs_write_range *wr; |
| 103 | "orangefs_invalidatepage called on page %p " | 486 | wr = (struct orangefs_write_range *)page_private(page); |
| 104 | "(offset is %u)\n", | 487 | |
| 105 | page, | 488 | if (offset == 0 && length == PAGE_SIZE) { |
| 106 | offset); | 489 | kfree((struct orangefs_write_range *)page_private(page)); |
| 490 | set_page_private(page, 0); | ||
| 491 | ClearPagePrivate(page); | ||
| 492 | put_page(page); | ||
| 493 | return; | ||
| 494 | /* write range entirely within invalidate range (or equal) */ | ||
| 495 | } else if (page_offset(page) + offset <= wr->pos && | ||
| 496 | wr->pos + wr->len <= page_offset(page) + offset + length) { | ||
| 497 | kfree((struct orangefs_write_range *)page_private(page)); | ||
| 498 | set_page_private(page, 0); | ||
| 499 | ClearPagePrivate(page); | ||
| 500 | put_page(page); | ||
| 501 | /* XXX is this right? only caller in fs */ | ||
| 502 | cancel_dirty_page(page); | ||
| 503 | return; | ||
| 504 | /* invalidate range chops off end of write range */ | ||
| 505 | } else if (wr->pos < page_offset(page) + offset && | ||
| 506 | wr->pos + wr->len <= page_offset(page) + offset + length && | ||
| 507 | page_offset(page) + offset < wr->pos + wr->len) { | ||
| 508 | size_t x; | ||
| 509 | x = wr->pos + wr->len - (page_offset(page) + offset); | ||
| 510 | WARN_ON(x > wr->len); | ||
| 511 | wr->len -= x; | ||
| 512 | wr->uid = current_fsuid(); | ||
| 513 | wr->gid = current_fsgid(); | ||
| 514 | /* invalidate range chops off beginning of write range */ | ||
| 515 | } else if (page_offset(page) + offset <= wr->pos && | ||
| 516 | page_offset(page) + offset + length < wr->pos + wr->len && | ||
| 517 | wr->pos < page_offset(page) + offset + length) { | ||
| 518 | size_t x; | ||
| 519 | x = page_offset(page) + offset + length - wr->pos; | ||
| 520 | WARN_ON(x > wr->len); | ||
| 521 | wr->pos += x; | ||
| 522 | wr->len -= x; | ||
| 523 | wr->uid = current_fsuid(); | ||
| 524 | wr->gid = current_fsgid(); | ||
| 525 | /* invalidate range entirely within write range (punch hole) */ | ||
| 526 | } else if (wr->pos < page_offset(page) + offset && | ||
| 527 | page_offset(page) + offset + length < wr->pos + wr->len) { | ||
| 528 | /* XXX what do we do here... should not WARN_ON */ | ||
| 529 | WARN_ON(1); | ||
| 530 | /* punch hole */ | ||
| 531 | /* | ||
| 532 | * should we just ignore this and write it out anyway? | ||
| 533 | * it hardly makes sense | ||
| 534 | */ | ||
| 535 | return; | ||
| 536 | /* non-overlapping ranges */ | ||
| 537 | } else { | ||
| 538 | /* WARN if they do overlap */ | ||
| 539 | if (!((page_offset(page) + offset + length <= wr->pos) ^ | ||
| 540 | (wr->pos + wr->len <= page_offset(page) + offset))) { | ||
| 541 | WARN_ON(1); | ||
| 542 | printk("invalidate range offset %llu length %u\n", | ||
| 543 | page_offset(page) + offset, length); | ||
| 544 | printk("write range offset %llu length %zu\n", | ||
| 545 | wr->pos, wr->len); | ||
| 546 | } | ||
| 547 | return; | ||
| 548 | } | ||
| 107 | 549 | ||
| 108 | ClearPageUptodate(page); | 550 | /* |
| 109 | ClearPageMappedToDisk(page); | 551 | * Above there are returns where wr is freed or where we WARN. |
| 110 | return; | 552 | * Thus the following runs if wr was modified above. |
| 553 | */ | ||
| 111 | 554 | ||
| 555 | orangefs_launder_page(page); | ||
| 112 | } | 556 | } |
| 113 | 557 | ||
| 114 | static int orangefs_releasepage(struct page *page, gfp_t foo) | 558 | static int orangefs_releasepage(struct page *page, gfp_t foo) |
| 115 | { | 559 | { |
| 116 | gossip_debug(GOSSIP_INODE_DEBUG, | 560 | return !PagePrivate(page); |
| 117 | "orangefs_releasepage called on page %p\n", | ||
| 118 | page); | ||
| 119 | return 0; | ||
| 120 | } | 561 | } |
| 121 | 562 | ||
| 122 | /* | 563 | static void orangefs_freepage(struct page *page) |
| 123 | * Having a direct_IO entry point in the address_space_operations | 564 | { |
| 124 | * struct causes the kernel to allows us to use O_DIRECT on | 565 | if (PagePrivate(page)) { |
| 125 | * open. Nothing will ever call this thing, but in the future we | 566 | kfree((struct orangefs_write_range *)page_private(page)); |
| 126 | * will need to be able to use O_DIRECT on open in order to support | 567 | set_page_private(page, 0); |
| 127 | * AIO. Modeled after NFS, they do this too. | 568 | ClearPagePrivate(page); |
| 128 | */ | 569 | put_page(page); |
| 570 | } | ||
| 571 | } | ||
| 572 | |||
| 573 | static int orangefs_launder_page(struct page *page) | ||
| 574 | { | ||
| 575 | int r = 0; | ||
| 576 | struct writeback_control wbc = { | ||
| 577 | .sync_mode = WB_SYNC_ALL, | ||
| 578 | .nr_to_write = 0, | ||
| 579 | }; | ||
| 580 | wait_on_page_writeback(page); | ||
| 581 | if (clear_page_dirty_for_io(page)) { | ||
| 582 | r = orangefs_writepage_locked(page, &wbc); | ||
| 583 | end_page_writeback(page); | ||
| 584 | } | ||
| 585 | return r; | ||
| 586 | } | ||
| 129 | 587 | ||
| 130 | static ssize_t orangefs_direct_IO(struct kiocb *iocb, | 588 | static ssize_t orangefs_direct_IO(struct kiocb *iocb, |
| 131 | struct iov_iter *iter) | 589 | struct iov_iter *iter) |
| 132 | { | 590 | { |
| 133 | gossip_debug(GOSSIP_INODE_DEBUG, | 591 | /* |
| 134 | "orangefs_direct_IO: %pD\n", | 592 | * Comment from original do_readv_writev: |
| 135 | iocb->ki_filp); | 593 | * Common entry point for read/write/readv/writev |
| 594 | * This function will dispatch it to either the direct I/O | ||
| 595 | * or buffered I/O path depending on the mount options and/or | ||
| 596 | * augmented/extended metadata attached to the file. | ||
| 597 | * Note: File extended attributes override any mount options. | ||
| 598 | */ | ||
| 599 | struct file *file = iocb->ki_filp; | ||
| 600 | loff_t pos = iocb->ki_pos; | ||
| 601 | enum ORANGEFS_io_type type = iov_iter_rw(iter) == WRITE ? | ||
| 602 | ORANGEFS_IO_WRITE : ORANGEFS_IO_READ; | ||
| 603 | loff_t *offset = &pos; | ||
| 604 | struct inode *inode = file->f_mapping->host; | ||
| 605 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | ||
| 606 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; | ||
| 607 | size_t count = iov_iter_count(iter); | ||
| 608 | ssize_t total_count = 0; | ||
| 609 | ssize_t ret = -EINVAL; | ||
| 610 | int i = 0; | ||
| 611 | |||
| 612 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 613 | "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", | ||
| 614 | __func__, | ||
| 615 | handle, | ||
| 616 | (int)count); | ||
| 617 | |||
| 618 | if (type == ORANGEFS_IO_WRITE) { | ||
| 619 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 620 | "%s(%pU): proceeding with offset : %llu, " | ||
| 621 | "size %d\n", | ||
| 622 | __func__, | ||
| 623 | handle, | ||
| 624 | llu(*offset), | ||
| 625 | (int)count); | ||
| 626 | } | ||
| 627 | |||
| 628 | if (count == 0) { | ||
| 629 | ret = 0; | ||
| 630 | goto out; | ||
| 631 | } | ||
| 136 | 632 | ||
| 137 | return -EINVAL; | 633 | while (iov_iter_count(iter)) { |
| 634 | size_t each_count = iov_iter_count(iter); | ||
| 635 | size_t amt_complete; | ||
| 636 | i++; | ||
| 637 | |||
| 638 | /* how much to transfer in this loop iteration */ | ||
| 639 | if (each_count > orangefs_bufmap_size_query()) | ||
| 640 | each_count = orangefs_bufmap_size_query(); | ||
| 641 | |||
| 642 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 643 | "%s(%pU): size of each_count(%d)\n", | ||
| 644 | __func__, | ||
| 645 | handle, | ||
| 646 | (int)each_count); | ||
| 647 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 648 | "%s(%pU): BEFORE wait_for_io: offset is %d\n", | ||
| 649 | __func__, | ||
| 650 | handle, | ||
| 651 | (int)*offset); | ||
| 652 | |||
| 653 | ret = wait_for_direct_io(type, inode, offset, iter, | ||
| 654 | each_count, 0, NULL, NULL); | ||
| 655 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 656 | "%s(%pU): return from wait_for_io:%d\n", | ||
| 657 | __func__, | ||
| 658 | handle, | ||
| 659 | (int)ret); | ||
| 660 | |||
| 661 | if (ret < 0) | ||
| 662 | goto out; | ||
| 663 | |||
| 664 | *offset += ret; | ||
| 665 | total_count += ret; | ||
| 666 | amt_complete = ret; | ||
| 667 | |||
| 668 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 669 | "%s(%pU): AFTER wait_for_io: offset is %d\n", | ||
| 670 | __func__, | ||
| 671 | handle, | ||
| 672 | (int)*offset); | ||
| 673 | |||
| 674 | /* | ||
| 675 | * if we got a short I/O operations, | ||
| 676 | * fall out and return what we got so far | ||
| 677 | */ | ||
| 678 | if (amt_complete < each_count) | ||
| 679 | break; | ||
| 680 | } /*end while */ | ||
| 681 | |||
| 682 | out: | ||
| 683 | if (total_count > 0) | ||
| 684 | ret = total_count; | ||
| 685 | if (ret > 0) { | ||
| 686 | if (type == ORANGEFS_IO_READ) { | ||
| 687 | file_accessed(file); | ||
| 688 | } else { | ||
| 689 | file_update_time(file); | ||
| 690 | if (*offset > i_size_read(inode)) | ||
| 691 | i_size_write(inode, *offset); | ||
| 692 | } | ||
| 693 | } | ||
| 694 | |||
| 695 | gossip_debug(GOSSIP_FILE_DEBUG, | ||
| 696 | "%s(%pU): Value(%d) returned.\n", | ||
| 697 | __func__, | ||
| 698 | handle, | ||
| 699 | (int)ret); | ||
| 700 | |||
| 701 | return ret; | ||
| 138 | } | 702 | } |
| 139 | 703 | ||
| 140 | /** ORANGEFS2 implementation of address space operations */ | 704 | /** ORANGEFS2 implementation of address space operations */ |
| 141 | static const struct address_space_operations orangefs_address_operations = { | 705 | static const struct address_space_operations orangefs_address_operations = { |
| 706 | .writepage = orangefs_writepage, | ||
| 142 | .readpage = orangefs_readpage, | 707 | .readpage = orangefs_readpage, |
| 143 | .readpages = orangefs_readpages, | 708 | .writepages = orangefs_writepages, |
| 709 | .set_page_dirty = __set_page_dirty_nobuffers, | ||
| 710 | .write_begin = orangefs_write_begin, | ||
| 711 | .write_end = orangefs_write_end, | ||
| 144 | .invalidatepage = orangefs_invalidatepage, | 712 | .invalidatepage = orangefs_invalidatepage, |
| 145 | .releasepage = orangefs_releasepage, | 713 | .releasepage = orangefs_releasepage, |
| 714 | .freepage = orangefs_freepage, | ||
| 715 | .launder_page = orangefs_launder_page, | ||
| 146 | .direct_IO = orangefs_direct_IO, | 716 | .direct_IO = orangefs_direct_IO, |
| 147 | }; | 717 | }; |
| 148 | 718 | ||
| 719 | vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf) | ||
| 720 | { | ||
| 721 | struct page *page = vmf->page; | ||
| 722 | struct inode *inode = file_inode(vmf->vma->vm_file); | ||
| 723 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | ||
| 724 | unsigned long *bitlock = &orangefs_inode->bitlock; | ||
| 725 | vm_fault_t ret; | ||
| 726 | struct orangefs_write_range *wr; | ||
| 727 | |||
| 728 | sb_start_pagefault(inode->i_sb); | ||
| 729 | |||
| 730 | if (wait_on_bit(bitlock, 1, TASK_KILLABLE)) { | ||
| 731 | ret = VM_FAULT_RETRY; | ||
| 732 | goto out; | ||
| 733 | } | ||
| 734 | |||
| 735 | lock_page(page); | ||
| 736 | if (PageDirty(page) && !PagePrivate(page)) { | ||
| 737 | /* | ||
| 738 | * Should be impossible. If it happens, launder the page | ||
| 739 | * since we don't know what's dirty. This will WARN in | ||
| 740 | * orangefs_writepage_locked. | ||
| 741 | */ | ||
| 742 | if (orangefs_launder_page(page)) { | ||
| 743 | ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; | ||
| 744 | goto out; | ||
| 745 | } | ||
| 746 | } | ||
| 747 | if (PagePrivate(page)) { | ||
| 748 | wr = (struct orangefs_write_range *)page_private(page); | ||
| 749 | if (uid_eq(wr->uid, current_fsuid()) && | ||
| 750 | gid_eq(wr->gid, current_fsgid())) { | ||
| 751 | wr->pos = page_offset(page); | ||
| 752 | wr->len = PAGE_SIZE; | ||
| 753 | goto okay; | ||
| 754 | } else { | ||
| 755 | if (orangefs_launder_page(page)) { | ||
| 756 | ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; | ||
| 757 | goto out; | ||
| 758 | } | ||
| 759 | } | ||
| 760 | } | ||
| 761 | wr = kmalloc(sizeof *wr, GFP_KERNEL); | ||
| 762 | if (!wr) { | ||
| 763 | ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; | ||
| 764 | goto out; | ||
| 765 | } | ||
| 766 | wr->pos = page_offset(page); | ||
| 767 | wr->len = PAGE_SIZE; | ||
| 768 | wr->uid = current_fsuid(); | ||
| 769 | wr->gid = current_fsgid(); | ||
| 770 | SetPagePrivate(page); | ||
| 771 | set_page_private(page, (unsigned long)wr); | ||
| 772 | get_page(page); | ||
| 773 | okay: | ||
| 774 | |||
| 775 | file_update_time(vmf->vma->vm_file); | ||
| 776 | if (page->mapping != inode->i_mapping) { | ||
| 777 | unlock_page(page); | ||
| 778 | ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE; | ||
| 779 | goto out; | ||
| 780 | } | ||
| 781 | |||
| 782 | /* | ||
| 783 | * We mark the page dirty already here so that when freeze is in | ||
| 784 | * progress, we are guaranteed that writeback during freezing will | ||
| 785 | * see the dirty page and writeprotect it again. | ||
| 786 | */ | ||
| 787 | set_page_dirty(page); | ||
| 788 | wait_for_stable_page(page); | ||
| 789 | ret = VM_FAULT_LOCKED; | ||
| 790 | out: | ||
| 791 | sb_end_pagefault(inode->i_sb); | ||
| 792 | return ret; | ||
| 793 | } | ||
| 794 | |||
| 149 | static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) | 795 | static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) |
| 150 | { | 796 | { |
| 151 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 797 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| @@ -162,7 +808,7 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) | |||
| 162 | iattr->ia_size); | 808 | iattr->ia_size); |
| 163 | 809 | ||
| 164 | /* Ensure that we have a up to date size, so we know if it changed. */ | 810 | /* Ensure that we have a up to date size, so we know if it changed. */ |
| 165 | ret = orangefs_inode_getattr(inode, 0, 1, STATX_SIZE); | 811 | ret = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_SIZE); |
| 166 | if (ret == -ESTALE) | 812 | if (ret == -ESTALE) |
| 167 | ret = -EIO; | 813 | ret = -EIO; |
| 168 | if (ret) { | 814 | if (ret) { |
| @@ -172,7 +818,11 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) | |||
| 172 | } | 818 | } |
| 173 | orig_size = i_size_read(inode); | 819 | orig_size = i_size_read(inode); |
| 174 | 820 | ||
| 175 | truncate_setsize(inode, iattr->ia_size); | 821 | /* This is truncate_setsize in a different order. */ |
| 822 | truncate_pagecache(inode, iattr->ia_size); | ||
| 823 | i_size_write(inode, iattr->ia_size); | ||
| 824 | if (iattr->ia_size > orig_size) | ||
| 825 | pagecache_isize_extended(inode, orig_size, iattr->ia_size); | ||
| 176 | 826 | ||
| 177 | new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE); | 827 | new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE); |
| 178 | if (!new_op) | 828 | if (!new_op) |
| @@ -202,22 +852,33 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) | |||
| 202 | return ret; | 852 | return ret; |
| 203 | } | 853 | } |
| 204 | 854 | ||
| 205 | /* | 855 | int __orangefs_setattr(struct inode *inode, struct iattr *iattr) |
| 206 | * Change attributes of an object referenced by dentry. | ||
| 207 | */ | ||
| 208 | int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
| 209 | { | 856 | { |
| 210 | int ret = -EINVAL; | 857 | int ret; |
| 211 | struct inode *inode = dentry->d_inode; | ||
| 212 | |||
| 213 | gossip_debug(GOSSIP_INODE_DEBUG, | ||
| 214 | "%s: called on %pd\n", | ||
| 215 | __func__, | ||
| 216 | dentry); | ||
| 217 | 858 | ||
| 218 | ret = setattr_prepare(dentry, iattr); | 859 | if (iattr->ia_valid & ATTR_MODE) { |
| 219 | if (ret) | 860 | if (iattr->ia_mode & (S_ISVTX)) { |
| 220 | goto out; | 861 | if (is_root_handle(inode)) { |
| 862 | /* | ||
| 863 | * allow sticky bit to be set on root (since | ||
| 864 | * it shows up that way by default anyhow), | ||
| 865 | * but don't show it to the server | ||
| 866 | */ | ||
| 867 | iattr->ia_mode -= S_ISVTX; | ||
| 868 | } else { | ||
| 869 | gossip_debug(GOSSIP_UTILS_DEBUG, | ||
| 870 | "User attempted to set sticky bit on non-root directory; returning EINVAL.\n"); | ||
| 871 | ret = -EINVAL; | ||
| 872 | goto out; | ||
| 873 | } | ||
| 874 | } | ||
| 875 | if (iattr->ia_mode & (S_ISUID)) { | ||
| 876 | gossip_debug(GOSSIP_UTILS_DEBUG, | ||
| 877 | "Attempting to set setuid bit (not supported); returning EINVAL.\n"); | ||
| 878 | ret = -EINVAL; | ||
| 879 | goto out; | ||
| 880 | } | ||
| 881 | } | ||
| 221 | 882 | ||
| 222 | if (iattr->ia_valid & ATTR_SIZE) { | 883 | if (iattr->ia_valid & ATTR_SIZE) { |
| 223 | ret = orangefs_setattr_size(inode, iattr); | 884 | ret = orangefs_setattr_size(inode, iattr); |
| @@ -225,21 +886,51 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
| 225 | goto out; | 886 | goto out; |
| 226 | } | 887 | } |
| 227 | 888 | ||
| 889 | again: | ||
| 890 | spin_lock(&inode->i_lock); | ||
| 891 | if (ORANGEFS_I(inode)->attr_valid) { | ||
| 892 | if (uid_eq(ORANGEFS_I(inode)->attr_uid, current_fsuid()) && | ||
| 893 | gid_eq(ORANGEFS_I(inode)->attr_gid, current_fsgid())) { | ||
| 894 | ORANGEFS_I(inode)->attr_valid = iattr->ia_valid; | ||
| 895 | } else { | ||
| 896 | spin_unlock(&inode->i_lock); | ||
| 897 | write_inode_now(inode, 1); | ||
| 898 | goto again; | ||
| 899 | } | ||
| 900 | } else { | ||
| 901 | ORANGEFS_I(inode)->attr_valid = iattr->ia_valid; | ||
| 902 | ORANGEFS_I(inode)->attr_uid = current_fsuid(); | ||
| 903 | ORANGEFS_I(inode)->attr_gid = current_fsgid(); | ||
| 904 | } | ||
| 228 | setattr_copy(inode, iattr); | 905 | setattr_copy(inode, iattr); |
| 906 | spin_unlock(&inode->i_lock); | ||
| 229 | mark_inode_dirty(inode); | 907 | mark_inode_dirty(inode); |
| 230 | 908 | ||
| 231 | ret = orangefs_inode_setattr(inode, iattr); | 909 | if (iattr->ia_valid & ATTR_MODE) |
| 232 | gossip_debug(GOSSIP_INODE_DEBUG, | ||
| 233 | "%s: orangefs_inode_setattr returned %d\n", | ||
| 234 | __func__, | ||
| 235 | ret); | ||
| 236 | |||
| 237 | if (!ret && (iattr->ia_valid & ATTR_MODE)) | ||
| 238 | /* change mod on a file that has ACLs */ | 910 | /* change mod on a file that has ACLs */ |
| 239 | ret = posix_acl_chmod(inode, inode->i_mode); | 911 | ret = posix_acl_chmod(inode, inode->i_mode); |
| 240 | 912 | ||
| 913 | ret = 0; | ||
| 241 | out: | 914 | out: |
| 242 | gossip_debug(GOSSIP_INODE_DEBUG, "%s: ret:%d:\n", __func__, ret); | 915 | return ret; |
| 916 | } | ||
| 917 | |||
| 918 | /* | ||
| 919 | * Change attributes of an object referenced by dentry. | ||
| 920 | */ | ||
| 921 | int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
| 922 | { | ||
| 923 | int ret; | ||
| 924 | gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n", | ||
| 925 | dentry); | ||
| 926 | ret = setattr_prepare(dentry, iattr); | ||
| 927 | if (ret) | ||
| 928 | goto out; | ||
| 929 | ret = __orangefs_setattr(d_inode(dentry), iattr); | ||
| 930 | sync_inode_metadata(d_inode(dentry), 1); | ||
| 931 | out: | ||
| 932 | gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", | ||
| 933 | ret); | ||
| 243 | return ret; | 934 | return ret; |
| 244 | } | 935 | } |
| 245 | 936 | ||
| @@ -253,10 +944,11 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, | |||
| 253 | struct inode *inode = path->dentry->d_inode; | 944 | struct inode *inode = path->dentry->d_inode; |
| 254 | 945 | ||
| 255 | gossip_debug(GOSSIP_INODE_DEBUG, | 946 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 256 | "orangefs_getattr: called on %pd\n", | 947 | "orangefs_getattr: called on %pd mask %u\n", |
| 257 | path->dentry); | 948 | path->dentry, request_mask); |
| 258 | 949 | ||
| 259 | ret = orangefs_inode_getattr(inode, 0, 0, request_mask); | 950 | ret = orangefs_inode_getattr(inode, |
| 951 | request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0); | ||
| 260 | if (ret == 0) { | 952 | if (ret == 0) { |
| 261 | generic_fillattr(inode, stat); | 953 | generic_fillattr(inode, stat); |
| 262 | 954 | ||
| @@ -284,7 +976,7 @@ int orangefs_permission(struct inode *inode, int mask) | |||
| 284 | gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); | 976 | gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); |
| 285 | 977 | ||
| 286 | /* Make sure the permission (and other common attrs) are up to date. */ | 978 | /* Make sure the permission (and other common attrs) are up to date. */ |
| 287 | ret = orangefs_inode_getattr(inode, 0, 0, STATX_MODE); | 979 | ret = orangefs_inode_getattr(inode, 0); |
| 288 | if (ret < 0) | 980 | if (ret < 0) |
| 289 | return ret; | 981 | return ret; |
| 290 | 982 | ||
| @@ -304,7 +996,7 @@ int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags | |||
| 304 | iattr.ia_valid |= ATTR_CTIME; | 996 | iattr.ia_valid |= ATTR_CTIME; |
| 305 | if (flags & S_MTIME) | 997 | if (flags & S_MTIME) |
| 306 | iattr.ia_valid |= ATTR_MTIME; | 998 | iattr.ia_valid |= ATTR_MTIME; |
| 307 | return orangefs_inode_setattr(inode, &iattr); | 999 | return __orangefs_setattr(inode, &iattr); |
| 308 | } | 1000 | } |
| 309 | 1001 | ||
| 310 | /* ORANGEFS2 implementation of VFS inode operations for files */ | 1002 | /* ORANGEFS2 implementation of VFS inode operations for files */ |
| @@ -364,6 +1056,10 @@ static int orangefs_set_inode(struct inode *inode, void *data) | |||
| 364 | struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; | 1056 | struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; |
| 365 | ORANGEFS_I(inode)->refn.fs_id = ref->fs_id; | 1057 | ORANGEFS_I(inode)->refn.fs_id = ref->fs_id; |
| 366 | ORANGEFS_I(inode)->refn.khandle = ref->khandle; | 1058 | ORANGEFS_I(inode)->refn.khandle = ref->khandle; |
| 1059 | ORANGEFS_I(inode)->attr_valid = 0; | ||
| 1060 | hash_init(ORANGEFS_I(inode)->xattr_cache); | ||
| 1061 | ORANGEFS_I(inode)->mapping_time = jiffies - 1; | ||
| 1062 | ORANGEFS_I(inode)->bitlock = 0; | ||
| 367 | return 0; | 1063 | return 0; |
| 368 | } | 1064 | } |
| 369 | 1065 | ||
| @@ -409,7 +1105,7 @@ struct inode *orangefs_iget(struct super_block *sb, | |||
| 409 | if (!(inode->i_state & I_NEW)) | 1105 | if (!(inode->i_state & I_NEW)) |
| 410 | return inode; | 1106 | return inode; |
| 411 | 1107 | ||
| 412 | error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL); | 1108 | error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW); |
| 413 | if (error) { | 1109 | if (error) { |
| 414 | iget_failed(inode); | 1110 | iget_failed(inode); |
| 415 | return ERR_PTR(error); | 1111 | return ERR_PTR(error); |
| @@ -454,17 +1150,11 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, | |||
| 454 | orangefs_set_inode(inode, ref); | 1150 | orangefs_set_inode(inode, ref); |
| 455 | inode->i_ino = hash; /* needed for stat etc */ | 1151 | inode->i_ino = hash; /* needed for stat etc */ |
| 456 | 1152 | ||
| 457 | error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL); | 1153 | error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW); |
| 458 | if (error) | 1154 | if (error) |
| 459 | goto out_iput; | 1155 | goto out_iput; |
| 460 | 1156 | ||
| 461 | orangefs_init_iops(inode); | 1157 | orangefs_init_iops(inode); |
| 462 | |||
| 463 | inode->i_mode = mode; | ||
| 464 | inode->i_uid = current_fsuid(); | ||
| 465 | inode->i_gid = current_fsgid(); | ||
| 466 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); | ||
| 467 | inode->i_size = PAGE_SIZE; | ||
| 468 | inode->i_rdev = dev; | 1158 | inode->i_rdev = dev; |
| 469 | 1159 | ||
| 470 | error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref); | 1160 | error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref); |
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index c8676c996249..1dd710e5f376 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c | |||
| @@ -76,19 +76,16 @@ static int orangefs_create(struct inode *dir, | |||
| 76 | 76 | ||
| 77 | d_instantiate_new(dentry, inode); | 77 | d_instantiate_new(dentry, inode); |
| 78 | orangefs_set_timeout(dentry); | 78 | orangefs_set_timeout(dentry); |
| 79 | ORANGEFS_I(inode)->getattr_time = jiffies - 1; | ||
| 80 | ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; | ||
| 81 | 79 | ||
| 82 | gossip_debug(GOSSIP_NAME_DEBUG, | 80 | gossip_debug(GOSSIP_NAME_DEBUG, |
| 83 | "%s: dentry instantiated for %pd\n", | 81 | "%s: dentry instantiated for %pd\n", |
| 84 | __func__, | 82 | __func__, |
| 85 | dentry); | 83 | dentry); |
| 86 | 84 | ||
| 87 | dir->i_mtime = dir->i_ctime = current_time(dir); | ||
| 88 | memset(&iattr, 0, sizeof iattr); | 85 | memset(&iattr, 0, sizeof iattr); |
| 89 | iattr.ia_valid |= ATTR_MTIME; | 86 | iattr.ia_valid |= ATTR_MTIME | ATTR_CTIME; |
| 90 | orangefs_inode_setattr(dir, &iattr); | 87 | iattr.ia_mtime = iattr.ia_ctime = current_time(dir); |
| 91 | mark_inode_dirty_sync(dir); | 88 | __orangefs_setattr(dir, &iattr); |
| 92 | ret = 0; | 89 | ret = 0; |
| 93 | out: | 90 | out: |
| 94 | op_release(new_op); | 91 | op_release(new_op); |
| @@ -210,11 +207,10 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 210 | if (!ret) { | 207 | if (!ret) { |
| 211 | drop_nlink(inode); | 208 | drop_nlink(inode); |
| 212 | 209 | ||
| 213 | dir->i_mtime = dir->i_ctime = current_time(dir); | ||
| 214 | memset(&iattr, 0, sizeof iattr); | 210 | memset(&iattr, 0, sizeof iattr); |
| 215 | iattr.ia_valid |= ATTR_MTIME; | 211 | iattr.ia_valid |= ATTR_MTIME | ATTR_CTIME; |
| 216 | orangefs_inode_setattr(dir, &iattr); | 212 | iattr.ia_mtime = iattr.ia_ctime = current_time(dir); |
| 217 | mark_inode_dirty_sync(dir); | 213 | __orangefs_setattr(dir, &iattr); |
| 218 | } | 214 | } |
| 219 | return ret; | 215 | return ret; |
| 220 | } | 216 | } |
| @@ -291,19 +287,16 @@ static int orangefs_symlink(struct inode *dir, | |||
| 291 | 287 | ||
| 292 | d_instantiate_new(dentry, inode); | 288 | d_instantiate_new(dentry, inode); |
| 293 | orangefs_set_timeout(dentry); | 289 | orangefs_set_timeout(dentry); |
| 294 | ORANGEFS_I(inode)->getattr_time = jiffies - 1; | ||
| 295 | ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; | ||
| 296 | 290 | ||
| 297 | gossip_debug(GOSSIP_NAME_DEBUG, | 291 | gossip_debug(GOSSIP_NAME_DEBUG, |
| 298 | "Inode (Symlink) %pU -> %pd\n", | 292 | "Inode (Symlink) %pU -> %pd\n", |
| 299 | get_khandle_from_ino(inode), | 293 | get_khandle_from_ino(inode), |
| 300 | dentry); | 294 | dentry); |
| 301 | 295 | ||
| 302 | dir->i_mtime = dir->i_ctime = current_time(dir); | ||
| 303 | memset(&iattr, 0, sizeof iattr); | 296 | memset(&iattr, 0, sizeof iattr); |
| 304 | iattr.ia_valid |= ATTR_MTIME; | 297 | iattr.ia_valid |= ATTR_MTIME | ATTR_CTIME; |
| 305 | orangefs_inode_setattr(dir, &iattr); | 298 | iattr.ia_mtime = iattr.ia_ctime = current_time(dir); |
| 306 | mark_inode_dirty_sync(dir); | 299 | __orangefs_setattr(dir, &iattr); |
| 307 | ret = 0; | 300 | ret = 0; |
| 308 | out: | 301 | out: |
| 309 | op_release(new_op); | 302 | op_release(new_op); |
| @@ -360,8 +353,6 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
| 360 | 353 | ||
| 361 | d_instantiate_new(dentry, inode); | 354 | d_instantiate_new(dentry, inode); |
| 362 | orangefs_set_timeout(dentry); | 355 | orangefs_set_timeout(dentry); |
| 363 | ORANGEFS_I(inode)->getattr_time = jiffies - 1; | ||
| 364 | ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; | ||
| 365 | 356 | ||
| 366 | gossip_debug(GOSSIP_NAME_DEBUG, | 357 | gossip_debug(GOSSIP_NAME_DEBUG, |
| 367 | "Inode (Directory) %pU -> %pd\n", | 358 | "Inode (Directory) %pU -> %pd\n", |
| @@ -372,11 +363,10 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
| 372 | * NOTE: we have no good way to keep nlink consistent for directories | 363 | * NOTE: we have no good way to keep nlink consistent for directories |
| 373 | * across clients; keep constant at 1. | 364 | * across clients; keep constant at 1. |
| 374 | */ | 365 | */ |
| 375 | dir->i_mtime = dir->i_ctime = current_time(dir); | ||
| 376 | memset(&iattr, 0, sizeof iattr); | 366 | memset(&iattr, 0, sizeof iattr); |
| 377 | iattr.ia_valid |= ATTR_MTIME; | 367 | iattr.ia_valid |= ATTR_MTIME | ATTR_CTIME; |
| 378 | orangefs_inode_setattr(dir, &iattr); | 368 | iattr.ia_mtime = iattr.ia_ctime = current_time(dir); |
| 379 | mark_inode_dirty_sync(dir); | 369 | __orangefs_setattr(dir, &iattr); |
| 380 | out: | 370 | out: |
| 381 | op_release(new_op); | 371 | op_release(new_op); |
| 382 | return ret; | 372 | return ret; |
| @@ -389,6 +379,7 @@ static int orangefs_rename(struct inode *old_dir, | |||
| 389 | unsigned int flags) | 379 | unsigned int flags) |
| 390 | { | 380 | { |
| 391 | struct orangefs_kernel_op_s *new_op; | 381 | struct orangefs_kernel_op_s *new_op; |
| 382 | struct iattr iattr; | ||
| 392 | int ret; | 383 | int ret; |
| 393 | 384 | ||
| 394 | if (flags) | 385 | if (flags) |
| @@ -398,7 +389,10 @@ static int orangefs_rename(struct inode *old_dir, | |||
| 398 | "orangefs_rename: called (%pd2 => %pd2) ct=%d\n", | 389 | "orangefs_rename: called (%pd2 => %pd2) ct=%d\n", |
| 399 | old_dentry, new_dentry, d_count(new_dentry)); | 390 | old_dentry, new_dentry, d_count(new_dentry)); |
| 400 | 391 | ||
| 401 | ORANGEFS_I(new_dentry->d_parent->d_inode)->getattr_time = jiffies - 1; | 392 | memset(&iattr, 0, sizeof iattr); |
| 393 | iattr.ia_valid |= ATTR_MTIME | ATTR_CTIME; | ||
| 394 | iattr.ia_mtime = iattr.ia_ctime = current_time(new_dir); | ||
| 395 | __orangefs_setattr(new_dir, &iattr); | ||
| 402 | 396 | ||
| 403 | new_op = op_alloc(ORANGEFS_VFS_OP_RENAME); | 397 | new_op = op_alloc(ORANGEFS_VFS_OP_RENAME); |
| 404 | if (!new_op) | 398 | if (!new_op) |
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 443bcd8c3c19..d4811f981608 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c | |||
| @@ -538,3 +538,16 @@ int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter, | |||
| 538 | } | 538 | } |
| 539 | return 0; | 539 | return 0; |
| 540 | } | 540 | } |
| 541 | |||
| 542 | void orangefs_bufmap_page_fill(void *page_to, | ||
| 543 | int buffer_index, | ||
| 544 | int slot_index) | ||
| 545 | { | ||
| 546 | struct orangefs_bufmap_desc *from; | ||
| 547 | void *page_from; | ||
| 548 | |||
| 549 | from = &__orangefs_bufmap->desc_array[buffer_index]; | ||
| 550 | page_from = kmap_atomic(from->page_array[slot_index]); | ||
| 551 | memcpy(page_to, page_from, PAGE_SIZE); | ||
| 552 | kunmap_atomic(page_from); | ||
| 553 | } | ||
diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index c2c3c5a0eeab..75b2d2833af1 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h | |||
| @@ -34,4 +34,6 @@ int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter, | |||
| 34 | int buffer_index, | 34 | int buffer_index, |
| 35 | size_t size); | 35 | size_t size); |
| 36 | 36 | ||
| 37 | void orangefs_bufmap_page_fill(void *kaddr, int buffer_index, int slot_index); | ||
| 38 | |||
| 37 | #endif /* __ORANGEFS_BUFMAP_H */ | 39 | #endif /* __ORANGEFS_BUFMAP_H */ |
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 0732cb08173e..87b1a6fce628 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c | |||
| @@ -963,7 +963,7 @@ int orangefs_debugfs_new_client_mask(void __user *arg) | |||
| 963 | return ret; | 963 | return ret; |
| 964 | } | 964 | } |
| 965 | 965 | ||
| 966 | int orangefs_debugfs_new_client_string(void __user *arg) | 966 | int orangefs_debugfs_new_client_string(void __user *arg) |
| 967 | { | 967 | { |
| 968 | int ret; | 968 | int ret; |
| 969 | 969 | ||
| @@ -1016,7 +1016,7 @@ int orangefs_debugfs_new_client_string(void __user *arg) | |||
| 1016 | return 0; | 1016 | return 0; |
| 1017 | } | 1017 | } |
| 1018 | 1018 | ||
| 1019 | int orangefs_debugfs_new_debug(void __user *arg) | 1019 | int orangefs_debugfs_new_debug(void __user *arg) |
| 1020 | { | 1020 | { |
| 1021 | struct dev_mask_info_s mask_info = {0}; | 1021 | struct dev_mask_info_s mask_info = {0}; |
| 1022 | int ret; | 1022 | int ret; |
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 17b24ad6b264..572dd29fbd54 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <linux/rwsem.h> | 51 | #include <linux/rwsem.h> |
| 52 | #include <linux/xattr.h> | 52 | #include <linux/xattr.h> |
| 53 | #include <linux/exportfs.h> | 53 | #include <linux/exportfs.h> |
| 54 | #include <linux/hashtable.h> | ||
| 54 | 55 | ||
| 55 | #include <asm/unaligned.h> | 56 | #include <asm/unaligned.h> |
| 56 | 57 | ||
| @@ -192,7 +193,13 @@ struct orangefs_inode_s { | |||
| 192 | sector_t last_failed_block_index_read; | 193 | sector_t last_failed_block_index_read; |
| 193 | 194 | ||
| 194 | unsigned long getattr_time; | 195 | unsigned long getattr_time; |
| 195 | u32 getattr_mask; | 196 | unsigned long mapping_time; |
| 197 | int attr_valid; | ||
| 198 | kuid_t attr_uid; | ||
| 199 | kgid_t attr_gid; | ||
| 200 | unsigned long bitlock; | ||
| 201 | |||
| 202 | DECLARE_HASHTABLE(xattr_cache, 4); | ||
| 196 | }; | 203 | }; |
| 197 | 204 | ||
| 198 | /* per superblock private orangefs info */ | 205 | /* per superblock private orangefs info */ |
| @@ -217,6 +224,25 @@ struct orangefs_stats { | |||
| 217 | unsigned long writes; | 224 | unsigned long writes; |
| 218 | }; | 225 | }; |
| 219 | 226 | ||
| 227 | struct orangefs_cached_xattr { | ||
| 228 | struct hlist_node node; | ||
| 229 | char key[ORANGEFS_MAX_XATTR_NAMELEN]; | ||
| 230 | char val[ORANGEFS_MAX_XATTR_VALUELEN]; | ||
| 231 | ssize_t length; | ||
| 232 | unsigned long timeout; | ||
| 233 | }; | ||
| 234 | |||
| 235 | struct orangefs_write_range { | ||
| 236 | loff_t pos; | ||
| 237 | size_t len; | ||
| 238 | kuid_t uid; | ||
| 239 | kgid_t gid; | ||
| 240 | }; | ||
| 241 | |||
| 242 | struct orangefs_read_options { | ||
| 243 | ssize_t blksiz; | ||
| 244 | }; | ||
| 245 | |||
| 220 | extern struct orangefs_stats orangefs_stats; | 246 | extern struct orangefs_stats orangefs_stats; |
| 221 | 247 | ||
| 222 | /* | 248 | /* |
| @@ -329,13 +355,15 @@ void fsid_key_table_finalize(void); | |||
| 329 | /* | 355 | /* |
| 330 | * defined in inode.c | 356 | * defined in inode.c |
| 331 | */ | 357 | */ |
| 358 | vm_fault_t orangefs_page_mkwrite(struct vm_fault *); | ||
| 332 | struct inode *orangefs_new_inode(struct super_block *sb, | 359 | struct inode *orangefs_new_inode(struct super_block *sb, |
| 333 | struct inode *dir, | 360 | struct inode *dir, |
| 334 | int mode, | 361 | int mode, |
| 335 | dev_t dev, | 362 | dev_t dev, |
| 336 | struct orangefs_object_kref *ref); | 363 | struct orangefs_object_kref *ref); |
| 337 | 364 | ||
| 338 | int orangefs_setattr(struct dentry *dentry, struct iattr *iattr); | 365 | int __orangefs_setattr(struct inode *, struct iattr *); |
| 366 | int orangefs_setattr(struct dentry *, struct iattr *); | ||
| 339 | 367 | ||
| 340 | int orangefs_getattr(const struct path *path, struct kstat *stat, | 368 | int orangefs_getattr(const struct path *path, struct kstat *stat, |
| 341 | u32 request_mask, unsigned int flags); | 369 | u32 request_mask, unsigned int flags); |
| @@ -355,11 +383,6 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
| 355 | struct inode *orangefs_iget(struct super_block *sb, | 383 | struct inode *orangefs_iget(struct super_block *sb, |
| 356 | struct orangefs_object_kref *ref); | 384 | struct orangefs_object_kref *ref); |
| 357 | 385 | ||
| 358 | ssize_t orangefs_inode_read(struct inode *inode, | ||
| 359 | struct iov_iter *iter, | ||
| 360 | loff_t *offset, | ||
| 361 | loff_t readahead_size); | ||
| 362 | |||
| 363 | /* | 386 | /* |
| 364 | * defined in devorangefs-req.c | 387 | * defined in devorangefs-req.c |
| 365 | */ | 388 | */ |
| @@ -371,6 +394,15 @@ int is_daemon_in_service(void); | |||
| 371 | bool __is_daemon_in_service(void); | 394 | bool __is_daemon_in_service(void); |
| 372 | 395 | ||
| 373 | /* | 396 | /* |
| 397 | * defined in file.c | ||
| 398 | */ | ||
| 399 | int orangefs_revalidate_mapping(struct inode *); | ||
| 400 | ssize_t wait_for_direct_io(enum ORANGEFS_io_type, struct inode *, loff_t *, | ||
| 401 | struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *); | ||
| 402 | ssize_t do_readv_writev(enum ORANGEFS_io_type, struct file *, loff_t *, | ||
| 403 | struct iov_iter *); | ||
| 404 | |||
| 405 | /* | ||
| 374 | * defined in orangefs-utils.c | 406 | * defined in orangefs-utils.c |
| 375 | */ | 407 | */ |
| 376 | __s32 fsid_of_op(struct orangefs_kernel_op_s *op); | 408 | __s32 fsid_of_op(struct orangefs_kernel_op_s *op); |
| @@ -386,12 +418,14 @@ int orangefs_inode_setxattr(struct inode *inode, | |||
| 386 | size_t size, | 418 | size_t size, |
| 387 | int flags); | 419 | int flags); |
| 388 | 420 | ||
| 389 | int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | 421 | #define ORANGEFS_GETATTR_NEW 1 |
| 390 | u32 request_mask); | 422 | #define ORANGEFS_GETATTR_SIZE 2 |
| 423 | |||
| 424 | int orangefs_inode_getattr(struct inode *, int); | ||
| 391 | 425 | ||
| 392 | int orangefs_inode_check_changed(struct inode *inode); | 426 | int orangefs_inode_check_changed(struct inode *inode); |
| 393 | 427 | ||
| 394 | int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); | 428 | int orangefs_inode_setattr(struct inode *inode); |
| 395 | 429 | ||
| 396 | bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); | 430 | bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); |
| 397 | 431 | ||
| @@ -400,6 +434,7 @@ int orangefs_normalize_to_errno(__s32 error_code); | |||
| 400 | extern struct mutex orangefs_request_mutex; | 434 | extern struct mutex orangefs_request_mutex; |
| 401 | extern int op_timeout_secs; | 435 | extern int op_timeout_secs; |
| 402 | extern int slot_timeout_secs; | 436 | extern int slot_timeout_secs; |
| 437 | extern int orangefs_cache_timeout_msecs; | ||
| 403 | extern int orangefs_dcache_timeout_msecs; | 438 | extern int orangefs_dcache_timeout_msecs; |
| 404 | extern int orangefs_getattr_timeout_msecs; | 439 | extern int orangefs_getattr_timeout_msecs; |
| 405 | extern struct list_head orangefs_superblocks; | 440 | extern struct list_head orangefs_superblocks; |
| @@ -426,6 +461,7 @@ extern const struct dentry_operations orangefs_dentry_operations; | |||
| 426 | #define ORANGEFS_OP_CANCELLATION 4 /* this is a cancellation */ | 461 | #define ORANGEFS_OP_CANCELLATION 4 /* this is a cancellation */ |
| 427 | #define ORANGEFS_OP_NO_MUTEX 8 /* don't acquire request_mutex */ | 462 | #define ORANGEFS_OP_NO_MUTEX 8 /* don't acquire request_mutex */ |
| 428 | #define ORANGEFS_OP_ASYNC 16 /* Queue it, but don't wait */ | 463 | #define ORANGEFS_OP_ASYNC 16 /* Queue it, but don't wait */ |
| 464 | #define ORANGEFS_OP_WRITEBACK 32 | ||
| 429 | 465 | ||
| 430 | int service_operation(struct orangefs_kernel_op_s *op, | 466 | int service_operation(struct orangefs_kernel_op_s *op, |
| 431 | const char *op_name, | 467 | const char *op_name, |
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 85ef87245a87..82cf8b3e568b 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c | |||
| @@ -30,6 +30,7 @@ static ulong module_parm_debug_mask; | |||
| 30 | __u64 orangefs_gossip_debug_mask; | 30 | __u64 orangefs_gossip_debug_mask; |
| 31 | int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS; | 31 | int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS; |
| 32 | int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS; | 32 | int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS; |
| 33 | int orangefs_cache_timeout_msecs = 50; | ||
| 33 | int orangefs_dcache_timeout_msecs = 50; | 34 | int orangefs_dcache_timeout_msecs = 50; |
| 34 | int orangefs_getattr_timeout_msecs = 50; | 35 | int orangefs_getattr_timeout_msecs = 50; |
| 35 | 36 | ||
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 19739aaee675..3627ea946402 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c | |||
| @@ -62,6 +62,14 @@ | |||
| 62 | * Slots are requested and waited for, | 62 | * Slots are requested and waited for, |
| 63 | * the wait times out after slot_timeout_secs. | 63 | * the wait times out after slot_timeout_secs. |
| 64 | * | 64 | * |
| 65 | * What: /sys/fs/orangefs/cache_timeout_msecs | ||
| 66 | * Date: Mar 2018 | ||
| 67 | * Contact: Martin Brandenburg <martin@omnibond.com> | ||
| 68 | * Description: | ||
| 69 | * Time in milliseconds between which | ||
| 70 | * orangefs_revalidate_mapping will invalidate the page | ||
| 71 | * cache. | ||
| 72 | * | ||
| 65 | * What: /sys/fs/orangefs/dcache_timeout_msecs | 73 | * What: /sys/fs/orangefs/dcache_timeout_msecs |
| 66 | * Date: Jul 2016 | 74 | * Date: Jul 2016 |
| 67 | * Contact: Martin Brandenburg <martin@omnibond.com> | 75 | * Contact: Martin Brandenburg <martin@omnibond.com> |
| @@ -222,6 +230,13 @@ static ssize_t sysfs_int_show(struct kobject *kobj, | |||
| 222 | slot_timeout_secs); | 230 | slot_timeout_secs); |
| 223 | goto out; | 231 | goto out; |
| 224 | } else if (!strcmp(attr->attr.name, | 232 | } else if (!strcmp(attr->attr.name, |
| 233 | "cache_timeout_msecs")) { | ||
| 234 | rc = scnprintf(buf, | ||
| 235 | PAGE_SIZE, | ||
| 236 | "%d\n", | ||
| 237 | orangefs_cache_timeout_msecs); | ||
| 238 | goto out; | ||
| 239 | } else if (!strcmp(attr->attr.name, | ||
| 225 | "dcache_timeout_msecs")) { | 240 | "dcache_timeout_msecs")) { |
| 226 | rc = scnprintf(buf, | 241 | rc = scnprintf(buf, |
| 227 | PAGE_SIZE, | 242 | PAGE_SIZE, |
| @@ -277,6 +292,9 @@ static ssize_t sysfs_int_store(struct kobject *kobj, | |||
| 277 | } else if (!strcmp(attr->attr.name, "slot_timeout_secs")) { | 292 | } else if (!strcmp(attr->attr.name, "slot_timeout_secs")) { |
| 278 | rc = kstrtoint(buf, 0, &slot_timeout_secs); | 293 | rc = kstrtoint(buf, 0, &slot_timeout_secs); |
| 279 | goto out; | 294 | goto out; |
| 295 | } else if (!strcmp(attr->attr.name, "cache_timeout_msecs")) { | ||
| 296 | rc = kstrtoint(buf, 0, &orangefs_cache_timeout_msecs); | ||
| 297 | goto out; | ||
| 280 | } else if (!strcmp(attr->attr.name, "dcache_timeout_msecs")) { | 298 | } else if (!strcmp(attr->attr.name, "dcache_timeout_msecs")) { |
| 281 | rc = kstrtoint(buf, 0, &orangefs_dcache_timeout_msecs); | 299 | rc = kstrtoint(buf, 0, &orangefs_dcache_timeout_msecs); |
| 282 | goto out; | 300 | goto out; |
| @@ -818,6 +836,9 @@ static struct orangefs_attribute op_timeout_secs_attribute = | |||
| 818 | static struct orangefs_attribute slot_timeout_secs_attribute = | 836 | static struct orangefs_attribute slot_timeout_secs_attribute = |
| 819 | __ATTR(slot_timeout_secs, 0664, sysfs_int_show, sysfs_int_store); | 837 | __ATTR(slot_timeout_secs, 0664, sysfs_int_show, sysfs_int_store); |
| 820 | 838 | ||
| 839 | static struct orangefs_attribute cache_timeout_msecs_attribute = | ||
| 840 | __ATTR(cache_timeout_msecs, 0664, sysfs_int_show, sysfs_int_store); | ||
| 841 | |||
| 821 | static struct orangefs_attribute dcache_timeout_msecs_attribute = | 842 | static struct orangefs_attribute dcache_timeout_msecs_attribute = |
| 822 | __ATTR(dcache_timeout_msecs, 0664, sysfs_int_show, sysfs_int_store); | 843 | __ATTR(dcache_timeout_msecs, 0664, sysfs_int_show, sysfs_int_store); |
| 823 | 844 | ||
| @@ -861,6 +882,7 @@ static struct orangefs_attribute perf_time_interval_secs_attribute = | |||
| 861 | static struct attribute *orangefs_default_attrs[] = { | 882 | static struct attribute *orangefs_default_attrs[] = { |
| 862 | &op_timeout_secs_attribute.attr, | 883 | &op_timeout_secs_attribute.attr, |
| 863 | &slot_timeout_secs_attribute.attr, | 884 | &slot_timeout_secs_attribute.attr, |
| 885 | &cache_timeout_msecs_attribute.attr, | ||
| 864 | &dcache_timeout_msecs_attribute.attr, | 886 | &dcache_timeout_msecs_attribute.attr, |
| 865 | &getattr_timeout_msecs_attribute.attr, | 887 | &getattr_timeout_msecs_attribute.attr, |
| 866 | &readahead_count_attribute.attr, | 888 | &readahead_count_attribute.attr, |
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 804c8a261e4b..d4b7ae763186 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* | 2 | /* |
| 3 | * (C) 2001 Clemson University and The University of Chicago | 3 | * (C) 2001 Clemson University and The University of Chicago |
| 4 | * Copyright 2018 Omnibond Systems, L.L.C. | ||
| 4 | * | 5 | * |
| 5 | * See COPYING in top-level directory. | 6 | * See COPYING in top-level directory. |
| 6 | */ | 7 | */ |
| @@ -135,51 +136,37 @@ static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) | |||
| 135 | * NOTE: in kernel land, we never use the sys_attr->link_target for | 136 | * NOTE: in kernel land, we never use the sys_attr->link_target for |
| 136 | * anything, so don't bother copying it into the sys_attr object here. | 137 | * anything, so don't bother copying it into the sys_attr object here. |
| 137 | */ | 138 | */ |
| 138 | static inline int copy_attributes_from_inode(struct inode *inode, | 139 | static inline void copy_attributes_from_inode(struct inode *inode, |
| 139 | struct ORANGEFS_sys_attr_s *attrs, | 140 | struct ORANGEFS_sys_attr_s *attrs) |
| 140 | struct iattr *iattr) | ||
| 141 | { | 141 | { |
| 142 | umode_t tmp_mode; | 142 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 143 | |||
| 144 | if (!iattr || !inode || !attrs) { | ||
| 145 | gossip_err("NULL iattr (%p), inode (%p), attrs (%p) " | ||
| 146 | "in copy_attributes_from_inode!\n", | ||
| 147 | iattr, | ||
| 148 | inode, | ||
| 149 | attrs); | ||
| 150 | return -EINVAL; | ||
| 151 | } | ||
| 152 | /* | ||
| 153 | * We need to be careful to only copy the attributes out of the | ||
| 154 | * iattr object that we know are valid. | ||
| 155 | */ | ||
| 156 | attrs->mask = 0; | 143 | attrs->mask = 0; |
| 157 | if (iattr->ia_valid & ATTR_UID) { | 144 | if (orangefs_inode->attr_valid & ATTR_UID) { |
| 158 | attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid); | 145 | attrs->owner = from_kuid(&init_user_ns, inode->i_uid); |
| 159 | attrs->mask |= ORANGEFS_ATTR_SYS_UID; | 146 | attrs->mask |= ORANGEFS_ATTR_SYS_UID; |
| 160 | gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); | 147 | gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); |
| 161 | } | 148 | } |
| 162 | if (iattr->ia_valid & ATTR_GID) { | 149 | if (orangefs_inode->attr_valid & ATTR_GID) { |
| 163 | attrs->group = from_kgid(&init_user_ns, iattr->ia_gid); | 150 | attrs->group = from_kgid(&init_user_ns, inode->i_gid); |
| 164 | attrs->mask |= ORANGEFS_ATTR_SYS_GID; | 151 | attrs->mask |= ORANGEFS_ATTR_SYS_GID; |
| 165 | gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); | 152 | gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); |
| 166 | } | 153 | } |
| 167 | 154 | ||
| 168 | if (iattr->ia_valid & ATTR_ATIME) { | 155 | if (orangefs_inode->attr_valid & ATTR_ATIME) { |
| 169 | attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; | 156 | attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; |
| 170 | if (iattr->ia_valid & ATTR_ATIME_SET) { | 157 | if (orangefs_inode->attr_valid & ATTR_ATIME_SET) { |
| 171 | attrs->atime = (time64_t)iattr->ia_atime.tv_sec; | 158 | attrs->atime = (time64_t)inode->i_atime.tv_sec; |
| 172 | attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; | 159 | attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; |
| 173 | } | 160 | } |
| 174 | } | 161 | } |
| 175 | if (iattr->ia_valid & ATTR_MTIME) { | 162 | if (orangefs_inode->attr_valid & ATTR_MTIME) { |
| 176 | attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; | 163 | attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; |
| 177 | if (iattr->ia_valid & ATTR_MTIME_SET) { | 164 | if (orangefs_inode->attr_valid & ATTR_MTIME_SET) { |
| 178 | attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec; | 165 | attrs->mtime = (time64_t)inode->i_mtime.tv_sec; |
| 179 | attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; | 166 | attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; |
| 180 | } | 167 | } |
| 181 | } | 168 | } |
| 182 | if (iattr->ia_valid & ATTR_CTIME) | 169 | if (orangefs_inode->attr_valid & ATTR_CTIME) |
| 183 | attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; | 170 | attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; |
| 184 | 171 | ||
| 185 | /* | 172 | /* |
| @@ -188,36 +175,10 @@ static inline int copy_attributes_from_inode(struct inode *inode, | |||
| 188 | * worry about ATTR_SIZE | 175 | * worry about ATTR_SIZE |
| 189 | */ | 176 | */ |
| 190 | 177 | ||
| 191 | if (iattr->ia_valid & ATTR_MODE) { | 178 | if (orangefs_inode->attr_valid & ATTR_MODE) { |
| 192 | tmp_mode = iattr->ia_mode; | 179 | attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode); |
| 193 | if (tmp_mode & (S_ISVTX)) { | ||
| 194 | if (is_root_handle(inode)) { | ||
| 195 | /* | ||
| 196 | * allow sticky bit to be set on root (since | ||
| 197 | * it shows up that way by default anyhow), | ||
| 198 | * but don't show it to the server | ||
| 199 | */ | ||
| 200 | tmp_mode -= S_ISVTX; | ||
| 201 | } else { | ||
| 202 | gossip_debug(GOSSIP_UTILS_DEBUG, | ||
| 203 | "%s: setting sticky bit not supported.\n", | ||
| 204 | __func__); | ||
| 205 | return -EINVAL; | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | if (tmp_mode & (S_ISUID)) { | ||
| 210 | gossip_debug(GOSSIP_UTILS_DEBUG, | ||
| 211 | "%s: setting setuid bit not supported.\n", | ||
| 212 | __func__); | ||
| 213 | return -EINVAL; | ||
| 214 | } | ||
| 215 | |||
| 216 | attrs->perms = ORANGEFS_util_translate_mode(tmp_mode); | ||
| 217 | attrs->mask |= ORANGEFS_ATTR_SYS_PERM; | 180 | attrs->mask |= ORANGEFS_ATTR_SYS_PERM; |
| 218 | } | 181 | } |
| 219 | |||
| 220 | return 0; | ||
| 221 | } | 182 | } |
| 222 | 183 | ||
| 223 | static int orangefs_inode_type(enum orangefs_ds_type objtype) | 184 | static int orangefs_inode_type(enum orangefs_ds_type objtype) |
| @@ -272,27 +233,30 @@ static int orangefs_inode_is_stale(struct inode *inode, | |||
| 272 | return 0; | 233 | return 0; |
| 273 | } | 234 | } |
| 274 | 235 | ||
| 275 | int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | 236 | int orangefs_inode_getattr(struct inode *inode, int flags) |
| 276 | u32 request_mask) | ||
| 277 | { | 237 | { |
| 278 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 238 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 279 | struct orangefs_kernel_op_s *new_op; | 239 | struct orangefs_kernel_op_s *new_op; |
| 280 | loff_t inode_size; | 240 | loff_t inode_size; |
| 281 | int ret, type; | 241 | int ret, type; |
| 282 | 242 | ||
| 283 | gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, | 243 | gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n", |
| 284 | get_khandle_from_ino(inode)); | 244 | __func__, get_khandle_from_ino(inode), flags); |
| 285 | 245 | ||
| 286 | if (!new && !bypass) { | 246 | again: |
| 287 | /* | 247 | spin_lock(&inode->i_lock); |
| 288 | * Must have all the attributes in the mask and be within cache | 248 | /* Must have all the attributes in the mask and be within cache time. */ |
| 289 | * time. | 249 | if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) || |
| 290 | */ | 250 | orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) { |
| 291 | if ((request_mask & orangefs_inode->getattr_mask) == | 251 | if (orangefs_inode->attr_valid) { |
| 292 | request_mask && | 252 | spin_unlock(&inode->i_lock); |
| 293 | time_before(jiffies, orangefs_inode->getattr_time)) | 253 | write_inode_now(inode, 1); |
| 294 | return 0; | 254 | goto again; |
| 255 | } | ||
| 256 | spin_unlock(&inode->i_lock); | ||
| 257 | return 0; | ||
| 295 | } | 258 | } |
| 259 | spin_unlock(&inode->i_lock); | ||
| 296 | 260 | ||
| 297 | new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); | 261 | new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); |
| 298 | if (!new_op) | 262 | if (!new_op) |
| @@ -302,7 +266,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | |||
| 302 | * Size is the hardest attribute to get. The incremental cost of any | 266 | * Size is the hardest attribute to get. The incremental cost of any |
| 303 | * other attribute is essentially zero. | 267 | * other attribute is essentially zero. |
| 304 | */ | 268 | */ |
| 305 | if (request_mask & STATX_SIZE || new) | 269 | if (flags) |
| 306 | new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; | 270 | new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; |
| 307 | else | 271 | else |
| 308 | new_op->upcall.req.getattr.mask = | 272 | new_op->upcall.req.getattr.mask = |
| @@ -313,13 +277,33 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | |||
| 313 | if (ret != 0) | 277 | if (ret != 0) |
| 314 | goto out; | 278 | goto out; |
| 315 | 279 | ||
| 316 | if (!new) { | 280 | again2: |
| 281 | spin_lock(&inode->i_lock); | ||
| 282 | /* Must have all the attributes in the mask and be within cache time. */ | ||
| 283 | if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) || | ||
| 284 | orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) { | ||
| 285 | if (orangefs_inode->attr_valid) { | ||
| 286 | spin_unlock(&inode->i_lock); | ||
| 287 | write_inode_now(inode, 1); | ||
| 288 | goto again2; | ||
| 289 | } | ||
| 290 | if (inode->i_state & I_DIRTY_PAGES) { | ||
| 291 | ret = 0; | ||
| 292 | goto out_unlock; | ||
| 293 | } | ||
| 294 | gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n", | ||
| 295 | __func__); | ||
| 296 | ret = 0; | ||
| 297 | goto out_unlock; | ||
| 298 | } | ||
| 299 | |||
| 300 | if (!(flags & ORANGEFS_GETATTR_NEW)) { | ||
| 317 | ret = orangefs_inode_is_stale(inode, | 301 | ret = orangefs_inode_is_stale(inode, |
| 318 | &new_op->downcall.resp.getattr.attributes, | 302 | &new_op->downcall.resp.getattr.attributes, |
| 319 | new_op->downcall.resp.getattr.link_target); | 303 | new_op->downcall.resp.getattr.link_target); |
| 320 | if (ret) { | 304 | if (ret) { |
| 321 | ret = -ESTALE; | 305 | ret = -ESTALE; |
| 322 | goto out; | 306 | goto out_unlock; |
| 323 | } | 307 | } |
| 324 | } | 308 | } |
| 325 | 309 | ||
| @@ -329,30 +313,26 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | |||
| 329 | case S_IFREG: | 313 | case S_IFREG: |
| 330 | inode->i_flags = orangefs_inode_flags(&new_op-> | 314 | inode->i_flags = orangefs_inode_flags(&new_op-> |
| 331 | downcall.resp.getattr.attributes); | 315 | downcall.resp.getattr.attributes); |
| 332 | if (request_mask & STATX_SIZE || new) { | 316 | if (flags) { |
| 333 | inode_size = (loff_t)new_op-> | 317 | inode_size = (loff_t)new_op-> |
| 334 | downcall.resp.getattr.attributes.size; | 318 | downcall.resp.getattr.attributes.size; |
| 335 | inode->i_size = inode_size; | 319 | inode->i_size = inode_size; |
| 336 | inode->i_blkbits = ffs(new_op->downcall.resp.getattr. | 320 | inode->i_blkbits = ffs(new_op->downcall.resp.getattr. |
| 337 | attributes.blksize); | 321 | attributes.blksize); |
| 338 | spin_lock(&inode->i_lock); | ||
| 339 | inode->i_bytes = inode_size; | 322 | inode->i_bytes = inode_size; |
| 340 | inode->i_blocks = | 323 | inode->i_blocks = |
| 341 | (inode_size + 512 - inode_size % 512)/512; | 324 | (inode_size + 512 - inode_size % 512)/512; |
| 342 | spin_unlock(&inode->i_lock); | ||
| 343 | } | 325 | } |
| 344 | break; | 326 | break; |
| 345 | case S_IFDIR: | 327 | case S_IFDIR: |
| 346 | if (request_mask & STATX_SIZE || new) { | 328 | if (flags) { |
| 347 | inode->i_size = PAGE_SIZE; | 329 | inode->i_size = PAGE_SIZE; |
| 348 | spin_lock(&inode->i_lock); | ||
| 349 | inode_set_bytes(inode, inode->i_size); | 330 | inode_set_bytes(inode, inode->i_size); |
| 350 | spin_unlock(&inode->i_lock); | ||
| 351 | } | 331 | } |
| 352 | set_nlink(inode, 1); | 332 | set_nlink(inode, 1); |
| 353 | break; | 333 | break; |
| 354 | case S_IFLNK: | 334 | case S_IFLNK: |
| 355 | if (new) { | 335 | if (flags & ORANGEFS_GETATTR_NEW) { |
| 356 | inode->i_size = (loff_t)strlen(new_op-> | 336 | inode->i_size = (loff_t)strlen(new_op-> |
| 357 | downcall.resp.getattr.link_target); | 337 | downcall.resp.getattr.link_target); |
| 358 | ret = strscpy(orangefs_inode->link_target, | 338 | ret = strscpy(orangefs_inode->link_target, |
| @@ -360,7 +340,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | |||
| 360 | ORANGEFS_NAME_MAX); | 340 | ORANGEFS_NAME_MAX); |
| 361 | if (ret == -E2BIG) { | 341 | if (ret == -E2BIG) { |
| 362 | ret = -EIO; | 342 | ret = -EIO; |
| 363 | goto out; | 343 | goto out_unlock; |
| 364 | } | 344 | } |
| 365 | inode->i_link = orangefs_inode->link_target; | 345 | inode->i_link = orangefs_inode->link_target; |
| 366 | } | 346 | } |
| @@ -370,7 +350,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | |||
| 370 | /* XXX: ESTALE? This is what is done if it is not new. */ | 350 | /* XXX: ESTALE? This is what is done if it is not new. */ |
| 371 | orangefs_make_bad_inode(inode); | 351 | orangefs_make_bad_inode(inode); |
| 372 | ret = -ESTALE; | 352 | ret = -ESTALE; |
| 373 | goto out; | 353 | goto out_unlock; |
| 374 | } | 354 | } |
| 375 | 355 | ||
| 376 | inode->i_uid = make_kuid(&init_user_ns, new_op-> | 356 | inode->i_uid = make_kuid(&init_user_ns, new_op-> |
| @@ -393,11 +373,9 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, | |||
| 393 | 373 | ||
| 394 | orangefs_inode->getattr_time = jiffies + | 374 | orangefs_inode->getattr_time = jiffies + |
| 395 | orangefs_getattr_timeout_msecs*HZ/1000; | 375 | orangefs_getattr_timeout_msecs*HZ/1000; |
| 396 | if (request_mask & STATX_SIZE || new) | ||
| 397 | orangefs_inode->getattr_mask = STATX_BASIC_STATS; | ||
| 398 | else | ||
| 399 | orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE; | ||
| 400 | ret = 0; | 376 | ret = 0; |
| 377 | out_unlock: | ||
| 378 | spin_unlock(&inode->i_lock); | ||
| 401 | out: | 379 | out: |
| 402 | op_release(new_op); | 380 | op_release(new_op); |
| 403 | return ret; | 381 | return ret; |
| @@ -436,7 +414,7 @@ out: | |||
| 436 | * issues a orangefs setattr request to make sure the new attribute values | 414 | * issues a orangefs setattr request to make sure the new attribute values |
| 437 | * take effect if successful. returns 0 on success; -errno otherwise | 415 | * take effect if successful. returns 0 on success; -errno otherwise |
| 438 | */ | 416 | */ |
| 439 | int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) | 417 | int orangefs_inode_setattr(struct inode *inode) |
| 440 | { | 418 | { |
| 441 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 419 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 442 | struct orangefs_kernel_op_s *new_op; | 420 | struct orangefs_kernel_op_s *new_op; |
| @@ -446,24 +424,31 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) | |||
| 446 | if (!new_op) | 424 | if (!new_op) |
| 447 | return -ENOMEM; | 425 | return -ENOMEM; |
| 448 | 426 | ||
| 427 | spin_lock(&inode->i_lock); | ||
| 428 | new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid); | ||
| 429 | new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid); | ||
| 449 | new_op->upcall.req.setattr.refn = orangefs_inode->refn; | 430 | new_op->upcall.req.setattr.refn = orangefs_inode->refn; |
| 450 | ret = copy_attributes_from_inode(inode, | 431 | copy_attributes_from_inode(inode, |
| 451 | &new_op->upcall.req.setattr.attributes, | 432 | &new_op->upcall.req.setattr.attributes); |
| 452 | iattr); | 433 | orangefs_inode->attr_valid = 0; |
| 453 | if (ret >= 0) { | 434 | if (!new_op->upcall.req.setattr.attributes.mask) { |
| 454 | ret = service_operation(new_op, __func__, | 435 | spin_unlock(&inode->i_lock); |
| 455 | get_interruptible_flag(inode)); | 436 | op_release(new_op); |
| 456 | 437 | return 0; | |
| 457 | gossip_debug(GOSSIP_UTILS_DEBUG, | ||
| 458 | "orangefs_inode_setattr: returning %d\n", | ||
| 459 | ret); | ||
| 460 | } | 438 | } |
| 439 | spin_unlock(&inode->i_lock); | ||
| 440 | |||
| 441 | ret = service_operation(new_op, __func__, | ||
| 442 | get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK); | ||
| 443 | gossip_debug(GOSSIP_UTILS_DEBUG, | ||
| 444 | "orangefs_inode_setattr: returning %d\n", ret); | ||
| 445 | if (ret) | ||
| 446 | orangefs_make_bad_inode(inode); | ||
| 461 | 447 | ||
| 462 | op_release(new_op); | 448 | op_release(new_op); |
| 463 | 449 | ||
| 464 | if (ret == 0) | 450 | if (ret == 0) |
| 465 | orangefs_inode->getattr_time = jiffies - 1; | 451 | orangefs_inode->getattr_time = jiffies - 1; |
| 466 | |||
| 467 | return ret; | 452 | return ret; |
| 468 | } | 453 | } |
| 469 | 454 | ||
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 3784f7e8b603..ee5efdc35cc1 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "orangefs-bufmap.h" | 10 | #include "orangefs-bufmap.h" |
| 11 | 11 | ||
| 12 | #include <linux/parser.h> | 12 | #include <linux/parser.h> |
| 13 | #include <linux/hashtable.h> | ||
| 13 | 14 | ||
| 14 | /* a cache for orangefs-inode objects (i.e. orangefs inode private data) */ | 15 | /* a cache for orangefs-inode objects (i.e. orangefs inode private data) */ |
| 15 | static struct kmem_cache *orangefs_inode_cache; | 16 | static struct kmem_cache *orangefs_inode_cache; |
| @@ -126,7 +127,17 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) | |||
| 126 | 127 | ||
| 127 | static void orangefs_free_inode(struct inode *inode) | 128 | static void orangefs_free_inode(struct inode *inode) |
| 128 | { | 129 | { |
| 129 | kmem_cache_free(orangefs_inode_cache, ORANGEFS_I(inode)); | 130 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 131 | struct orangefs_cached_xattr *cx; | ||
| 132 | struct hlist_node *tmp; | ||
| 133 | int i; | ||
| 134 | |||
| 135 | hash_for_each_safe(orangefs_inode->xattr_cache, i, tmp, cx, node) { | ||
| 136 | hlist_del(&cx->node); | ||
| 137 | kfree(cx); | ||
| 138 | } | ||
| 139 | |||
| 140 | kmem_cache_free(orangefs_inode_cache, orangefs_inode); | ||
| 130 | } | 141 | } |
| 131 | 142 | ||
| 132 | static void orangefs_destroy_inode(struct inode *inode) | 143 | static void orangefs_destroy_inode(struct inode *inode) |
| @@ -138,6 +149,13 @@ static void orangefs_destroy_inode(struct inode *inode) | |||
| 138 | __func__, orangefs_inode, get_khandle_from_ino(inode)); | 149 | __func__, orangefs_inode, get_khandle_from_ino(inode)); |
| 139 | } | 150 | } |
| 140 | 151 | ||
| 152 | static int orangefs_write_inode(struct inode *inode, | ||
| 153 | struct writeback_control *wbc) | ||
| 154 | { | ||
| 155 | gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_write_inode\n"); | ||
| 156 | return orangefs_inode_setattr(inode); | ||
| 157 | } | ||
| 158 | |||
| 141 | /* | 159 | /* |
| 142 | * NOTE: information filled in here is typically reflected in the | 160 | * NOTE: information filled in here is typically reflected in the |
| 143 | * output of the system command 'df' | 161 | * output of the system command 'df' |
| @@ -297,6 +315,7 @@ static const struct super_operations orangefs_s_ops = { | |||
| 297 | .alloc_inode = orangefs_alloc_inode, | 315 | .alloc_inode = orangefs_alloc_inode, |
| 298 | .free_inode = orangefs_free_inode, | 316 | .free_inode = orangefs_free_inode, |
| 299 | .destroy_inode = orangefs_destroy_inode, | 317 | .destroy_inode = orangefs_destroy_inode, |
| 318 | .write_inode = orangefs_write_inode, | ||
| 300 | .drop_inode = generic_delete_inode, | 319 | .drop_inode = generic_delete_inode, |
| 301 | .statfs = orangefs_statfs, | 320 | .statfs = orangefs_statfs, |
| 302 | .remount_fs = orangefs_remount_fs, | 321 | .remount_fs = orangefs_remount_fs, |
| @@ -394,15 +413,11 @@ static int orangefs_fill_sb(struct super_block *sb, | |||
| 394 | struct orangefs_fs_mount_response *fs_mount, | 413 | struct orangefs_fs_mount_response *fs_mount, |
| 395 | void *data, int silent) | 414 | void *data, int silent) |
| 396 | { | 415 | { |
| 397 | int ret = -EINVAL; | 416 | int ret; |
| 398 | struct inode *root = NULL; | 417 | struct inode *root; |
| 399 | struct dentry *root_dentry = NULL; | 418 | struct dentry *root_dentry; |
| 400 | struct orangefs_object_kref root_object; | 419 | struct orangefs_object_kref root_object; |
| 401 | 420 | ||
| 402 | /* alloc and init our private orangefs sb info */ | ||
| 403 | sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); | ||
| 404 | if (!ORANGEFS_SB(sb)) | ||
| 405 | return -ENOMEM; | ||
| 406 | ORANGEFS_SB(sb)->sb = sb; | 421 | ORANGEFS_SB(sb)->sb = sb; |
| 407 | 422 | ||
| 408 | ORANGEFS_SB(sb)->root_khandle = fs_mount->root_khandle; | 423 | ORANGEFS_SB(sb)->root_khandle = fs_mount->root_khandle; |
| @@ -425,6 +440,10 @@ static int orangefs_fill_sb(struct super_block *sb, | |||
| 425 | sb->s_blocksize_bits = PAGE_SHIFT; | 440 | sb->s_blocksize_bits = PAGE_SHIFT; |
| 426 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 441 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
| 427 | 442 | ||
| 443 | ret = super_setup_bdi(sb); | ||
| 444 | if (ret) | ||
| 445 | return ret; | ||
| 446 | |||
| 428 | root_object.khandle = ORANGEFS_SB(sb)->root_khandle; | 447 | root_object.khandle = ORANGEFS_SB(sb)->root_khandle; |
| 429 | root_object.fs_id = ORANGEFS_SB(sb)->fs_id; | 448 | root_object.fs_id = ORANGEFS_SB(sb)->fs_id; |
| 430 | gossip_debug(GOSSIP_SUPER_DEBUG, | 449 | gossip_debug(GOSSIP_SUPER_DEBUG, |
| @@ -503,6 +522,13 @@ struct dentry *orangefs_mount(struct file_system_type *fst, | |||
| 503 | goto free_op; | 522 | goto free_op; |
| 504 | } | 523 | } |
| 505 | 524 | ||
| 525 | /* alloc and init our private orangefs sb info */ | ||
| 526 | sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); | ||
| 527 | if (!ORANGEFS_SB(sb)) { | ||
| 528 | d = ERR_PTR(-ENOMEM); | ||
| 529 | goto free_op; | ||
| 530 | } | ||
| 531 | |||
| 506 | ret = orangefs_fill_sb(sb, | 532 | ret = orangefs_fill_sb(sb, |
| 507 | &new_op->downcall.resp.fs_mount, data, | 533 | &new_op->downcall.resp.fs_mount, data, |
| 508 | flags & SB_SILENT ? 1 : 0); | 534 | flags & SB_SILENT ? 1 : 0); |
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 0729d2645d6a..beafc33d57be 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | 19 | ||
| 20 | static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, | 20 | static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, |
| 21 | long timeout, | 21 | long timeout, |
| 22 | bool interruptible) | 22 | int flags) |
| 23 | __acquires(op->lock); | 23 | __acquires(op->lock); |
| 24 | static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) | 24 | static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) |
| 25 | __releases(op->lock); | 25 | __releases(op->lock); |
| @@ -143,9 +143,7 @@ retry_servicing: | |||
| 143 | if (!(flags & ORANGEFS_OP_NO_MUTEX)) | 143 | if (!(flags & ORANGEFS_OP_NO_MUTEX)) |
| 144 | mutex_unlock(&orangefs_request_mutex); | 144 | mutex_unlock(&orangefs_request_mutex); |
| 145 | 145 | ||
| 146 | ret = wait_for_matching_downcall(op, timeout, | 146 | ret = wait_for_matching_downcall(op, timeout, flags); |
| 147 | flags & ORANGEFS_OP_INTERRUPTIBLE); | ||
| 148 | |||
| 149 | gossip_debug(GOSSIP_WAIT_DEBUG, | 147 | gossip_debug(GOSSIP_WAIT_DEBUG, |
| 150 | "%s: wait_for_matching_downcall returned %d for %p\n", | 148 | "%s: wait_for_matching_downcall returned %d for %p\n", |
| 151 | __func__, | 149 | __func__, |
| @@ -319,10 +317,12 @@ static void | |||
| 319 | */ | 317 | */ |
| 320 | static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, | 318 | static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, |
| 321 | long timeout, | 319 | long timeout, |
| 322 | bool interruptible) | 320 | int flags) |
| 323 | __acquires(op->lock) | 321 | __acquires(op->lock) |
| 324 | { | 322 | { |
| 325 | long n; | 323 | long n; |
| 324 | int writeback = flags & ORANGEFS_OP_WRITEBACK, | ||
| 325 | interruptible = flags & ORANGEFS_OP_INTERRUPTIBLE; | ||
| 326 | 326 | ||
| 327 | /* | 327 | /* |
| 328 | * There's a "schedule_timeout" inside of these wait | 328 | * There's a "schedule_timeout" inside of these wait |
| @@ -330,10 +330,12 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, | |||
| 330 | * user process that needs something done and is being | 330 | * user process that needs something done and is being |
| 331 | * manipulated by the client-core process. | 331 | * manipulated by the client-core process. |
| 332 | */ | 332 | */ |
| 333 | if (interruptible) | 333 | if (writeback) |
| 334 | n = wait_for_completion_io_timeout(&op->waitq, timeout); | ||
| 335 | else if (!writeback && interruptible) | ||
| 334 | n = wait_for_completion_interruptible_timeout(&op->waitq, | 336 | n = wait_for_completion_interruptible_timeout(&op->waitq, |
| 335 | timeout); | 337 | timeout); |
| 336 | else | 338 | else /* !writeback && !interruptible but compiler complains */ |
| 337 | n = wait_for_completion_killable_timeout(&op->waitq, timeout); | 339 | n = wait_for_completion_killable_timeout(&op->waitq, timeout); |
| 338 | 340 | ||
| 339 | spin_lock(&op->lock); | 341 | spin_lock(&op->lock); |
diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 03bcb871544d..bdc285aea360 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* | 2 | /* |
| 3 | * (C) 2001 Clemson University and The University of Chicago | 3 | * (C) 2001 Clemson University and The University of Chicago |
| 4 | * Copyright 2018 Omnibond Systems, L.L.C. | ||
| 4 | * | 5 | * |
| 5 | * See COPYING in top-level directory. | 6 | * See COPYING in top-level directory. |
| 6 | */ | 7 | */ |
| @@ -14,7 +15,7 @@ | |||
| 14 | #include "orangefs-bufmap.h" | 15 | #include "orangefs-bufmap.h" |
| 15 | #include <linux/posix_acl_xattr.h> | 16 | #include <linux/posix_acl_xattr.h> |
| 16 | #include <linux/xattr.h> | 17 | #include <linux/xattr.h> |
| 17 | 18 | #include <linux/hashtable.h> | |
| 18 | 19 | ||
| 19 | #define SYSTEM_ORANGEFS_KEY "system.pvfs2." | 20 | #define SYSTEM_ORANGEFS_KEY "system.pvfs2." |
| 20 | #define SYSTEM_ORANGEFS_KEY_LEN 13 | 21 | #define SYSTEM_ORANGEFS_KEY_LEN 13 |
| @@ -50,6 +51,35 @@ static inline int convert_to_internal_xattr_flags(int setxattr_flags) | |||
| 50 | return internal_flag; | 51 | return internal_flag; |
| 51 | } | 52 | } |
| 52 | 53 | ||
| 54 | static unsigned int xattr_key(const char *key) | ||
| 55 | { | ||
| 56 | unsigned int i = 0; | ||
| 57 | while (key) | ||
| 58 | i += *key++; | ||
| 59 | return i % 16; | ||
| 60 | } | ||
| 61 | |||
| 62 | static struct orangefs_cached_xattr *find_cached_xattr(struct inode *inode, | ||
| 63 | const char *key) | ||
| 64 | { | ||
| 65 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | ||
| 66 | struct orangefs_cached_xattr *cx; | ||
| 67 | struct hlist_head *h; | ||
| 68 | struct hlist_node *tmp; | ||
| 69 | h = &orangefs_inode->xattr_cache[xattr_key(key)]; | ||
| 70 | if (hlist_empty(h)) | ||
| 71 | return NULL; | ||
| 72 | hlist_for_each_entry_safe(cx, tmp, h, node) { | ||
| 73 | /* if (!time_before(jiffies, cx->timeout)) { | ||
| 74 | hlist_del(&cx->node); | ||
| 75 | kfree(cx); | ||
| 76 | continue; | ||
| 77 | }*/ | ||
| 78 | if (!strcmp(cx->key, key)) | ||
| 79 | return cx; | ||
| 80 | } | ||
| 81 | return NULL; | ||
| 82 | } | ||
| 53 | 83 | ||
| 54 | /* | 84 | /* |
| 55 | * Tries to get a specified key's attributes of a given | 85 | * Tries to get a specified key's attributes of a given |
| @@ -65,6 +95,7 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, | |||
| 65 | { | 95 | { |
| 66 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 96 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 67 | struct orangefs_kernel_op_s *new_op = NULL; | 97 | struct orangefs_kernel_op_s *new_op = NULL; |
| 98 | struct orangefs_cached_xattr *cx; | ||
| 68 | ssize_t ret = -ENOMEM; | 99 | ssize_t ret = -ENOMEM; |
| 69 | ssize_t length = 0; | 100 | ssize_t length = 0; |
| 70 | int fsuid; | 101 | int fsuid; |
| @@ -93,6 +124,27 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, | |||
| 93 | 124 | ||
| 94 | down_read(&orangefs_inode->xattr_sem); | 125 | down_read(&orangefs_inode->xattr_sem); |
| 95 | 126 | ||
| 127 | cx = find_cached_xattr(inode, name); | ||
| 128 | if (cx && time_before(jiffies, cx->timeout)) { | ||
| 129 | if (cx->length == -1) { | ||
| 130 | ret = -ENODATA; | ||
| 131 | goto out_unlock; | ||
| 132 | } else { | ||
| 133 | if (size == 0) { | ||
| 134 | ret = cx->length; | ||
| 135 | goto out_unlock; | ||
| 136 | } | ||
| 137 | if (cx->length > size) { | ||
| 138 | ret = -ERANGE; | ||
| 139 | goto out_unlock; | ||
| 140 | } | ||
| 141 | memcpy(buffer, cx->val, cx->length); | ||
| 142 | memset(buffer + cx->length, 0, size - cx->length); | ||
| 143 | ret = cx->length; | ||
| 144 | goto out_unlock; | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 96 | new_op = op_alloc(ORANGEFS_VFS_OP_GETXATTR); | 148 | new_op = op_alloc(ORANGEFS_VFS_OP_GETXATTR); |
| 97 | if (!new_op) | 149 | if (!new_op) |
| 98 | goto out_unlock; | 150 | goto out_unlock; |
| @@ -117,6 +169,15 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, | |||
| 117 | " does not exist!\n", | 169 | " does not exist!\n", |
| 118 | get_khandle_from_ino(inode), | 170 | get_khandle_from_ino(inode), |
| 119 | (char *)new_op->upcall.req.getxattr.key); | 171 | (char *)new_op->upcall.req.getxattr.key); |
| 172 | cx = kmalloc(sizeof *cx, GFP_KERNEL); | ||
| 173 | if (cx) { | ||
| 174 | strcpy(cx->key, name); | ||
| 175 | cx->length = -1; | ||
| 176 | cx->timeout = jiffies + | ||
| 177 | orangefs_getattr_timeout_msecs*HZ/1000; | ||
| 178 | hash_add(orangefs_inode->xattr_cache, &cx->node, | ||
| 179 | xattr_key(cx->key)); | ||
| 180 | } | ||
| 120 | } | 181 | } |
| 121 | goto out_release_op; | 182 | goto out_release_op; |
| 122 | } | 183 | } |
| @@ -156,6 +217,23 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, | |||
| 156 | 217 | ||
| 157 | ret = length; | 218 | ret = length; |
| 158 | 219 | ||
| 220 | if (cx) { | ||
| 221 | strcpy(cx->key, name); | ||
| 222 | memcpy(cx->val, buffer, length); | ||
| 223 | cx->length = length; | ||
| 224 | cx->timeout = jiffies + HZ; | ||
| 225 | } else { | ||
| 226 | cx = kmalloc(sizeof *cx, GFP_KERNEL); | ||
| 227 | if (cx) { | ||
| 228 | strcpy(cx->key, name); | ||
| 229 | memcpy(cx->val, buffer, length); | ||
| 230 | cx->length = length; | ||
| 231 | cx->timeout = jiffies + HZ; | ||
| 232 | hash_add(orangefs_inode->xattr_cache, &cx->node, | ||
| 233 | xattr_key(cx->key)); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 159 | out_release_op: | 237 | out_release_op: |
| 160 | op_release(new_op); | 238 | op_release(new_op); |
| 161 | out_unlock: | 239 | out_unlock: |
| @@ -168,6 +246,9 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name, | |||
| 168 | { | 246 | { |
| 169 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 247 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 170 | struct orangefs_kernel_op_s *new_op = NULL; | 248 | struct orangefs_kernel_op_s *new_op = NULL; |
| 249 | struct orangefs_cached_xattr *cx; | ||
| 250 | struct hlist_head *h; | ||
| 251 | struct hlist_node *tmp; | ||
| 171 | int ret = -ENOMEM; | 252 | int ret = -ENOMEM; |
| 172 | 253 | ||
| 173 | if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) | 254 | if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) |
| @@ -209,6 +290,16 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name, | |||
| 209 | "orangefs_inode_removexattr: returning %d\n", ret); | 290 | "orangefs_inode_removexattr: returning %d\n", ret); |
| 210 | 291 | ||
| 211 | op_release(new_op); | 292 | op_release(new_op); |
| 293 | |||
| 294 | h = &orangefs_inode->xattr_cache[xattr_key(name)]; | ||
| 295 | hlist_for_each_entry_safe(cx, tmp, h, node) { | ||
| 296 | if (!strcmp(cx->key, name)) { | ||
| 297 | hlist_del(&cx->node); | ||
| 298 | kfree(cx); | ||
| 299 | break; | ||
| 300 | } | ||
| 301 | } | ||
| 302 | |||
| 212 | out_unlock: | 303 | out_unlock: |
| 213 | up_write(&orangefs_inode->xattr_sem); | 304 | up_write(&orangefs_inode->xattr_sem); |
| 214 | return ret; | 305 | return ret; |
| @@ -226,6 +317,9 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name, | |||
| 226 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); | 317 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 227 | struct orangefs_kernel_op_s *new_op; | 318 | struct orangefs_kernel_op_s *new_op; |
| 228 | int internal_flag = 0; | 319 | int internal_flag = 0; |
| 320 | struct orangefs_cached_xattr *cx; | ||
| 321 | struct hlist_head *h; | ||
| 322 | struct hlist_node *tmp; | ||
| 229 | int ret = -ENOMEM; | 323 | int ret = -ENOMEM; |
| 230 | 324 | ||
| 231 | gossip_debug(GOSSIP_XATTR_DEBUG, | 325 | gossip_debug(GOSSIP_XATTR_DEBUG, |
| @@ -287,6 +381,16 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name, | |||
| 287 | 381 | ||
| 288 | /* when request is serviced properly, free req op struct */ | 382 | /* when request is serviced properly, free req op struct */ |
| 289 | op_release(new_op); | 383 | op_release(new_op); |
| 384 | |||
| 385 | h = &orangefs_inode->xattr_cache[xattr_key(name)]; | ||
| 386 | hlist_for_each_entry_safe(cx, tmp, h, node) { | ||
| 387 | if (!strcmp(cx->key, name)) { | ||
| 388 | hlist_del(&cx->node); | ||
| 389 | kfree(cx); | ||
| 390 | break; | ||
| 391 | } | ||
| 392 | } | ||
| 393 | |||
| 290 | out_unlock: | 394 | out_unlock: |
| 291 | up_write(&orangefs_inode->xattr_sem); | 395 | up_write(&orangefs_inode->xattr_sem); |
| 292 | return ret; | 396 | return ret; |
