aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Marshall <hubcap@omnibond.com>2015-07-17 10:38:15 -0400
committerMike Marshall <hubcap@omnibond.com>2015-10-03 11:39:57 -0400
commit1182fca3bc00441d5b2dee2f0548a3b7f978f9e7 (patch)
treee33ca6e48fdbb2e64671b0c7bfc4a230868bb51b
parentf7be4ee07fb72a516563bc2870ef41fa589a964a (diff)
Orangefs: kernel client part 5
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
-rw-r--r--fs/orangefs/super.c558
-rw-r--r--fs/orangefs/symlink.c31
-rw-r--r--fs/orangefs/waitqueue.c522
-rw-r--r--fs/orangefs/xattr.c532
4 files changed, 1643 insertions, 0 deletions
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
new file mode 100644
index 000000000000..a854390fc0ea
--- /dev/null
+++ b/fs/orangefs/super.c
@@ -0,0 +1,558 @@
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7#include "protocol.h"
8#include "pvfs2-kernel.h"
9#include "pvfs2-bufmap.h"
10
11#include <linux/parser.h>
12
/* slab cache from which pvfs2_inode_s objects (pvfs2 inode private data) come */
static struct kmem_cache *pvfs2_inode_cache;

/* every pvfs2 superblock currently in use is linked on this list */
LIST_HEAD(pvfs2_superblocks);

/* declared next to pvfs2_superblocks; presumably serialises access to it */
DEFINE_SPINLOCK(pvfs2_superblocks_lock);
20
/* Token values used when matching mount options against the tokens table. */
enum {
	Opt_intr,
	Opt_acl,
	Opt_local_lock,

	/* paired with the NULL pattern that terminates the table */
	Opt_err
};
28
29static const match_table_t tokens = {
30 { Opt_acl, "acl" },
31 { Opt_intr, "intr" },
32 { Opt_local_lock, "local_lock" },
33 { Opt_err, NULL }
34};
35
36
37static int parse_mount_options(struct super_block *sb, char *options,
38 int silent)
39{
40 struct pvfs2_sb_info_s *pvfs2_sb = PVFS2_SB(sb);
41 substring_t args[MAX_OPT_ARGS];
42 char *p;
43
44 /*
45 * Force any potential flags that might be set from the mount
46 * to zero, ie, initialize to unset.
47 */
48 sb->s_flags &= ~MS_POSIXACL;
49 pvfs2_sb->flags &= ~PVFS2_OPT_INTR;
50 pvfs2_sb->flags &= ~PVFS2_OPT_LOCAL_LOCK;
51
52 while ((p = strsep(&options, ",")) != NULL) {
53 int token;
54
55 if (!*p)
56 continue;
57
58 token = match_token(p, tokens, args);
59 switch (token) {
60 case Opt_acl:
61 sb->s_flags |= MS_POSIXACL;
62 break;
63 case Opt_intr:
64 pvfs2_sb->flags |= PVFS2_OPT_INTR;
65 break;
66 case Opt_local_lock:
67 pvfs2_sb->flags |= PVFS2_OPT_LOCAL_LOCK;
68 break;
69 default:
70 goto fail;
71 }
72 }
73
74 return 0;
75fail:
76 if (!silent)
77 gossip_err("Error: mount option [%s] is not supported.\n", p);
78 return -EINVAL;
79}
80
81static void pvfs2_inode_cache_ctor(void *req)
82{
83 struct pvfs2_inode_s *pvfs2_inode = req;
84
85 inode_init_once(&pvfs2_inode->vfs_inode);
86 init_rwsem(&pvfs2_inode->xattr_sem);
87
88 pvfs2_inode->vfs_inode.i_version = 1;
89}
90
91static struct inode *pvfs2_alloc_inode(struct super_block *sb)
92{
93 struct pvfs2_inode_s *pvfs2_inode;
94
95 pvfs2_inode = kmem_cache_alloc(pvfs2_inode_cache,
96 PVFS2_CACHE_ALLOC_FLAGS);
97 if (pvfs2_inode == NULL) {
98 gossip_err("Failed to allocate pvfs2_inode\n");
99 return NULL;
100 }
101
102 /*
103 * We want to clear everything except for rw_semaphore and the
104 * vfs_inode.
105 */
106 memset(&pvfs2_inode->refn.khandle, 0, 16);
107 pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL;
108 pvfs2_inode->last_failed_block_index_read = 0;
109 memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target));
110 pvfs2_inode->pinode_flags = 0;
111
112 gossip_debug(GOSSIP_SUPER_DEBUG,
113 "pvfs2_alloc_inode: allocated %p\n",
114 &pvfs2_inode->vfs_inode);
115 return &pvfs2_inode->vfs_inode;
116}
117
118static void pvfs2_destroy_inode(struct inode *inode)
119{
120 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
121
122 gossip_debug(GOSSIP_SUPER_DEBUG,
123 "%s: deallocated %p destroying inode %pU\n",
124 __func__, pvfs2_inode, get_khandle_from_ino(inode));
125
126 kmem_cache_free(pvfs2_inode_cache, pvfs2_inode);
127}
128
129/*
130 * NOTE: information filled in here is typically reflected in the
131 * output of the system command 'df'
132*/
133static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
134{
135 int ret = -ENOMEM;
136 struct pvfs2_kernel_op_s *new_op = NULL;
137 int flags = 0;
138 struct super_block *sb = NULL;
139
140 sb = dentry->d_sb;
141
142 gossip_debug(GOSSIP_SUPER_DEBUG,
143 "pvfs2_statfs: called on sb %p (fs_id is %d)\n",
144 sb,
145 (int)(PVFS2_SB(sb)->fs_id));
146
147 new_op = op_alloc(PVFS2_VFS_OP_STATFS);
148 if (!new_op)
149 return ret;
150 new_op->upcall.req.statfs.fs_id = PVFS2_SB(sb)->fs_id;
151
152 if (PVFS2_SB(sb)->flags & PVFS2_OPT_INTR)
153 flags = PVFS2_OP_INTERRUPTIBLE;
154
155 ret = service_operation(new_op, "pvfs2_statfs", flags);
156
157 if (new_op->downcall.status < 0)
158 goto out_op_release;
159
160 gossip_debug(GOSSIP_SUPER_DEBUG,
161 "pvfs2_statfs: got %ld blocks available | "
162 "%ld blocks total | %ld block size\n",
163 (long)new_op->downcall.resp.statfs.blocks_avail,
164 (long)new_op->downcall.resp.statfs.blocks_total,
165 (long)new_op->downcall.resp.statfs.block_size);
166
167 buf->f_type = sb->s_magic;
168 memcpy(&buf->f_fsid, &PVFS2_SB(sb)->fs_id, sizeof(buf->f_fsid));
169 buf->f_bsize = new_op->downcall.resp.statfs.block_size;
170 buf->f_namelen = PVFS2_NAME_LEN;
171
172 buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total;
173 buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
174 buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
175 buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total;
176 buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail;
177 buf->f_frsize = sb->s_blocksize;
178
179out_op_release:
180 op_release(new_op);
181 gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_statfs: returning %d\n", ret);
182 return ret;
183}
184
185/*
186 * Remount as initiated by VFS layer. We just need to reparse the mount
187 * options, no need to signal pvfs2-client-core about it.
188 */
189static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data)
190{
191 gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n");
192 return parse_mount_options(sb, data, 1);
193}
194
195/*
196 * Remount as initiated by pvfs2-client-core on restart. This is used to
197 * repopulate mount information left from previous pvfs2-client-core.
198 *
199 * the idea here is that given a valid superblock, we're
200 * re-initializing the user space client with the initial mount
201 * information specified when the super block was first initialized.
202 * this is very different than the first initialization/creation of a
203 * superblock. we use the special service_priority_operation to make
204 * sure that the mount gets ahead of any other pending operation that
205 * is waiting for servicing. this means that the pvfs2-client won't
206 * fail to start several times for all other pending operations before
207 * the client regains all of the mount information from us.
208 * NOTE: this function assumes that the request_mutex is already acquired!
209 */
210int pvfs2_remount(struct super_block *sb)
211{
212 struct pvfs2_kernel_op_s *new_op;
213 int ret = -EINVAL;
214
215 gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount: called\n");
216
217 new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT);
218 if (!new_op)
219 return -ENOMEM;
220 strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server,
221 PVFS2_SB(sb)->devname,
222 PVFS_MAX_SERVER_ADDR_LEN);
223
224 gossip_debug(GOSSIP_SUPER_DEBUG,
225 "Attempting PVFS2 Remount via host %s\n",
226 new_op->upcall.req.fs_mount.pvfs2_config_server);
227
228 /*
229 * we assume that the calling function has already acquire the
230 * request_mutex to prevent other operations from bypassing
231 * this one
232 */
233 ret = service_operation(new_op, "pvfs2_remount",
234 PVFS2_OP_PRIORITY | PVFS2_OP_NO_SEMAPHORE);
235 gossip_debug(GOSSIP_SUPER_DEBUG,
236 "pvfs2_remount: mount got return value of %d\n",
237 ret);
238 if (ret == 0) {
239 /*
240 * store the id assigned to this sb -- it's just a
241 * short-lived mapping that the system interface uses
242 * to map this superblock to a particular mount entry
243 */
244 PVFS2_SB(sb)->id = new_op->downcall.resp.fs_mount.id;
245 PVFS2_SB(sb)->mount_pending = 0;
246 }
247
248 op_release(new_op);
249 return ret;
250}
251
/* Placeholder: there is nothing to set up, so this always reports success. */
int fsid_key_table_initialize(void)
{
	const int success = 0;

	return success;
}
256
/* Placeholder counterpart of fsid_key_table_initialize(); does nothing. */
void fsid_key_table_finalize(void)
{
	/* intentionally empty */
}
260
261/* Called whenever the VFS dirties the inode in response to atime updates */
262static void pvfs2_dirty_inode(struct inode *inode, int flags)
263{
264 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
265
266 gossip_debug(GOSSIP_SUPER_DEBUG,
267 "pvfs2_dirty_inode: %pU\n",
268 get_khandle_from_ino(inode));
269 SetAtimeFlag(pvfs2_inode);
270}
271
272struct super_operations pvfs2_s_ops = {
273 .alloc_inode = pvfs2_alloc_inode,
274 .destroy_inode = pvfs2_destroy_inode,
275 .dirty_inode = pvfs2_dirty_inode,
276 .drop_inode = generic_delete_inode,
277 .statfs = pvfs2_statfs,
278 .remount_fs = pvfs2_remount_fs,
279 .show_options = generic_show_options,
280};
281
282struct dentry *pvfs2_fh_to_dentry(struct super_block *sb,
283 struct fid *fid,
284 int fh_len,
285 int fh_type)
286{
287 struct pvfs2_object_kref refn;
288
289 if (fh_len < 5 || fh_type > 2)
290 return NULL;
291
292 PVFS_khandle_from(&(refn.khandle), fid->raw, 16);
293 refn.fs_id = (u32) fid->raw[4];
294 gossip_debug(GOSSIP_SUPER_DEBUG,
295 "fh_to_dentry: handle %pU, fs_id %d\n",
296 &refn.khandle,
297 refn.fs_id);
298
299 return d_obtain_alias(pvfs2_iget(sb, &refn));
300}
301
302int pvfs2_encode_fh(struct inode *inode,
303 __u32 *fh,
304 int *max_len,
305 struct inode *parent)
306{
307 int len = parent ? 10 : 5;
308 int type = 1;
309 struct pvfs2_object_kref refn;
310
311 if (*max_len < len) {
312 gossip_lerr("fh buffer is too small for encoding\n");
313 *max_len = len;
314 type = 255;
315 goto out;
316 }
317
318 refn = PVFS2_I(inode)->refn;
319 PVFS_khandle_to(&refn.khandle, fh, 16);
320 fh[4] = refn.fs_id;
321
322 gossip_debug(GOSSIP_SUPER_DEBUG,
323 "Encoding fh: handle %pU, fsid %u\n",
324 &refn.khandle,
325 refn.fs_id);
326
327
328 if (parent) {
329 refn = PVFS2_I(parent)->refn;
330 PVFS_khandle_to(&refn.khandle, (char *) fh + 20, 16);
331 fh[9] = refn.fs_id;
332
333 type = 2;
334 gossip_debug(GOSSIP_SUPER_DEBUG,
335 "Encoding parent: handle %pU, fsid %u\n",
336 &refn.khandle,
337 refn.fs_id);
338 }
339 *max_len = len;
340
341out:
342 return type;
343}
344
345static struct export_operations pvfs2_export_ops = {
346 .encode_fh = pvfs2_encode_fh,
347 .fh_to_dentry = pvfs2_fh_to_dentry,
348};
349
350int pvfs2_fill_sb(struct super_block *sb, void *data, int silent)
351{
352 int ret = -EINVAL;
353 struct inode *root = NULL;
354 struct dentry *root_dentry = NULL;
355 struct pvfs2_mount_sb_info_s *mount_sb_info =
356 (struct pvfs2_mount_sb_info_s *) data;
357 struct pvfs2_object_kref root_object;
358
359 /* alloc and init our private pvfs2 sb info */
360 sb->s_fs_info =
361 kmalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS);
362 if (!PVFS2_SB(sb))
363 return -ENOMEM;
364 memset(sb->s_fs_info, 0, sizeof(struct pvfs2_sb_info_s));
365 PVFS2_SB(sb)->sb = sb;
366
367 PVFS2_SB(sb)->root_khandle = mount_sb_info->root_khandle;
368 PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id;
369 PVFS2_SB(sb)->id = mount_sb_info->id;
370
371 if (mount_sb_info->data) {
372 ret = parse_mount_options(sb, mount_sb_info->data,
373 silent);
374 if (ret)
375 return ret;
376 }
377
378 /* Hang the xattr handlers off the superblock */
379 sb->s_xattr = pvfs2_xattr_handlers;
380 sb->s_magic = PVFS2_SUPER_MAGIC;
381 sb->s_op = &pvfs2_s_ops;
382 sb->s_d_op = &pvfs2_dentry_operations;
383
384 sb->s_blocksize = pvfs_bufmap_size_query();
385 sb->s_blocksize_bits = pvfs_bufmap_shift_query();
386 sb->s_maxbytes = MAX_LFS_FILESIZE;
387
388 root_object.khandle = PVFS2_SB(sb)->root_khandle;
389 root_object.fs_id = PVFS2_SB(sb)->fs_id;
390 gossip_debug(GOSSIP_SUPER_DEBUG,
391 "get inode %pU, fsid %d\n",
392 &root_object.khandle,
393 root_object.fs_id);
394
395 root = pvfs2_iget(sb, &root_object);
396 if (IS_ERR(root))
397 return PTR_ERR(root);
398
399 gossip_debug(GOSSIP_SUPER_DEBUG,
400 "Allocated root inode [%p] with mode %x\n",
401 root,
402 root->i_mode);
403
404 /* allocates and places root dentry in dcache */
405 root_dentry = d_make_root(root);
406 if (!root_dentry) {
407 iput(root);
408 return -ENOMEM;
409 }
410
411 sb->s_export_op = &pvfs2_export_ops;
412 sb->s_root = root_dentry;
413 return 0;
414}
415
416struct dentry *pvfs2_mount(struct file_system_type *fst,
417 int flags,
418 const char *devname,
419 void *data)
420{
421 int ret = -EINVAL;
422 struct super_block *sb = ERR_PTR(-EINVAL);
423 struct pvfs2_kernel_op_s *new_op;
424 struct pvfs2_mount_sb_info_s mount_sb_info;
425 struct dentry *mnt_sb_d = ERR_PTR(-EINVAL);
426
427 gossip_debug(GOSSIP_SUPER_DEBUG,
428 "pvfs2_mount: called with devname %s\n",
429 devname);
430
431 if (!devname) {
432 gossip_err("ERROR: device name not specified.\n");
433 return ERR_PTR(-EINVAL);
434 }
435
436 new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT);
437 if (!new_op)
438 return ERR_PTR(-ENOMEM);
439
440 strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server,
441 devname,
442 PVFS_MAX_SERVER_ADDR_LEN);
443
444 gossip_debug(GOSSIP_SUPER_DEBUG,
445 "Attempting PVFS2 Mount via host %s\n",
446 new_op->upcall.req.fs_mount.pvfs2_config_server);
447
448 ret = service_operation(new_op, "pvfs2_mount", 0);
449 gossip_debug(GOSSIP_SUPER_DEBUG,
450 "pvfs2_mount: mount got return value of %d\n", ret);
451 if (ret)
452 goto free_op;
453
454 if (new_op->downcall.resp.fs_mount.fs_id == PVFS_FS_ID_NULL) {
455 gossip_err("ERROR: Retrieved null fs_id\n");
456 ret = -EINVAL;
457 goto free_op;
458 }
459
460 /* fill in temporary structure passed to fill_sb method */
461 mount_sb_info.data = data;
462 mount_sb_info.root_khandle =
463 new_op->downcall.resp.fs_mount.root_khandle;
464 mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id;
465 mount_sb_info.id = new_op->downcall.resp.fs_mount.id;
466
467 /*
468 * the mount_sb_info structure looks odd, but it's used because
469 * the private sb info isn't allocated until we call
470 * pvfs2_fill_sb, yet we have the info we need to fill it with
471 * here. so we store it temporarily and pass all of the info
472 * to fill_sb where it's properly copied out
473 */
474 mnt_sb_d = mount_nodev(fst,
475 flags,
476 (void *)&mount_sb_info,
477 pvfs2_fill_sb);
478 if (IS_ERR(mnt_sb_d)) {
479 sb = ERR_CAST(mnt_sb_d);
480 goto free_op;
481 }
482
483 sb = mnt_sb_d->d_sb;
484
485 /*
486 * on successful mount, store the devname and data
487 * used
488 */
489 strncpy(PVFS2_SB(sb)->devname,
490 devname,
491 PVFS_MAX_SERVER_ADDR_LEN);
492
493 /* mount_pending must be cleared */
494 PVFS2_SB(sb)->mount_pending = 0;
495
496 /*
497 * finally, add this sb to our list of known pvfs2
498 * sb's
499 */
500 add_pvfs2_sb(sb);
501 op_release(new_op);
502 return mnt_sb_d;
503
504free_op:
505 gossip_err("pvfs2_mount: mount request failed with %d\n", ret);
506 if (ret == -EINVAL) {
507 gossip_err("Ensure that all pvfs2-servers have the same FS configuration files\n");
508 gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n");
509 }
510
511 op_release(new_op);
512
513 gossip_debug(GOSSIP_SUPER_DEBUG,
514 "pvfs2_mount: returning dentry %p\n",
515 mnt_sb_d);
516 return mnt_sb_d;
517}
518
519void pvfs2_kill_sb(struct super_block *sb)
520{
521 gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_kill_sb: called\n");
522
523 /*
524 * issue the unmount to userspace to tell it to remove the
525 * dynamic mount info it has for this superblock
526 */
527 pvfs2_unmount_sb(sb);
528
529 /* remove the sb from our list of pvfs2 specific sb's */
530 remove_pvfs2_sb(sb);
531
532 /* provided sb cleanup */
533 kill_anon_super(sb);
534
535 /* free the pvfs2 superblock private data */
536 kfree(PVFS2_SB(sb));
537}
538
539int pvfs2_inode_cache_initialize(void)
540{
541 pvfs2_inode_cache = kmem_cache_create("pvfs2_inode_cache",
542 sizeof(struct pvfs2_inode_s),
543 0,
544 PVFS2_CACHE_CREATE_FLAGS,
545 pvfs2_inode_cache_ctor);
546
547 if (!pvfs2_inode_cache) {
548 gossip_err("Cannot create pvfs2_inode_cache\n");
549 return -ENOMEM;
550 }
551 return 0;
552}
553
554int pvfs2_inode_cache_finalize(void)
555{
556 kmem_cache_destroy(pvfs2_inode_cache);
557 return 0;
558}
diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c
new file mode 100644
index 000000000000..2adfceff7730
--- /dev/null
+++ b/fs/orangefs/symlink.c
@@ -0,0 +1,31 @@
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7#include "protocol.h"
8#include "pvfs2-kernel.h"
9#include "pvfs2-bufmap.h"
10
11static const char *pvfs2_follow_link(struct dentry *dentry, void **cookie)
12{
13 char *target = PVFS2_I(dentry->d_inode)->link_target;
14
15 gossip_debug(GOSSIP_INODE_DEBUG,
16 "%s: called on %s (target is %p)\n",
17 __func__, (char *)dentry->d_name.name, target);
18
19 *cookie = target;
20
21 return target;
22}
23
24struct inode_operations pvfs2_symlink_inode_operations = {
25 .readlink = generic_readlink,
26 .follow_link = pvfs2_follow_link,
27 .setattr = pvfs2_setattr,
28 .getattr = pvfs2_getattr,
29 .listxattr = pvfs2_listxattr,
30 .setxattr = generic_setxattr,
31};
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
new file mode 100644
index 000000000000..9b32286a7dc4
--- /dev/null
+++ b/fs/orangefs/waitqueue.c
@@ -0,0 +1,522 @@
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 * (C) 2011 Omnibond Systems
4 *
5 * Changes by Acxiom Corporation to implement generic service_operation()
6 * function, Copyright Acxiom Corporation, 2005.
7 *
8 * See COPYING in top-level directory.
9 */
10
11/*
12 * In-kernel waitqueue operations.
13 */
14
15#include "protocol.h"
16#include "pvfs2-kernel.h"
17#include "pvfs2-bufmap.h"
18
19/*
20 * What we do in this function is to walk the list of operations that are
21 * present in the request queue and mark them as purged.
22 * NOTE: This is called from the device close after client-core has
23 * guaranteed that no new operations could appear on the list since the
24 * client-core is anyway going to exit.
25 */
26void purge_waiting_ops(void)
27{
28 struct pvfs2_kernel_op_s *op;
29
30 spin_lock(&pvfs2_request_list_lock);
31 list_for_each_entry(op, &pvfs2_request_list, list) {
32 gossip_debug(GOSSIP_WAIT_DEBUG,
33 "pvfs2-client-core: purging op tag %llu %s\n",
34 llu(op->tag),
35 get_opname_string(op));
36 spin_lock(&op->lock);
37 set_op_state_purged(op);
38 spin_unlock(&op->lock);
39 wake_up_interruptible(&op->waitq);
40 }
41 spin_unlock(&pvfs2_request_list_lock);
42}
43
44/*
45 * submits a PVFS2 operation and waits for it to complete
46 *
47 * Note op->downcall.status will contain the status of the operation (in
48 * errno format), whether provided by pvfs2-client or a result of failure to
49 * service the operation. If the caller wishes to distinguish, then
50 * op->state can be checked to see if it was serviced or not.
51 *
52 * Returns contents of op->downcall.status for convenience
53 */
54int service_operation(struct pvfs2_kernel_op_s *op,
55 const char *op_name,
56 int flags)
57{
58 /* flags to modify behavior */
59 sigset_t orig_sigset;
60 int ret = 0;
61
62 /* irqflags and wait_entry are only used IF the client-core aborts */
63 unsigned long irqflags;
64
65 DECLARE_WAITQUEUE(wait_entry, current);
66
67 op->upcall.tgid = current->tgid;
68 op->upcall.pid = current->pid;
69
70retry_servicing:
71 op->downcall.status = 0;
72 gossip_debug(GOSSIP_WAIT_DEBUG,
73 "pvfs2: service_operation: %s %p\n",
74 op_name,
75 op);
76 gossip_debug(GOSSIP_WAIT_DEBUG,
77 "pvfs2: operation posted by process: %s, pid: %i\n",
78 current->comm,
79 current->pid);
80
81 /* mask out signals if this operation is not to be interrupted */
82 if (!(flags & PVFS2_OP_INTERRUPTIBLE))
83 mask_blocked_signals(&orig_sigset);
84
85 if (!(flags & PVFS2_OP_NO_SEMAPHORE)) {
86 ret = mutex_lock_interruptible(&request_mutex);
87 /*
88 * check to see if we were interrupted while waiting for
89 * semaphore
90 */
91 if (ret < 0) {
92 if (!(flags & PVFS2_OP_INTERRUPTIBLE))
93 unmask_blocked_signals(&orig_sigset);
94 op->downcall.status = ret;
95 gossip_debug(GOSSIP_WAIT_DEBUG,
96 "pvfs2: service_operation interrupted.\n");
97 return ret;
98 }
99 }
100
101 gossip_debug(GOSSIP_WAIT_DEBUG,
102 "%s:About to call is_daemon_in_service().\n",
103 __func__);
104
105 if (is_daemon_in_service() < 0) {
106 /*
107 * By incrementing the per-operation attempt counter, we
108 * directly go into the timeout logic while waiting for
109 * the matching downcall to be read
110 */
111 gossip_debug(GOSSIP_WAIT_DEBUG,
112 "%s:client core is NOT in service(%d).\n",
113 __func__,
114 is_daemon_in_service());
115 op->attempts++;
116 }
117
118 /* queue up the operation */
119 if (flags & PVFS2_OP_PRIORITY) {
120 add_priority_op_to_request_list(op);
121 } else {
122 gossip_debug(GOSSIP_WAIT_DEBUG,
123 "%s:About to call add_op_to_request_list().\n",
124 __func__);
125 add_op_to_request_list(op);
126 }
127
128 if (!(flags & PVFS2_OP_NO_SEMAPHORE))
129 mutex_unlock(&request_mutex);
130
131 /*
132 * If we are asked to service an asynchronous operation from
133 * VFS perspective, we are done.
134 */
135 if (flags & PVFS2_OP_ASYNC)
136 return 0;
137
138 if (flags & PVFS2_OP_CANCELLATION) {
139 gossip_debug(GOSSIP_WAIT_DEBUG,
140 "%s:"
141 "About to call wait_for_cancellation_downcall.\n",
142 __func__);
143 ret = wait_for_cancellation_downcall(op);
144 } else {
145 ret = wait_for_matching_downcall(op);
146 }
147
148 if (ret < 0) {
149 /* failed to get matching downcall */
150 if (ret == -ETIMEDOUT) {
151 gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n",
152 op_name);
153 }
154 op->downcall.status = ret;
155 } else {
156 /* got matching downcall; make sure status is in errno format */
157 op->downcall.status =
158 pvfs2_normalize_to_errno(op->downcall.status);
159 ret = op->downcall.status;
160 }
161
162 if (!(flags & PVFS2_OP_INTERRUPTIBLE))
163 unmask_blocked_signals(&orig_sigset);
164
165 BUG_ON(ret != op->downcall.status);
166 /* retry if operation has not been serviced and if requested */
167 if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
168 gossip_debug(GOSSIP_WAIT_DEBUG,
169 "pvfs2: tag %llu (%s)"
170 " -- operation to be retried (%d attempt)\n",
171 llu(op->tag),
172 op_name,
173 op->attempts + 1);
174
175 if (!op->uses_shared_memory)
176 /*
177 * this operation doesn't use the shared memory
178 * system
179 */
180 goto retry_servicing;
181
182 /* op uses shared memory */
183 if (get_bufmap_init() == 0) {
184 /*
185 * This operation uses the shared memory system AND
186 * the system is not yet ready. This situation occurs
187 * when the client-core is restarted AND there were
188 * operations waiting to be processed or were already
189 * in process.
190 */
191 gossip_debug(GOSSIP_WAIT_DEBUG,
192 "uses_shared_memory is true.\n");
193 gossip_debug(GOSSIP_WAIT_DEBUG,
194 "Client core in-service status(%d).\n",
195 is_daemon_in_service());
196 gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
197 get_bufmap_init());
198 gossip_debug(GOSSIP_WAIT_DEBUG,
199 "operation's status is 0x%0x.\n",
200 op->op_state);
201
202 /*
203 * let process sleep for a few seconds so shared
204 * memory system can be initialized.
205 */
206 spin_lock_irqsave(&op->lock, irqflags);
207 add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry);
208 spin_unlock_irqrestore(&op->lock, irqflags);
209
210 set_current_state(TASK_INTERRUPTIBLE);
211
212 /*
213 * Wait for pvfs_bufmap_initialize() to wake me up
214 * within the allotted time.
215 */
216 ret = schedule_timeout(MSECS_TO_JIFFIES
217 (1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS));
218
219 gossip_debug(GOSSIP_WAIT_DEBUG,
220 "Value returned from schedule_timeout:"
221 "%d.\n",
222 ret);
223 gossip_debug(GOSSIP_WAIT_DEBUG,
224 "Is shared memory available? (%d).\n",
225 get_bufmap_init());
226
227 spin_lock_irqsave(&op->lock, irqflags);
228 remove_wait_queue(&pvfs2_bufmap_init_waitq,
229 &wait_entry);
230 spin_unlock_irqrestore(&op->lock, irqflags);
231
232 if (get_bufmap_init() == 0) {
233 gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n",
234 __func__,
235 PVFS2_BUFMAP_WAIT_TIMEOUT_SECS,
236 get_opname_string(op));
237 return -EIO;
238 }
239
240 /*
241 * Return to the calling function and re-populate a
242 * shared memory buffer.
243 */
244 return -EAGAIN;
245 }
246 }
247
248 gossip_debug(GOSSIP_WAIT_DEBUG,
249 "pvfs2: service_operation %s returning: %d for %p.\n",
250 op_name,
251 ret,
252 op);
253 return ret;
254}
255
256void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op)
257{
258 /*
259 * handle interrupted cases depending on what state we were in when
260 * the interruption is detected. there is a coarse grained lock
261 * across the operation.
262 *
263 * NOTE: be sure not to reverse lock ordering by locking an op lock
264 * while holding the request_list lock. Here, we first lock the op
265 * and then lock the appropriate list.
266 */
267 if (!op) {
268 gossip_debug(GOSSIP_WAIT_DEBUG,
269 "%s: op is null, ignoring\n",
270 __func__);
271 return;
272 }
273
274 /*
275 * one more sanity check, make sure it's in one of the possible states
276 * or don't try to cancel it
277 */
278 if (!(op_state_waiting(op) ||
279 op_state_in_progress(op) ||
280 op_state_serviced(op) ||
281 op_state_purged(op))) {
282 gossip_debug(GOSSIP_WAIT_DEBUG,
283 "%s: op %p not in a valid state (%0x), "
284 "ignoring\n",
285 __func__,
286 op,
287 op->op_state);
288 return;
289 }
290
291 spin_lock(&op->lock);
292
293 if (op_state_waiting(op)) {
294 /*
295 * upcall hasn't been read; remove op from upcall request
296 * list.
297 */
298 spin_unlock(&op->lock);
299 remove_op_from_request_list(op);
300 gossip_debug(GOSSIP_WAIT_DEBUG,
301 "Interrupted: Removed op %p from request_list\n",
302 op);
303 } else if (op_state_in_progress(op)) {
304 /* op must be removed from the in progress htable */
305 spin_unlock(&op->lock);
306 spin_lock(&htable_ops_in_progress_lock);
307 list_del(&op->list);
308 spin_unlock(&htable_ops_in_progress_lock);
309 gossip_debug(GOSSIP_WAIT_DEBUG,
310 "Interrupted: Removed op %p"
311 " from htable_ops_in_progress\n",
312 op);
313 } else if (!op_state_serviced(op)) {
314 spin_unlock(&op->lock);
315 gossip_err("interrupted operation is in a weird state 0x%x\n",
316 op->op_state);
317 }
318}
319
320/*
321 * sleeps on waitqueue waiting for matching downcall.
322 * if client-core finishes servicing, then we are good to go.
323 * else if client-core exits, we get woken up here, and retry with a timeout
324 *
325 * Post when this call returns to the caller, the specified op will no
326 * longer be on any list or htable.
327 *
328 * Returns 0 on success and -errno on failure
329 * Errors are:
330 * EAGAIN in case we want the caller to requeue and try again..
331 * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
332 * operation since client-core seems to be exiting too often
333 * or if we were interrupted.
334 */
335int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op)
336{
337 int ret = -EINVAL;
338 DECLARE_WAITQUEUE(wait_entry, current);
339
340 spin_lock(&op->lock);
341 add_wait_queue(&op->waitq, &wait_entry);
342 spin_unlock(&op->lock);
343
344 while (1) {
345 set_current_state(TASK_INTERRUPTIBLE);
346
347 spin_lock(&op->lock);
348 if (op_state_serviced(op)) {
349 spin_unlock(&op->lock);
350 ret = 0;
351 break;
352 }
353 spin_unlock(&op->lock);
354
355 if (!signal_pending(current)) {
356 /*
357 * if this was our first attempt and client-core
358 * has not purged our operation, we are happy to
359 * simply wait
360 */
361 spin_lock(&op->lock);
362 if (op->attempts == 0 && !op_state_purged(op)) {
363 spin_unlock(&op->lock);
364 schedule();
365 } else {
366 spin_unlock(&op->lock);
367 /*
368 * subsequent attempts, we retry exactly once
369 * with timeouts
370 */
371 if (!schedule_timeout(MSECS_TO_JIFFIES
372 (1000 * op_timeout_secs))) {
373 gossip_debug(GOSSIP_WAIT_DEBUG,
374 "*** %s:"
375 " operation timed out (tag"
376 " %llu, %p, att %d)\n",
377 __func__,
378 llu(op->tag),
379 op,
380 op->attempts);
381 ret = -ETIMEDOUT;
382 pvfs2_clean_up_interrupted_operation
383 (op);
384 break;
385 }
386 }
387 spin_lock(&op->lock);
388 op->attempts++;
389 /*
390 * if the operation was purged in the meantime, it
391 * is better to requeue it afresh but ensure that
392 * we have not been purged repeatedly. This could
393 * happen if client-core crashes when an op
394 * is being serviced, so we requeue the op, client
395 * core crashes again so we requeue the op, client
396 * core starts, and so on...
397 */
398 if (op_state_purged(op)) {
399 ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ?
400 -EAGAIN :
401 -EIO;
402 spin_unlock(&op->lock);
403 gossip_debug(GOSSIP_WAIT_DEBUG,
404 "*** %s:"
405 " operation purged (tag "
406 "%llu, %p, att %d)\n",
407 __func__,
408 llu(op->tag),
409 op,
410 op->attempts);
411 pvfs2_clean_up_interrupted_operation(op);
412 break;
413 }
414 spin_unlock(&op->lock);
415 continue;
416 }
417
418 gossip_debug(GOSSIP_WAIT_DEBUG,
419 "*** %s:"
420 " operation interrupted by a signal (tag "
421 "%llu, op %p)\n",
422 __func__,
423 llu(op->tag),
424 op);
425 pvfs2_clean_up_interrupted_operation(op);
426 ret = -EINTR;
427 break;
428 }
429
430 set_current_state(TASK_RUNNING);
431
432 spin_lock(&op->lock);
433 remove_wait_queue(&op->waitq, &wait_entry);
434 spin_unlock(&op->lock);
435
436 return ret;
437}
438
439/*
440 * similar to wait_for_matching_downcall(), but used in the special case
441 * of I/O cancellations.
442 *
443 * Note we need a special wait function because if this is called we already
444 * know that a signal is pending in current and need to service the
445 * cancellation upcall anyway. the only way to exit this is to either
446 * timeout or have the cancellation be serviced properly.
447 */
448int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op)
449{
450 int ret = -EINVAL;
451 DECLARE_WAITQUEUE(wait_entry, current);
452
453 spin_lock(&op->lock);
454 add_wait_queue(&op->waitq, &wait_entry);
455 spin_unlock(&op->lock);
456
457 while (1) {
458 set_current_state(TASK_INTERRUPTIBLE);
459
460 spin_lock(&op->lock);
461 if (op_state_serviced(op)) {
462 gossip_debug(GOSSIP_WAIT_DEBUG,
463 "%s:op-state is SERVICED.\n",
464 __func__);
465 spin_unlock(&op->lock);
466 ret = 0;
467 break;
468 }
469 spin_unlock(&op->lock);
470
471 if (signal_pending(current)) {
472 gossip_debug(GOSSIP_WAIT_DEBUG,
473 "%s:operation interrupted by a signal (tag"
474 " %llu, op %p)\n",
475 __func__,
476 llu(op->tag),
477 op);
478 pvfs2_clean_up_interrupted_operation(op);
479 ret = -EINTR;
480 break;
481 }
482
483 gossip_debug(GOSSIP_WAIT_DEBUG,
484 "%s:About to call schedule_timeout.\n",
485 __func__);
486 ret =
487 schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));
488
489 gossip_debug(GOSSIP_WAIT_DEBUG,
490 "%s:Value returned from schedule_timeout(%d).\n",
491 __func__,
492 ret);
493 if (!ret) {
494 gossip_debug(GOSSIP_WAIT_DEBUG,
495 "%s:*** operation timed out: %p\n",
496 __func__,
497 op);
498 pvfs2_clean_up_interrupted_operation(op);
499 ret = -ETIMEDOUT;
500 break;
501 }
502
503 gossip_debug(GOSSIP_WAIT_DEBUG,
504 "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
505 __func__);
506 ret = -ETIMEDOUT;
507 break;
508 }
509
510 set_current_state(TASK_RUNNING);
511
512 spin_lock(&op->lock);
513 remove_wait_queue(&op->waitq, &wait_entry);
514 spin_unlock(&op->lock);
515
516 gossip_debug(GOSSIP_WAIT_DEBUG,
517 "%s:returning ret(%d)\n",
518 __func__,
519 ret);
520
521 return ret;
522}
diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c
new file mode 100644
index 000000000000..2766090f5ca4
--- /dev/null
+++ b/fs/orangefs/xattr.c
@@ -0,0 +1,532 @@
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7/*
8 * Linux VFS extended attribute operations.
9 */
10
11#include "protocol.h"
12#include "pvfs2-kernel.h"
13#include "pvfs2-bufmap.h"
14#include <linux/posix_acl_xattr.h>
15#include <linux/xattr.h>
16
17
#define SYSTEM_PVFS2_KEY "system.pvfs2."
#define SYSTEM_PVFS2_KEY_LEN 13

/*
 * Decide whether a key may be shown in listxattr output.
 *
 * NOTE: the return value is inverted with respect to the function name:
 *   1 - the key does NOT start with SYSTEM_PVFS2_KEY (or is too short to
 *       contain it) and should be listed;
 *   0 - the key starts with SYSTEM_PVFS2_KEY and should be hidden.
 *
 * @key:  start of the key text
 * @size: number of bytes available at @key
 */
static int is_reserved_key(const char *key, size_t size)
{
	int listable = 1;

	/* Only a key at least as long as the prefix can carry it. */
	if (size >= SYSTEM_PVFS2_KEY_LEN &&
	    strncmp(key, SYSTEM_PVFS2_KEY, SYSTEM_PVFS2_KEY_LEN) == 0)
		listable = 0;

	return listable;
}
37
38static inline int convert_to_internal_xattr_flags(int setxattr_flags)
39{
40 int internal_flag = 0;
41
42 if (setxattr_flags & XATTR_REPLACE) {
43 /* Attribute must exist! */
44 internal_flag = PVFS_XATTR_REPLACE;
45 } else if (setxattr_flags & XATTR_CREATE) {
46 /* Attribute must not exist */
47 internal_flag = PVFS_XATTR_CREATE;
48 }
49 return internal_flag;
50}
51
52
53/*
54 * Tries to get a specified key's attributes of a given
55 * file into a user-specified buffer. Note that the getxattr
56 * interface allows for the users to probe the size of an
57 * extended attribute by passing in a value of 0 to size.
58 * Thus our return value is always the size of the attribute
59 * unless the key does not exist for the file and/or if
60 * there were errors in fetching the attribute value.
61 */
62ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix,
63 const char *name, void *buffer, size_t size)
64{
65 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
66 struct pvfs2_kernel_op_s *new_op = NULL;
67 ssize_t ret = -ENOMEM;
68 ssize_t length = 0;
69 int fsuid;
70 int fsgid;
71
72 gossip_debug(GOSSIP_XATTR_DEBUG,
73 "%s: prefix %s name %s, buffer_size %zd\n",
74 __func__, prefix, name, size);
75
76 if (name == NULL || (size > 0 && buffer == NULL)) {
77 gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n");
78 return -EINVAL;
79 }
80 if (size < 0 ||
81 (strlen(name) + strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN) {
82 gossip_err("Invalid size (%d) or key length (%d)\n",
83 (int)size,
84 (int)(strlen(name) + strlen(prefix)));
85 return -EINVAL;
86 }
87
88 fsuid = from_kuid(current_user_ns(), current_fsuid());
89 fsgid = from_kgid(current_user_ns(), current_fsgid());
90
91 gossip_debug(GOSSIP_XATTR_DEBUG,
92 "getxattr on inode %pU, name %s "
93 "(uid %o, gid %o)\n",
94 get_khandle_from_ino(inode),
95 name,
96 fsuid,
97 fsgid);
98
99 down_read(&pvfs2_inode->xattr_sem);
100
101 new_op = op_alloc(PVFS2_VFS_OP_GETXATTR);
102 if (!new_op)
103 goto out_unlock;
104
105 new_op->upcall.req.getxattr.refn = pvfs2_inode->refn;
106 ret = snprintf((char *)new_op->upcall.req.getxattr.key,
107 PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
108
109 /*
110 * NOTE: Although keys are meant to be NULL terminated textual
111 * strings, I am going to explicitly pass the length just in case
112 * we change this later on...
113 */
114 new_op->upcall.req.getxattr.key_sz = ret + 1;
115
116 ret = service_operation(new_op, "pvfs2_inode_getxattr",
117 get_interruptible_flag(inode));
118 if (ret != 0) {
119 if (ret == -ENOENT) {
120 ret = -ENODATA;
121 gossip_debug(GOSSIP_XATTR_DEBUG,
122 "pvfs2_inode_getxattr: inode %pU key %s"
123 " does not exist!\n",
124 get_khandle_from_ino(inode),
125 (char *)new_op->upcall.req.getxattr.key);
126 }
127 goto out_release_op;
128 }
129
130 /*
131 * Length returned includes null terminator.
132 */
133 length = new_op->downcall.resp.getxattr.val_sz;
134
135 /*
136 * Just return the length of the queried attribute.
137 */
138 if (size == 0) {
139 ret = length;
140 goto out_release_op;
141 }
142
143 /*
144 * Check to see if key length is > provided buffer size.
145 */
146 if (length > size) {
147 ret = -ERANGE;
148 goto out_release_op;
149 }
150
151 memset(buffer, 0, size);
152 memcpy(buffer, new_op->downcall.resp.getxattr.val, length);
153 gossip_debug(GOSSIP_XATTR_DEBUG,
154 "pvfs2_inode_getxattr: inode %pU "
155 "key %s key_sz %d, val_len %d\n",
156 get_khandle_from_ino(inode),
157 (char *)new_op->
158 upcall.req.getxattr.key,
159 (int)new_op->
160 upcall.req.getxattr.key_sz,
161 (int)ret);
162
163 ret = length;
164
165out_release_op:
166 op_release(new_op);
167out_unlock:
168 up_read(&pvfs2_inode->xattr_sem);
169 return ret;
170}
171
172static int pvfs2_inode_removexattr(struct inode *inode,
173 const char *prefix,
174 const char *name,
175 int flags)
176{
177 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
178 struct pvfs2_kernel_op_s *new_op = NULL;
179 int ret = -ENOMEM;
180
181 down_write(&pvfs2_inode->xattr_sem);
182 new_op = op_alloc(PVFS2_VFS_OP_REMOVEXATTR);
183 if (!new_op)
184 goto out_unlock;
185
186 new_op->upcall.req.removexattr.refn = pvfs2_inode->refn;
187 /*
188 * NOTE: Although keys are meant to be NULL terminated
189 * textual strings, I am going to explicitly pass the
190 * length just in case we change this later on...
191 */
192 ret = snprintf((char *)new_op->upcall.req.removexattr.key,
193 PVFS_MAX_XATTR_NAMELEN,
194 "%s%s",
195 (prefix ? prefix : ""),
196 name);
197 new_op->upcall.req.removexattr.key_sz = ret + 1;
198
199 gossip_debug(GOSSIP_XATTR_DEBUG,
200 "pvfs2_inode_removexattr: key %s, key_sz %d\n",
201 (char *)new_op->upcall.req.removexattr.key,
202 (int)new_op->upcall.req.removexattr.key_sz);
203
204 ret = service_operation(new_op,
205 "pvfs2_inode_removexattr",
206 get_interruptible_flag(inode));
207 if (ret == -ENOENT) {
208 /*
209 * Request to replace a non-existent attribute is an error.
210 */
211 if (flags & XATTR_REPLACE)
212 ret = -ENODATA;
213 else
214 ret = 0;
215 }
216
217 gossip_debug(GOSSIP_XATTR_DEBUG,
218 "pvfs2_inode_removexattr: returning %d\n", ret);
219
220 op_release(new_op);
221out_unlock:
222 up_write(&pvfs2_inode->xattr_sem);
223 return ret;
224}
225
226/*
227 * Tries to set an attribute for a given key on a file.
228 *
229 * Returns a -ve number on error and 0 on success. Key is text, but value
230 * can be binary!
231 */
232int pvfs2_inode_setxattr(struct inode *inode, const char *prefix,
233 const char *name, const void *value, size_t size, int flags)
234{
235 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
236 struct pvfs2_kernel_op_s *new_op;
237 int internal_flag = 0;
238 int ret = -ENOMEM;
239
240 gossip_debug(GOSSIP_XATTR_DEBUG,
241 "%s: prefix %s, name %s, buffer_size %zd\n",
242 __func__, prefix, name, size);
243
244 if (size < 0 ||
245 size >= PVFS_MAX_XATTR_VALUELEN ||
246 flags < 0) {
247 gossip_err("pvfs2_inode_setxattr: bogus values of size(%d), flags(%d)\n",
248 (int)size,
249 flags);
250 return -EINVAL;
251 }
252
253 if (name == NULL ||
254 (size > 0 && value == NULL)) {
255 gossip_err("pvfs2_inode_setxattr: bogus NULL pointers!\n");
256 return -EINVAL;
257 }
258
259 internal_flag = convert_to_internal_xattr_flags(flags);
260
261 if (prefix) {
262 if (strlen(name) + strlen(prefix) >= PVFS_MAX_XATTR_NAMELEN) {
263 gossip_err
264 ("pvfs2_inode_setxattr: bogus key size (%d)\n",
265 (int)(strlen(name) + strlen(prefix)));
266 return -EINVAL;
267 }
268 } else {
269 if (strlen(name) >= PVFS_MAX_XATTR_NAMELEN) {
270 gossip_err
271 ("pvfs2_inode_setxattr: bogus key size (%d)\n",
272 (int)(strlen(name)));
273 return -EINVAL;
274 }
275 }
276
277 /* This is equivalent to a removexattr */
278 if (size == 0 && value == NULL) {
279 gossip_debug(GOSSIP_XATTR_DEBUG,
280 "removing xattr (%s%s)\n",
281 prefix,
282 name);
283 return pvfs2_inode_removexattr(inode, prefix, name, flags);
284 }
285
286 gossip_debug(GOSSIP_XATTR_DEBUG,
287 "setxattr on inode %pU, name %s\n",
288 get_khandle_from_ino(inode),
289 name);
290
291 down_write(&pvfs2_inode->xattr_sem);
292 new_op = op_alloc(PVFS2_VFS_OP_SETXATTR);
293 if (!new_op)
294 goto out_unlock;
295
296
297 new_op->upcall.req.setxattr.refn = pvfs2_inode->refn;
298 new_op->upcall.req.setxattr.flags = internal_flag;
299 /*
300 * NOTE: Although keys are meant to be NULL terminated textual
301 * strings, I am going to explicitly pass the length just in
302 * case we change this later on...
303 */
304 ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key,
305 PVFS_MAX_XATTR_NAMELEN,
306 "%s%s",
307 prefix, name);
308 new_op->upcall.req.setxattr.keyval.key_sz = ret + 1;
309 memcpy(new_op->upcall.req.setxattr.keyval.val, value, size);
310 new_op->upcall.req.setxattr.keyval.val_sz = size;
311
312 gossip_debug(GOSSIP_XATTR_DEBUG,
313 "pvfs2_inode_setxattr: key %s, key_sz %d "
314 " value size %zd\n",
315 (char *)new_op->upcall.req.setxattr.keyval.key,
316 (int)new_op->upcall.req.setxattr.keyval.key_sz,
317 size);
318
319 ret = service_operation(new_op,
320 "pvfs2_inode_setxattr",
321 get_interruptible_flag(inode));
322
323 gossip_debug(GOSSIP_XATTR_DEBUG,
324 "pvfs2_inode_setxattr: returning %d\n",
325 ret);
326
327 /* when request is serviced properly, free req op struct */
328 op_release(new_op);
329out_unlock:
330 up_write(&pvfs2_inode->xattr_sem);
331 return ret;
332}
333
334/*
335 * Tries to get a specified object's keys into a user-specified buffer of a
336 * given size. Note that like the previous instances of xattr routines, this
337 * also allows you to pass in a NULL pointer and 0 size to probe the size for
338 * subsequent memory allocations. Thus our return value is always the size of
339 * all the keys unless there were errors in fetching the keys!
340 */
341ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
342{
343 struct inode *inode = dentry->d_inode;
344 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
345 struct pvfs2_kernel_op_s *new_op;
346 __u64 token = PVFS_ITERATE_START;
347 ssize_t ret = -ENOMEM;
348 ssize_t total = 0;
349 ssize_t length = 0;
350 int count_keys = 0;
351 int key_size;
352 int i = 0;
353
354 if (size > 0 && buffer == NULL) {
355 gossip_err("%s: bogus NULL pointers\n", __func__);
356 return -EINVAL;
357 }
358 if (size < 0) {
359 gossip_err("Invalid size (%d)\n", (int)size);
360 return -EINVAL;
361 }
362
363 down_read(&pvfs2_inode->xattr_sem);
364 new_op = op_alloc(PVFS2_VFS_OP_LISTXATTR);
365 if (!new_op)
366 goto out_unlock;
367
368 if (buffer && size > 0)
369 memset(buffer, 0, size);
370
371try_again:
372 key_size = 0;
373 new_op->upcall.req.listxattr.refn = pvfs2_inode->refn;
374 new_op->upcall.req.listxattr.token = token;
375 new_op->upcall.req.listxattr.requested_count =
376 (size == 0) ? 0 : PVFS_MAX_XATTR_LISTLEN;
377 ret = service_operation(new_op, __func__,
378 get_interruptible_flag(inode));
379 if (ret != 0)
380 goto done;
381
382 if (size == 0) {
383 /*
384 * This is a bit of a big upper limit, but I did not want to
385 * spend too much time getting this correct, since users end
386 * up allocating memory rather than us...
387 */
388 total = new_op->downcall.resp.listxattr.returned_count *
389 PVFS_MAX_XATTR_NAMELEN;
390 goto done;
391 }
392
393 length = new_op->downcall.resp.listxattr.keylen;
394 if (length == 0)
395 goto done;
396
397 /*
398 * Check to see how much can be fit in the buffer. Fit only whole keys.
399 */
400 for (i = 0; i < new_op->downcall.resp.listxattr.returned_count; i++) {
401 if (total + new_op->downcall.resp.listxattr.lengths[i] > size)
402 goto done;
403
404 /*
405 * Since many dumb programs try to setxattr() on our reserved
406 * xattrs this is a feeble attempt at defeating those by not
407 * listing them in the output of listxattr.. sigh
408 */
409 if (is_reserved_key(new_op->downcall.resp.listxattr.key +
410 key_size,
411 new_op->downcall.resp.
412 listxattr.lengths[i])) {
413 gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
414 i, new_op->downcall.resp.listxattr.key +
415 key_size);
416 memcpy(buffer + total,
417 new_op->downcall.resp.listxattr.key + key_size,
418 new_op->downcall.resp.listxattr.lengths[i]);
419 total += new_op->downcall.resp.listxattr.lengths[i];
420 count_keys++;
421 } else {
422 gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
423 i, new_op->downcall.resp.listxattr.key +
424 key_size);
425 }
426 key_size += new_op->downcall.resp.listxattr.lengths[i];
427 }
428
429 /*
430 * Since the buffer was large enough, we might have to continue
431 * fetching more keys!
432 */
433 token = new_op->downcall.resp.listxattr.token;
434 if (token != PVFS_ITERATE_END)
435 goto try_again;
436
437done:
438 gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d"
439 " [size of buffer %ld] (filled in %d keys)\n",
440 __func__,
441 ret ? (int)ret : (int)total,
442 (long)size,
443 count_keys);
444 op_release(new_op);
445 if (ret == 0)
446 ret = total;
447out_unlock:
448 up_read(&pvfs2_inode->xattr_sem);
449 return ret;
450}
451
452int pvfs2_xattr_set_default(struct dentry *dentry,
453 const char *name,
454 const void *buffer,
455 size_t size,
456 int flags,
457 int handler_flags)
458{
459 return pvfs2_inode_setxattr(dentry->d_inode,
460 PVFS2_XATTR_NAME_DEFAULT_PREFIX,
461 name,
462 buffer,
463 size,
464 flags);
465}
466
467int pvfs2_xattr_get_default(struct dentry *dentry,
468 const char *name,
469 void *buffer,
470 size_t size,
471 int handler_flags)
472{
473 return pvfs2_inode_getxattr(dentry->d_inode,
474 PVFS2_XATTR_NAME_DEFAULT_PREFIX,
475 name,
476 buffer,
477 size);
478
479}
480
481static int pvfs2_xattr_set_trusted(struct dentry *dentry,
482 const char *name,
483 const void *buffer,
484 size_t size,
485 int flags,
486 int handler_flags)
487{
488 return pvfs2_inode_setxattr(dentry->d_inode,
489 PVFS2_XATTR_NAME_TRUSTED_PREFIX,
490 name,
491 buffer,
492 size,
493 flags);
494}
495
496static int pvfs2_xattr_get_trusted(struct dentry *dentry,
497 const char *name,
498 void *buffer,
499 size_t size,
500 int handler_flags)
501{
502 return pvfs2_inode_getxattr(dentry->d_inode,
503 PVFS2_XATTR_NAME_TRUSTED_PREFIX,
504 name,
505 buffer,
506 size);
507}
508
509static struct xattr_handler pvfs2_xattr_trusted_handler = {
510 .prefix = PVFS2_XATTR_NAME_TRUSTED_PREFIX,
511 .get = pvfs2_xattr_get_trusted,
512 .set = pvfs2_xattr_set_trusted,
513};
514
515static struct xattr_handler pvfs2_xattr_default_handler = {
516 /*
517 * NOTE: this is set to be the empty string.
518 * so that all un-prefixed xattrs keys get caught
519 * here!
520 */
521 .prefix = PVFS2_XATTR_NAME_DEFAULT_PREFIX,
522 .get = pvfs2_xattr_get_default,
523 .set = pvfs2_xattr_set_default,
524};
525
526const struct xattr_handler *pvfs2_xattr_handlers[] = {
527 &posix_acl_access_xattr_handler,
528 &posix_acl_default_xattr_handler,
529 &pvfs2_xattr_trusted_handler,
530 &pvfs2_xattr_default_handler,
531 NULL
532};