Diffstat (limited to 'fs/xfs/linux-2.6')
38 files changed, 12989 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c new file mode 100644 index 000000000000..364ea8c386b1 --- /dev/null +++ b/fs/xfs/linux-2.6/kmem.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include <linux/sched.h> | ||
34 | #include <linux/mm.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | #include <linux/highmem.h> | ||
37 | #include <linux/swap.h> | ||
38 | #include <linux/blkdev.h> | ||
39 | |||
40 | #include "time.h" | ||
41 | #include "kmem.h" | ||
42 | |||
43 | #define MAX_VMALLOCS 6 | ||
44 | #define MAX_SLAB_SIZE 0x20000 | ||
45 | |||
46 | |||
47 | void * | ||
48 | kmem_alloc(size_t size, int flags) | ||
49 | { | ||
50 | int retries = 0; | ||
51 | int lflags = kmem_flags_convert(flags); | ||
52 | void *ptr; | ||
53 | |||
54 | do { | ||
55 | if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) | ||
56 | ptr = kmalloc(size, lflags); | ||
57 | else | ||
58 | ptr = __vmalloc(size, lflags, PAGE_KERNEL); | ||
59 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | ||
60 | return ptr; | ||
61 | if (!(++retries % 100)) | ||
62 | printk(KERN_ERR "XFS: possible memory allocation " | ||
63 | "deadlock in %s (mode:0x%x)\n", | ||
64 | __FUNCTION__, lflags); | ||
65 | blk_congestion_wait(WRITE, HZ/50); | ||
66 | } while (1); | ||
67 | } | ||
68 | |||
69 | void * | ||
70 | kmem_zalloc(size_t size, int flags) | ||
71 | { | ||
72 | void *ptr; | ||
73 | |||
74 | ptr = kmem_alloc(size, flags); | ||
75 | if (ptr) | ||
76 | memset((char *)ptr, 0, (int)size); | ||
77 | return ptr; | ||
78 | } | ||
79 | |||
80 | void | ||
81 | kmem_free(void *ptr, size_t size) | ||
82 | { | ||
83 | if (((unsigned long)ptr < VMALLOC_START) || | ||
84 | ((unsigned long)ptr >= VMALLOC_END)) { | ||
85 | kfree(ptr); | ||
86 | } else { | ||
87 | vfree(ptr); | ||
88 | } | ||
89 | } | ||
90 | |||
91 | void * | ||
92 | kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) | ||
93 | { | ||
94 | void *new; | ||
95 | |||
96 | new = kmem_alloc(newsize, flags); | ||
97 | if (ptr) { | ||
98 | if (new) | ||
99 | memcpy(new, ptr, | ||
100 | ((oldsize < newsize) ? oldsize : newsize)); | ||
101 | kmem_free(ptr, oldsize); | ||
102 | } | ||
103 | return new; | ||
104 | } | ||
105 | |||
106 | void * | ||
107 | kmem_zone_alloc(kmem_zone_t *zone, int flags) | ||
108 | { | ||
109 | int retries = 0; | ||
110 | int lflags = kmem_flags_convert(flags); | ||
111 | void *ptr; | ||
112 | |||
113 | do { | ||
114 | ptr = kmem_cache_alloc(zone, lflags); | ||
115 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | ||
116 | return ptr; | ||
117 | if (!(++retries % 100)) | ||
118 | printk(KERN_ERR "XFS: possible memory allocation " | ||
119 | "deadlock in %s (mode:0x%x)\n", | ||
120 | __FUNCTION__, lflags); | ||
121 | blk_congestion_wait(WRITE, HZ/50); | ||
122 | } while (1); | ||
123 | } | ||
124 | |||
125 | void * | ||
126 | kmem_zone_zalloc(kmem_zone_t *zone, int flags) | ||
127 | { | ||
128 | void *ptr; | ||
129 | |||
130 | ptr = kmem_zone_alloc(zone, flags); | ||
131 | if (ptr) | ||
132 | memset((char *)ptr, 0, kmem_cache_size(zone)); | ||
133 | return ptr; | ||
134 | } | ||
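The allocators above retry indefinitely for KM_SLEEP callers, switching to __vmalloc() once a request reaches MAX_SLAB_SIZE, and only return NULL when KM_NOSLEEP or KM_MAYFAIL is passed. A minimal caller sketch against this kmem.h interface (the function and buffer names here are hypothetical, for illustration only):

#include <linux/errno.h>
#include "kmem.h"

/* Hypothetical helper: allocate, use and free a zeroed scratch buffer. */
static int example_use_scratch(size_t len)
{
	void *buf;

	/* KM_SLEEP: loops until memory is available, never returns NULL. */
	buf = kmem_zalloc(len, KM_SLEEP);
	/* ... fill and use buf ... */
	kmem_free(buf, len);		/* the caller tracks the size */

	/* KM_MAYFAIL: best effort, may return NULL. */
	buf = kmem_alloc(len, KM_SLEEP | KM_MAYFAIL);
	if (!buf)
		return -ENOMEM;
	kmem_free(buf, len);
	return 0;
}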
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h new file mode 100644 index 000000000000..1397b669b059 --- /dev/null +++ b/fs/xfs/linux-2.6/kmem.h | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_KMEM_H__ | ||
33 | #define __XFS_SUPPORT_KMEM_H__ | ||
34 | |||
35 | #include <linux/slab.h> | ||
36 | #include <linux/sched.h> | ||
37 | #include <linux/mm.h> | ||
38 | |||
39 | /* | ||
40 | * memory management routines | ||
41 | */ | ||
42 | #define KM_SLEEP 0x0001 | ||
43 | #define KM_NOSLEEP 0x0002 | ||
44 | #define KM_NOFS 0x0004 | ||
45 | #define KM_MAYFAIL 0x0008 | ||
46 | |||
47 | #define kmem_zone kmem_cache_s | ||
48 | #define kmem_zone_t kmem_cache_t | ||
49 | |||
50 | typedef unsigned long xfs_pflags_t; | ||
51 | |||
52 | #define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO) | ||
53 | #define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) | ||
54 | |||
55 | #define PFLAGS_SET_NOIO() do { \ | ||
56 | current->flags |= PF_NOIO; \ | ||
57 | } while (0) | ||
58 | |||
59 | #define PFLAGS_CLEAR_NOIO() do { \ | ||
60 | current->flags &= ~PF_NOIO; \ | ||
61 | } while (0) | ||
62 | |||
63 | /* these could be nested, so we save state */ | ||
64 | #define PFLAGS_SET_FSTRANS(STATEP) do { \ | ||
65 | *(STATEP) = current->flags; \ | ||
66 | current->flags |= PF_FSTRANS; \ | ||
67 | } while (0) | ||
68 | |||
69 | #define PFLAGS_CLEAR_FSTRANS(STATEP) do { \ | ||
70 | *(STATEP) = current->flags; \ | ||
71 | current->flags &= ~PF_FSTRANS; \ | ||
72 | } while (0) | ||
73 | |||
74 | /* Restore the PF_FSTRANS state to what was saved in STATEP */ | ||
75 | #define PFLAGS_RESTORE_FSTRANS(STATEP) do { \ | ||
76 | current->flags = ((current->flags & ~PF_FSTRANS) | \ | ||
77 | (*(STATEP) & PF_FSTRANS)); \ | ||
78 | } while (0) | ||
79 | |||
80 | #define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ | ||
81 | *(NSTATEP) = *(OSTATEP); \ | ||
82 | } while (0) | ||
83 | |||
84 | static __inline unsigned int kmem_flags_convert(int flags) | ||
85 | { | ||
86 | int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ | ||
87 | |||
88 | #ifdef DEBUG | ||
89 | if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { | ||
90 | printk(KERN_WARNING | ||
91 | "XFS: memory allocation with wrong flags (%x)\n", flags); | ||
92 | BUG(); | ||
93 | } | ||
94 | #endif | ||
95 | |||
96 | if (flags & KM_NOSLEEP) { | ||
97 | lflags |= GFP_ATOMIC; | ||
98 | } else { | ||
99 | lflags |= GFP_KERNEL; | ||
100 | |||
101 | /* avoid recursive callbacks to filesystem during transactions */ | ||
102 | if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) | ||
103 | lflags &= ~__GFP_FS; | ||
104 | } | ||
105 | |||
106 | return lflags; | ||
107 | } | ||
108 | |||
109 | static __inline kmem_zone_t * | ||
110 | kmem_zone_init(int size, char *zone_name) | ||
111 | { | ||
112 | return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); | ||
113 | } | ||
114 | |||
115 | static __inline void | ||
116 | kmem_zone_free(kmem_zone_t *zone, void *ptr) | ||
117 | { | ||
118 | kmem_cache_free(zone, ptr); | ||
119 | } | ||
120 | |||
121 | static __inline void | ||
122 | kmem_zone_destroy(kmem_zone_t *zone) | ||
123 | { | ||
124 | if (zone && kmem_cache_destroy(zone)) | ||
125 | BUG(); | ||
126 | } | ||
127 | |||
128 | extern void *kmem_zone_zalloc(kmem_zone_t *, int); | ||
129 | extern void *kmem_zone_alloc(kmem_zone_t *, int); | ||
130 | |||
131 | extern void *kmem_alloc(size_t, int); | ||
132 | extern void *kmem_realloc(void *, size_t, size_t, int); | ||
133 | extern void *kmem_zalloc(size_t, int); | ||
134 | extern void kmem_free(void *, size_t); | ||
135 | |||
136 | typedef struct shrinker *kmem_shaker_t; | ||
137 | typedef int (*kmem_shake_func_t)(int, unsigned int); | ||
138 | |||
139 | static __inline kmem_shaker_t | ||
140 | kmem_shake_register(kmem_shake_func_t sfunc) | ||
141 | { | ||
142 | return set_shrinker(DEFAULT_SEEKS, sfunc); | ||
143 | } | ||
144 | |||
145 | static __inline void | ||
146 | kmem_shake_deregister(kmem_shaker_t shrinker) | ||
147 | { | ||
148 | remove_shrinker(shrinker); | ||
149 | } | ||
150 | |||
151 | static __inline int | ||
152 | kmem_shake_allow(unsigned int gfp_mask) | ||
153 | { | ||
154 | return (gfp_mask & __GFP_WAIT); | ||
155 | } | ||
156 | |||
157 | #endif /* __XFS_SUPPORT_KMEM_H__ */ | ||
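kmem_flags_convert() maps the IRIX-style KM_* flags onto Linux GFP flags (KM_NOSLEEP becomes GFP_ATOMIC; transaction context or KM_NOFS masks off __GFP_FS), and the kmem_shake_* wrappers map onto the 2.6 set_shrinker()/remove_shrinker() API. A sketch of how a cache shaker would be hooked up through these wrappers (callback body and names are hypothetical):

#include "kmem.h"

/* Hypothetical shrinker callback: asked to release "nr" objects under gfp_mask. */
static int example_shake(int nr, unsigned int gfp_mask)
{
	if (!kmem_shake_allow(gfp_mask))	/* skip non-blocking reclaim contexts */
		return 0;
	/* ... trim up to "nr" cached objects, return the number still cached ... */
	return 0;
}

static kmem_shaker_t example_shaker;

static void example_shaker_init(void)
{
	example_shaker = kmem_shake_register(example_shake);
}

static void example_shaker_exit(void)
{
	kmem_shake_deregister(example_shaker);
}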
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h new file mode 100644 index 000000000000..d2c11a098ff2 --- /dev/null +++ b/fs/xfs/linux-2.6/mrlock.h | |||
@@ -0,0 +1,106 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_MRLOCK_H__ | ||
33 | #define __XFS_SUPPORT_MRLOCK_H__ | ||
34 | |||
35 | #include <linux/rwsem.h> | ||
36 | |||
37 | enum { MR_NONE, MR_ACCESS, MR_UPDATE }; | ||
38 | |||
39 | typedef struct { | ||
40 | struct rw_semaphore mr_lock; | ||
41 | int mr_writer; | ||
42 | } mrlock_t; | ||
43 | |||
44 | #define mrinit(mrp, name) \ | ||
45 | ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) ) | ||
46 | #define mrlock_init(mrp, t,n,s) mrinit(mrp, n) | ||
47 | #define mrfree(mrp) do { } while (0) | ||
48 | #define mraccess(mrp) mraccessf(mrp, 0) | ||
49 | #define mrupdate(mrp) mrupdatef(mrp, 0) | ||
50 | |||
51 | static inline void mraccessf(mrlock_t *mrp, int flags) | ||
52 | { | ||
53 | down_read(&mrp->mr_lock); | ||
54 | } | ||
55 | |||
56 | static inline void mrupdatef(mrlock_t *mrp, int flags) | ||
57 | { | ||
58 | down_write(&mrp->mr_lock); | ||
59 | mrp->mr_writer = 1; | ||
60 | } | ||
61 | |||
62 | static inline int mrtryaccess(mrlock_t *mrp) | ||
63 | { | ||
64 | return down_read_trylock(&mrp->mr_lock); | ||
65 | } | ||
66 | |||
67 | static inline int mrtryupdate(mrlock_t *mrp) | ||
68 | { | ||
69 | if (!down_write_trylock(&mrp->mr_lock)) | ||
70 | return 0; | ||
71 | mrp->mr_writer = 1; | ||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | static inline void mrunlock(mrlock_t *mrp) | ||
76 | { | ||
77 | if (mrp->mr_writer) { | ||
78 | mrp->mr_writer = 0; | ||
79 | up_write(&mrp->mr_lock); | ||
80 | } else { | ||
81 | up_read(&mrp->mr_lock); | ||
82 | } | ||
83 | } | ||
84 | |||
85 | static inline void mrdemote(mrlock_t *mrp) | ||
86 | { | ||
87 | mrp->mr_writer = 0; | ||
88 | downgrade_write(&mrp->mr_lock); | ||
89 | } | ||
90 | |||
91 | #ifdef DEBUG | ||
92 | /* | ||
93 | * Debug-only routine: without some platform-specific asm code, we can | ||
94 | * only answer requests regarding whether we hold the lock for write | ||
95 | * (reader state is outside our visibility; we only track writer state). | ||
96 | * Note: this means !ismrlocked would give false positives, so don't do that. | ||
97 | */ | ||
98 | static inline int ismrlocked(mrlock_t *mrp, int type) | ||
99 | { | ||
100 | if (mrp && type == MR_UPDATE) | ||
101 | return mrp->mr_writer; | ||
102 | return 1; | ||
103 | } | ||
104 | #endif | ||
105 | |||
106 | #endif /* __XFS_SUPPORT_MRLOCK_H__ */ | ||
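The mrlock_t wrapper layers IRIX multi-reader lock semantics over a Linux rw_semaphore, with mr_writer recording whether the current holder is a writer so that mrunlock() and ismrlocked() can tell read and write holds apart. A usage sketch (names are hypothetical):

#include "mrlock.h"

/* Hypothetical: an object protected by an mrlock, updated then demoted. */
static mrlock_t example_mrlock;
static int example_value;

static void example_mrlock_setup(void)
{
	mrinit(&example_mrlock, "example");
}

static int example_update_then_read(int v)
{
	int seen;

	mrupdate(&example_mrlock);	/* exclusive (MR_UPDATE) access */
	example_value = v;
	mrdemote(&example_mrlock);	/* downgrade to shared without dropping */
	seen = example_value;
	mrunlock(&example_mrlock);	/* mr_writer is now 0, so this does up_read() */
	return seen;
}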
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h new file mode 100644 index 000000000000..0b296bb944cb --- /dev/null +++ b/fs/xfs/linux-2.6/mutex.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_MUTEX_H__ | ||
33 | #define __XFS_SUPPORT_MUTEX_H__ | ||
34 | |||
35 | #include <linux/spinlock.h> | ||
36 | #include <asm/semaphore.h> | ||
37 | |||
38 | /* | ||
39 | * Map the mutexes from IRIX to Linux semaphores. | ||
40 | * | ||
41 | * Destroy simply initializes the semaphore to -99, which should block | ||
42 | * all other callers. | ||
43 | */ | ||
44 | #define MUTEX_DEFAULT 0x0 | ||
45 | typedef struct semaphore mutex_t; | ||
46 | |||
47 | #define mutex_init(lock, type, name) sema_init(lock, 1) | ||
48 | #define mutex_destroy(lock) sema_init(lock, -99) | ||
49 | #define mutex_lock(lock, num) down(lock) | ||
50 | #define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1) | ||
51 | #define mutex_unlock(lock) up(lock) | ||
52 | |||
53 | #endif /* __XFS_SUPPORT_MUTEX_H__ */ | ||
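Since the IRIX mutex calls take extra arguments (a type, a name, a priority) that have no Linux equivalent, the macros above simply drop them and fall through to a counting semaphore initialised to 1. A caller sketch (hypothetical names; the second argument to mutex_lock is ignored by this mapping):

#include "mutex.h"

static mutex_t example_mutex;
static int example_count;

static void example_mutex_setup(void)
{
	mutex_init(&example_mutex, MUTEX_DEFAULT, "example");
}

static void example_bump(void)
{
	mutex_lock(&example_mutex, 0);	/* the "priority" argument is unused */
	example_count++;
	mutex_unlock(&example_mutex);
}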
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h new file mode 100644 index 000000000000..30b67b4e1cbf --- /dev/null +++ b/fs/xfs/linux-2.6/sema.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_SEMA_H__ | ||
33 | #define __XFS_SUPPORT_SEMA_H__ | ||
34 | |||
35 | #include <linux/time.h> | ||
36 | #include <linux/wait.h> | ||
37 | #include <asm/atomic.h> | ||
38 | #include <asm/semaphore.h> | ||
39 | |||
40 | /* | ||
41 | * sema_t structure just maps to struct semaphore in Linux kernel. | ||
42 | */ | ||
43 | |||
44 | typedef struct semaphore sema_t; | ||
45 | |||
46 | #define init_sema(sp, val, c, d) sema_init(sp, val) | ||
47 | #define initsema(sp, val) sema_init(sp, val) | ||
48 | #define initnsema(sp, val, name) sema_init(sp, val) | ||
49 | #define psema(sp, b) down(sp) | ||
50 | #define vsema(sp) up(sp) | ||
51 | #define valusema(sp) (atomic_read(&(sp)->count)) | ||
52 | #define freesema(sema) | ||
53 | |||
54 | /* | ||
55 | * Map cpsema (try to get the sema) to down_trylock. We need to invert | ||
56 | * the return values, since cpsema returns 1 (acquired) or 0 (failed) and | ||
57 | * down_trylock returns the reverse: 0 (acquired) or 1 (failed). | ||
58 | */ | ||
59 | |||
60 | #define cpsema(sp) (down_trylock(sp) ? 0 : 1) | ||
61 | |||
62 | /* | ||
63 | * cvsema(sp) was not implemented; it is unclear how to map it to up/down/... | ||
64 | * It does a vsema if the value is < 0, otherwise nothing. | ||
65 | */ | ||
66 | |||
67 | #endif /* __XFS_SUPPORT_SEMA_H__ */ | ||
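The sema_t wrappers keep the IRIX call names but discard their extra arguments; the only behavioural twist is cpsema(), whose return value is inverted relative to down_trylock() as noted above. A short sketch (hypothetical names):

#include "sema.h"

/* Hypothetical: a binary semaphore guarding a resource. */
static sema_t example_sema;

static void example_sema_setup(void)
{
	initnsema(&example_sema, 1, "example");
}

static int example_try_then_release(void)
{
	if (!cpsema(&example_sema))	/* 1 = acquired, 0 = would block */
		return 0;
	/* ... critical section ... */
	vsema(&example_sema);
	return 1;
}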
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h new file mode 100644 index 000000000000..bcf60a0b8df0 --- /dev/null +++ b/fs/xfs/linux-2.6/spin.h | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_SPIN_H__ | ||
33 | #define __XFS_SUPPORT_SPIN_H__ | ||
34 | |||
35 | #include <linux/sched.h> /* preempt needs this */ | ||
36 | #include <linux/spinlock.h> | ||
37 | |||
38 | /* | ||
39 | * Map lock_t from IRIX to Linux spinlocks. | ||
40 | * | ||
41 | * We do not make use of lock_t from interrupt context, so we do not | ||
42 | * have to worry about disabling interrupts at all (unlike IRIX). | ||
43 | */ | ||
44 | |||
45 | typedef spinlock_t lock_t; | ||
46 | |||
47 | #define SPLDECL(s) unsigned long s | ||
48 | |||
49 | #define spinlock_init(lock, name) spin_lock_init(lock) | ||
50 | #define spinlock_destroy(lock) | ||
51 | #define mutex_spinlock(lock) ({ spin_lock(lock); 0; }) | ||
52 | #define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0) | ||
53 | #define nested_spinlock(lock) spin_lock(lock) | ||
54 | #define nested_spinunlock(lock) spin_unlock(lock) | ||
55 | |||
56 | #endif /* __XFS_SUPPORT_SPIN_H__ */ | ||
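Because these spinlocks are never taken from interrupt context, the IRIX "spl" cookie returned by mutex_spinlock() is always zero and mutex_spinunlock() discards it; SPLDECL() exists only to keep the shared XFS code compiling. A usage sketch (hypothetical names):

#include "spin.h"

static lock_t example_lock;
static int example_state;

static void example_spin_setup(void)
{
	spinlock_init(&example_lock, "example");
}

static void example_set(int v)
{
	SPLDECL(s);			/* dummy interrupt-level cookie, unused on Linux */

	s = mutex_spinlock(&example_lock);
	example_state = v;
	mutex_spinunlock(&example_lock, s);
}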
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h new file mode 100644 index 000000000000..821d3167e05b --- /dev/null +++ b/fs/xfs/linux-2.6/sv.h | |||
@@ -0,0 +1,89 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_SV_H__ | ||
33 | #define __XFS_SUPPORT_SV_H__ | ||
34 | |||
35 | #include <linux/wait.h> | ||
36 | #include <linux/sched.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | |||
39 | /* | ||
40 | * Synchronisation variables. | ||
41 | * | ||
42 | * (Parameters "pri", "svf" and "rts" are not implemented) | ||
43 | */ | ||
44 | |||
45 | typedef struct sv_s { | ||
46 | wait_queue_head_t waiters; | ||
47 | } sv_t; | ||
48 | |||
49 | #define SV_FIFO 0x0 /* sv_t is FIFO type */ | ||
50 | #define SV_LIFO 0x2 /* sv_t is LIFO type */ | ||
51 | #define SV_PRIO 0x4 /* sv_t is PRIO type */ | ||
52 | #define SV_KEYED 0x6 /* sv_t is KEYED type */ | ||
53 | #define SV_DEFAULT SV_FIFO | ||
54 | |||
55 | |||
56 | static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, | ||
57 | unsigned long timeout) | ||
58 | { | ||
59 | DECLARE_WAITQUEUE(wait, current); | ||
60 | |||
61 | add_wait_queue_exclusive(&sv->waiters, &wait); | ||
62 | __set_current_state(state); | ||
63 | spin_unlock(lock); | ||
64 | |||
65 | schedule_timeout(timeout); | ||
66 | |||
67 | remove_wait_queue(&sv->waiters, &wait); | ||
68 | } | ||
69 | |||
70 | #define init_sv(sv,type,name,flag) \ | ||
71 | init_waitqueue_head(&(sv)->waiters) | ||
72 | #define sv_init(sv,flag,name) \ | ||
73 | init_waitqueue_head(&(sv)->waiters) | ||
74 | #define sv_destroy(sv) \ | ||
75 | /*NOTHING*/ | ||
76 | #define sv_wait(sv, pri, lock, s) \ | ||
77 | _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) | ||
78 | #define sv_wait_sig(sv, pri, lock, s) \ | ||
79 | _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) | ||
80 | #define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ | ||
81 | _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) | ||
82 | #define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ | ||
83 | _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) | ||
84 | #define sv_signal(sv) \ | ||
85 | wake_up(&(sv)->waiters) | ||
86 | #define sv_broadcast(sv) \ | ||
87 | wake_up_all(&(sv)->waiters) | ||
88 | |||
89 | #endif /* __XFS_SUPPORT_SV_H__ */ | ||
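A sync variable is woken with sv_signal()/sv_broadcast() and waited on with sv_wait(), which drops the caller's spinlock before sleeping. Note that _sv_wait() does not retake the lock, so a waiter has to reacquire it before rechecking its condition. A sketch of the usual pattern (hypothetical names, assuming the 2.6-era SPIN_LOCK_UNLOCKED initialiser):

#include "sv.h"

static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;
static sv_t example_sv;
static int example_done;

static void example_sv_setup(void)
{
	sv_init(&example_sv, SV_DEFAULT, "example");
}

static void example_wait_for_done(void)
{
	spin_lock(&example_lock);
	while (!example_done) {
		sv_wait(&example_sv, 0, &example_lock, 0);	/* drops the lock */
		spin_lock(&example_lock);			/* retake before recheck */
	}
	spin_unlock(&example_lock);
}

static void example_mark_done(void)
{
	spin_lock(&example_lock);
	example_done = 1;
	spin_unlock(&example_lock);
	sv_broadcast(&example_sv);
}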
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h new file mode 100644 index 000000000000..6c6fd0faa8e1 --- /dev/null +++ b/fs/xfs/linux-2.6/time.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPPORT_TIME_H__ | ||
33 | #define __XFS_SUPPORT_TIME_H__ | ||
34 | |||
35 | #include <linux/sched.h> | ||
36 | #include <linux/time.h> | ||
37 | |||
38 | typedef struct timespec timespec_t; | ||
39 | |||
40 | static inline void delay(long ticks) | ||
41 | { | ||
42 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
43 | schedule_timeout(ticks); | ||
44 | } | ||
45 | |||
46 | static inline void nanotime(struct timespec *tvp) | ||
47 | { | ||
48 | *tvp = CURRENT_TIME; | ||
49 | } | ||
50 | |||
51 | #endif /* __XFS_SUPPORT_TIME_H__ */ | ||
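delay() sleeps uninterruptibly for a number of scheduler ticks and nanotime() samples the current time into a timespec. A trivial sketch (hypothetical name; assumes HZ is visible via the included headers):

#include "time.h"

static void example_stamped_delay(void)
{
	timespec_t before, after;

	nanotime(&before);
	delay(HZ / 10);		/* roughly 100ms worth of ticks */
	nanotime(&after);
	/* ... compare before/after if a measurement is wanted ... */
}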
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c new file mode 100644 index 000000000000..76a84758073a --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -0,0 +1,1275 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include "xfs_inum.h" | ||
35 | #include "xfs_log.h" | ||
36 | #include "xfs_sb.h" | ||
37 | #include "xfs_dir.h" | ||
38 | #include "xfs_dir2.h" | ||
39 | #include "xfs_trans.h" | ||
40 | #include "xfs_dmapi.h" | ||
41 | #include "xfs_mount.h" | ||
42 | #include "xfs_bmap_btree.h" | ||
43 | #include "xfs_alloc_btree.h" | ||
44 | #include "xfs_ialloc_btree.h" | ||
45 | #include "xfs_alloc.h" | ||
46 | #include "xfs_btree.h" | ||
47 | #include "xfs_attr_sf.h" | ||
48 | #include "xfs_dir_sf.h" | ||
49 | #include "xfs_dir2_sf.h" | ||
50 | #include "xfs_dinode.h" | ||
51 | #include "xfs_inode.h" | ||
52 | #include "xfs_error.h" | ||
53 | #include "xfs_rw.h" | ||
54 | #include "xfs_iomap.h" | ||
55 | #include <linux/mpage.h> | ||
56 | #include <linux/writeback.h> | ||
57 | |||
58 | STATIC void xfs_count_page_state(struct page *, int *, int *, int *); | ||
59 | STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *, | ||
60 | struct writeback_control *wbc, void *, int, int); | ||
61 | |||
62 | #if defined(XFS_RW_TRACE) | ||
63 | void | ||
64 | xfs_page_trace( | ||
65 | int tag, | ||
66 | struct inode *inode, | ||
67 | struct page *page, | ||
68 | int mask) | ||
69 | { | ||
70 | xfs_inode_t *ip; | ||
71 | bhv_desc_t *bdp; | ||
72 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
73 | loff_t isize = i_size_read(inode); | ||
74 | loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | ||
75 | int delalloc = -1, unmapped = -1, unwritten = -1; | ||
76 | |||
77 | if (page_has_buffers(page)) | ||
78 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
79 | |||
80 | bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); | ||
81 | ip = XFS_BHVTOI(bdp); | ||
82 | if (!ip->i_rwtrace) | ||
83 | return; | ||
84 | |||
85 | ktrace_enter(ip->i_rwtrace, | ||
86 | (void *)((unsigned long)tag), | ||
87 | (void *)ip, | ||
88 | (void *)inode, | ||
89 | (void *)page, | ||
90 | (void *)((unsigned long)mask), | ||
91 | (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), | ||
92 | (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), | ||
93 | (void *)((unsigned long)((isize >> 32) & 0xffffffff)), | ||
94 | (void *)((unsigned long)(isize & 0xffffffff)), | ||
95 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), | ||
96 | (void *)((unsigned long)(offset & 0xffffffff)), | ||
97 | (void *)((unsigned long)delalloc), | ||
98 | (void *)((unsigned long)unmapped), | ||
99 | (void *)((unsigned long)unwritten), | ||
100 | (void *)NULL, | ||
101 | (void *)NULL); | ||
102 | } | ||
103 | #else | ||
104 | #define xfs_page_trace(tag, inode, page, mask) | ||
105 | #endif | ||
106 | |||
107 | void | ||
108 | linvfs_unwritten_done( | ||
109 | struct buffer_head *bh, | ||
110 | int uptodate) | ||
111 | { | ||
112 | xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; | ||
113 | |||
114 | ASSERT(buffer_unwritten(bh)); | ||
115 | bh->b_end_io = NULL; | ||
116 | clear_buffer_unwritten(bh); | ||
117 | if (!uptodate) | ||
118 | pagebuf_ioerror(pb, EIO); | ||
119 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { | ||
120 | pagebuf_iodone(pb, 1, 1); | ||
121 | } | ||
122 | end_buffer_async_write(bh, uptodate); | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * Issue transactions to convert a buffer range from unwritten | ||
127 | * to written extents (buffered IO). | ||
128 | */ | ||
129 | STATIC void | ||
130 | linvfs_unwritten_convert( | ||
131 | xfs_buf_t *bp) | ||
132 | { | ||
133 | vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); | ||
134 | int error; | ||
135 | |||
136 | BUG_ON(atomic_read(&bp->pb_hold) < 1); | ||
137 | VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), | ||
138 | BMAPI_UNWRITTEN, NULL, NULL, error); | ||
139 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | ||
140 | XFS_BUF_CLR_IODONE_FUNC(bp); | ||
141 | XFS_BUF_UNDATAIO(bp); | ||
142 | iput(LINVFS_GET_IP(vp)); | ||
143 | pagebuf_iodone(bp, 0, 0); | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * Issue transactions to convert a buffer range from unwritten | ||
148 | * to written extents (direct IO). | ||
149 | */ | ||
150 | STATIC void | ||
151 | linvfs_unwritten_convert_direct( | ||
152 | struct inode *inode, | ||
153 | loff_t offset, | ||
154 | ssize_t size, | ||
155 | void *private) | ||
156 | { | ||
157 | ASSERT(!private || inode == (struct inode *)private); | ||
158 | |||
159 | /* private indicates an unwritten extent lay beneath this IO */ | ||
160 | if (private && size > 0) { | ||
161 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
162 | int error; | ||
163 | |||
164 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); | ||
165 | } | ||
166 | } | ||
167 | |||
168 | STATIC int | ||
169 | xfs_map_blocks( | ||
170 | struct inode *inode, | ||
171 | loff_t offset, | ||
172 | ssize_t count, | ||
173 | xfs_iomap_t *mapp, | ||
174 | int flags) | ||
175 | { | ||
176 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
177 | int error, nmaps = 1; | ||
178 | |||
179 | VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); | ||
180 | if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) | ||
181 | VMODIFY(vp); | ||
182 | return -error; | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Find the mapping in the @iomapp block map array that corresponds to | ||
187 | * the given @offset within a @page. | ||
188 | */ | ||
189 | STATIC xfs_iomap_t * | ||
190 | xfs_offset_to_map( | ||
191 | struct page *page, | ||
192 | xfs_iomap_t *iomapp, | ||
193 | unsigned long offset) | ||
194 | { | ||
195 | loff_t full_offset; /* offset from start of file */ | ||
196 | |||
197 | ASSERT(offset < PAGE_CACHE_SIZE); | ||
198 | |||
199 | full_offset = page->index; /* NB: using 64bit number */ | ||
200 | full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ | ||
201 | full_offset += offset; /* offset from page start */ | ||
202 | |||
203 | if (full_offset < iomapp->iomap_offset) | ||
204 | return NULL; | ||
205 | if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset) | ||
206 | return iomapp; | ||
207 | return NULL; | ||
208 | } | ||
209 | |||
210 | STATIC void | ||
211 | xfs_map_at_offset( | ||
212 | struct page *page, | ||
213 | struct buffer_head *bh, | ||
214 | unsigned long offset, | ||
215 | int block_bits, | ||
216 | xfs_iomap_t *iomapp) | ||
217 | { | ||
218 | xfs_daddr_t bn; | ||
219 | loff_t delta; | ||
220 | int sector_shift; | ||
221 | |||
222 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); | ||
223 | ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); | ||
224 | ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); | ||
225 | |||
226 | delta = page->index; | ||
227 | delta <<= PAGE_CACHE_SHIFT; | ||
228 | delta += offset; | ||
229 | delta -= iomapp->iomap_offset; | ||
230 | delta >>= block_bits; | ||
231 | |||
232 | sector_shift = block_bits - BBSHIFT; | ||
233 | bn = iomapp->iomap_bn >> sector_shift; | ||
234 | bn += delta; | ||
235 | BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME)); | ||
236 | ASSERT((bn << sector_shift) >= iomapp->iomap_bn); | ||
237 | |||
238 | lock_buffer(bh); | ||
239 | bh->b_blocknr = bn; | ||
240 | bh->b_bdev = iomapp->iomap_target->pbr_bdev; | ||
241 | set_buffer_mapped(bh); | ||
242 | clear_buffer_delay(bh); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Look for a page at index which is unlocked and contains our | ||
247 | * unwritten extent flagged buffers at its head. Returns page | ||
248 | * locked and with an extra reference count, and length of the | ||
249 | * unwritten extent component on this page that we can write, | ||
250 | * in units of filesystem blocks. | ||
251 | */ | ||
252 | STATIC struct page * | ||
253 | xfs_probe_unwritten_page( | ||
254 | struct address_space *mapping, | ||
255 | pgoff_t index, | ||
256 | xfs_iomap_t *iomapp, | ||
257 | xfs_buf_t *pb, | ||
258 | unsigned long max_offset, | ||
259 | unsigned long *fsbs, | ||
260 | unsigned int bbits) | ||
261 | { | ||
262 | struct page *page; | ||
263 | |||
264 | page = find_trylock_page(mapping, index); | ||
265 | if (!page) | ||
266 | return NULL; | ||
267 | if (PageWriteback(page)) | ||
268 | goto out; | ||
269 | |||
270 | if (page->mapping && page_has_buffers(page)) { | ||
271 | struct buffer_head *bh, *head; | ||
272 | unsigned long p_offset = 0; | ||
273 | |||
274 | *fsbs = 0; | ||
275 | bh = head = page_buffers(page); | ||
276 | do { | ||
277 | if (!buffer_unwritten(bh) || !buffer_uptodate(bh)) | ||
278 | break; | ||
279 | if (!xfs_offset_to_map(page, iomapp, p_offset)) | ||
280 | break; | ||
281 | if (p_offset >= max_offset) | ||
282 | break; | ||
283 | xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); | ||
284 | set_buffer_unwritten_io(bh); | ||
285 | bh->b_private = pb; | ||
286 | p_offset += bh->b_size; | ||
287 | (*fsbs)++; | ||
288 | } while ((bh = bh->b_this_page) != head); | ||
289 | |||
290 | if (p_offset) | ||
291 | return page; | ||
292 | } | ||
293 | |||
294 | out: | ||
295 | unlock_page(page); | ||
296 | return NULL; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Look for a page at index which is unlocked and not mapped | ||
301 | * yet - clustering for mmap write case. | ||
302 | */ | ||
303 | STATIC unsigned int | ||
304 | xfs_probe_unmapped_page( | ||
305 | struct address_space *mapping, | ||
306 | pgoff_t index, | ||
307 | unsigned int pg_offset) | ||
308 | { | ||
309 | struct page *page; | ||
310 | int ret = 0; | ||
311 | |||
312 | page = find_trylock_page(mapping, index); | ||
313 | if (!page) | ||
314 | return 0; | ||
315 | if (PageWriteback(page)) | ||
316 | goto out; | ||
317 | |||
318 | if (page->mapping && PageDirty(page)) { | ||
319 | if (page_has_buffers(page)) { | ||
320 | struct buffer_head *bh, *head; | ||
321 | |||
322 | bh = head = page_buffers(page); | ||
323 | do { | ||
324 | if (buffer_mapped(bh) || !buffer_uptodate(bh)) | ||
325 | break; | ||
326 | ret += bh->b_size; | ||
327 | if (ret >= pg_offset) | ||
328 | break; | ||
329 | } while ((bh = bh->b_this_page) != head); | ||
330 | } else | ||
331 | ret = PAGE_CACHE_SIZE; | ||
332 | } | ||
333 | |||
334 | out: | ||
335 | unlock_page(page); | ||
336 | return ret; | ||
337 | } | ||
338 | |||
339 | STATIC unsigned int | ||
340 | xfs_probe_unmapped_cluster( | ||
341 | struct inode *inode, | ||
342 | struct page *startpage, | ||
343 | struct buffer_head *bh, | ||
344 | struct buffer_head *head) | ||
345 | { | ||
346 | pgoff_t tindex, tlast, tloff; | ||
347 | unsigned int pg_offset, len, total = 0; | ||
348 | struct address_space *mapping = inode->i_mapping; | ||
349 | |||
350 | /* First sum forwards in this page */ | ||
351 | do { | ||
352 | if (buffer_mapped(bh)) | ||
353 | break; | ||
354 | total += bh->b_size; | ||
355 | } while ((bh = bh->b_this_page) != head); | ||
356 | |||
357 | /* If we reached the end of the page, sum forwards in | ||
358 | * following pages. | ||
359 | */ | ||
360 | if (bh == head) { | ||
361 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
362 | /* Prune this back to avoid pathological behavior */ | ||
363 | tloff = min(tlast, startpage->index + 64); | ||
364 | for (tindex = startpage->index + 1; tindex < tloff; tindex++) { | ||
365 | len = xfs_probe_unmapped_page(mapping, tindex, | ||
366 | PAGE_CACHE_SIZE); | ||
367 | if (!len) | ||
368 | return total; | ||
369 | total += len; | ||
370 | } | ||
371 | if (tindex == tlast && | ||
372 | (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { | ||
373 | total += xfs_probe_unmapped_page(mapping, | ||
374 | tindex, pg_offset); | ||
375 | } | ||
376 | } | ||
377 | return total; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Probe for a given page (index) in the inode and test if it is delayed | ||
382 | * and without unwritten buffers. Returns page locked and with an extra | ||
383 | * reference count. | ||
384 | */ | ||
385 | STATIC struct page * | ||
386 | xfs_probe_delalloc_page( | ||
387 | struct inode *inode, | ||
388 | pgoff_t index) | ||
389 | { | ||
390 | struct page *page; | ||
391 | |||
392 | page = find_trylock_page(inode->i_mapping, index); | ||
393 | if (!page) | ||
394 | return NULL; | ||
395 | if (PageWriteback(page)) | ||
396 | goto out; | ||
397 | |||
398 | if (page->mapping && page_has_buffers(page)) { | ||
399 | struct buffer_head *bh, *head; | ||
400 | int acceptable = 0; | ||
401 | |||
402 | bh = head = page_buffers(page); | ||
403 | do { | ||
404 | if (buffer_unwritten(bh)) { | ||
405 | acceptable = 0; | ||
406 | break; | ||
407 | } else if (buffer_delay(bh)) { | ||
408 | acceptable = 1; | ||
409 | } | ||
410 | } while ((bh = bh->b_this_page) != head); | ||
411 | |||
412 | if (acceptable) | ||
413 | return page; | ||
414 | } | ||
415 | |||
416 | out: | ||
417 | unlock_page(page); | ||
418 | return NULL; | ||
419 | } | ||
420 | |||
421 | STATIC int | ||
422 | xfs_map_unwritten( | ||
423 | struct inode *inode, | ||
424 | struct page *start_page, | ||
425 | struct buffer_head *head, | ||
426 | struct buffer_head *curr, | ||
427 | unsigned long p_offset, | ||
428 | int block_bits, | ||
429 | xfs_iomap_t *iomapp, | ||
430 | struct writeback_control *wbc, | ||
431 | int startio, | ||
432 | int all_bh) | ||
433 | { | ||
434 | struct buffer_head *bh = curr; | ||
435 | xfs_iomap_t *tmp; | ||
436 | xfs_buf_t *pb; | ||
437 | loff_t offset, size; | ||
438 | unsigned long nblocks = 0; | ||
439 | |||
440 | offset = start_page->index; | ||
441 | offset <<= PAGE_CACHE_SHIFT; | ||
442 | offset += p_offset; | ||
443 | |||
444 | /* Get an "empty" pagebuf to manage IO completion; | ||
445 | * proper values will be set before returning. */ | ||
446 | pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); | ||
447 | if (!pb) | ||
448 | return -EAGAIN; | ||
449 | |||
450 | /* Take a reference to the inode to prevent it from | ||
451 | * being reclaimed while we have outstanding unwritten | ||
452 | * extent IO on it. | ||
453 | */ | ||
454 | if ((igrab(inode)) != inode) { | ||
455 | pagebuf_free(pb); | ||
456 | return -EAGAIN; | ||
457 | } | ||
458 | |||
459 | /* Set the count to 1 initially; this will stop an I/O | ||
460 | * completion callout which happens before we have started | ||
461 | * all the I/O from calling pagebuf_iodone too early. | ||
462 | */ | ||
463 | atomic_set(&pb->pb_io_remaining, 1); | ||
464 | |||
465 | /* First map forwards in the page consecutive buffers | ||
466 | * covering this unwritten extent | ||
467 | */ | ||
468 | do { | ||
469 | if (!buffer_unwritten(bh)) | ||
470 | break; | ||
471 | tmp = xfs_offset_to_map(start_page, iomapp, p_offset); | ||
472 | if (!tmp) | ||
473 | break; | ||
474 | xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); | ||
475 | set_buffer_unwritten_io(bh); | ||
476 | bh->b_private = pb; | ||
477 | p_offset += bh->b_size; | ||
478 | nblocks++; | ||
479 | } while ((bh = bh->b_this_page) != head); | ||
480 | |||
481 | atomic_add(nblocks, &pb->pb_io_remaining); | ||
482 | |||
483 | /* If we reached the end of the page, map forwards in any | ||
484 | * following pages which are also covered by this extent. | ||
485 | */ | ||
486 | if (bh == head) { | ||
487 | struct address_space *mapping = inode->i_mapping; | ||
488 | pgoff_t tindex, tloff, tlast; | ||
489 | unsigned long bs; | ||
490 | unsigned int pg_offset, bbits = inode->i_blkbits; | ||
491 | struct page *page; | ||
492 | |||
493 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
494 | tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; | ||
495 | tloff = min(tlast, tloff); | ||
496 | for (tindex = start_page->index + 1; tindex < tloff; tindex++) { | ||
497 | page = xfs_probe_unwritten_page(mapping, | ||
498 | tindex, iomapp, pb, | ||
499 | PAGE_CACHE_SIZE, &bs, bbits); | ||
500 | if (!page) | ||
501 | break; | ||
502 | nblocks += bs; | ||
503 | atomic_add(bs, &pb->pb_io_remaining); | ||
504 | xfs_convert_page(inode, page, iomapp, wbc, pb, | ||
505 | startio, all_bh); | ||
506 | /* stop if converting the next page might add | ||
507 | * enough blocks that the corresponding byte | ||
508 | * count won't fit in our ulong page buf length */ | ||
509 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | ||
510 | goto enough; | ||
511 | } | ||
512 | |||
513 | if (tindex == tlast && | ||
514 | (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { | ||
515 | page = xfs_probe_unwritten_page(mapping, | ||
516 | tindex, iomapp, pb, | ||
517 | pg_offset, &bs, bbits); | ||
518 | if (page) { | ||
519 | nblocks += bs; | ||
520 | atomic_add(bs, &pb->pb_io_remaining); | ||
521 | xfs_convert_page(inode, page, iomapp, wbc, pb, | ||
522 | startio, all_bh); | ||
523 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | ||
524 | goto enough; | ||
525 | } | ||
526 | } | ||
527 | } | ||
528 | |||
529 | enough: | ||
530 | size = nblocks; /* NB: using 64bit number here */ | ||
531 | size <<= block_bits; /* convert fsb's to byte range */ | ||
532 | |||
533 | XFS_BUF_DATAIO(pb); | ||
534 | XFS_BUF_ASYNC(pb); | ||
535 | XFS_BUF_SET_SIZE(pb, size); | ||
536 | XFS_BUF_SET_COUNT(pb, size); | ||
537 | XFS_BUF_SET_OFFSET(pb, offset); | ||
538 | XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); | ||
539 | XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); | ||
540 | |||
541 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { | ||
542 | pagebuf_iodone(pb, 1, 1); | ||
543 | } | ||
544 | |||
545 | return 0; | ||
546 | } | ||
547 | |||
548 | STATIC void | ||
549 | xfs_submit_page( | ||
550 | struct page *page, | ||
551 | struct writeback_control *wbc, | ||
552 | struct buffer_head *bh_arr[], | ||
553 | int bh_count, | ||
554 | int probed_page, | ||
555 | int clear_dirty) | ||
556 | { | ||
557 | struct buffer_head *bh; | ||
558 | int i; | ||
559 | |||
560 | BUG_ON(PageWriteback(page)); | ||
561 | set_page_writeback(page); | ||
562 | if (clear_dirty) | ||
563 | clear_page_dirty(page); | ||
564 | unlock_page(page); | ||
565 | |||
566 | if (bh_count) { | ||
567 | for (i = 0; i < bh_count; i++) { | ||
568 | bh = bh_arr[i]; | ||
569 | mark_buffer_async_write(bh); | ||
570 | if (buffer_unwritten(bh)) | ||
571 | set_buffer_unwritten_io(bh); | ||
572 | set_buffer_uptodate(bh); | ||
573 | clear_buffer_dirty(bh); | ||
574 | } | ||
575 | |||
576 | for (i = 0; i < bh_count; i++) | ||
577 | submit_bh(WRITE, bh_arr[i]); | ||
578 | |||
579 | if (probed_page && clear_dirty) | ||
580 | wbc->nr_to_write--; /* Wrote an "extra" page */ | ||
581 | } else { | ||
582 | end_page_writeback(page); | ||
583 | wbc->pages_skipped++; /* We didn't write this page */ | ||
584 | } | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Allocate & map buffers for page given the extent map and write it out. | ||
589 | * Except for the original page of a writepage, this is called on | ||
590 | * delalloc/unwritten pages only; for the original page it is possible | ||
591 | * that the page has no mapping at all. | ||
592 | */ | ||
593 | STATIC void | ||
594 | xfs_convert_page( | ||
595 | struct inode *inode, | ||
596 | struct page *page, | ||
597 | xfs_iomap_t *iomapp, | ||
598 | struct writeback_control *wbc, | ||
599 | void *private, | ||
600 | int startio, | ||
601 | int all_bh) | ||
602 | { | ||
603 | struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; | ||
604 | xfs_iomap_t *mp = iomapp, *tmp; | ||
605 | unsigned long end, offset; | ||
606 | pgoff_t end_index; | ||
607 | int i = 0, index = 0; | ||
608 | int bbits = inode->i_blkbits; | ||
609 | |||
610 | end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
611 | if (page->index < end_index) { | ||
612 | end = PAGE_CACHE_SIZE; | ||
613 | } else { | ||
614 | end = i_size_read(inode) & (PAGE_CACHE_SIZE-1); | ||
615 | } | ||
616 | bh = head = page_buffers(page); | ||
617 | do { | ||
618 | offset = i << bbits; | ||
619 | if (offset >= end) | ||
620 | break; | ||
621 | if (!(PageUptodate(page) || buffer_uptodate(bh))) | ||
622 | continue; | ||
623 | if (buffer_mapped(bh) && all_bh && | ||
624 | !(buffer_unwritten(bh) || buffer_delay(bh))) { | ||
625 | if (startio) { | ||
626 | lock_buffer(bh); | ||
627 | bh_arr[index++] = bh; | ||
628 | } | ||
629 | continue; | ||
630 | } | ||
631 | tmp = xfs_offset_to_map(page, mp, offset); | ||
632 | if (!tmp) | ||
633 | continue; | ||
634 | ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); | ||
635 | ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); | ||
636 | |||
637 | /* If this is a new unwritten extent buffer (i.e. one | ||
638 | * that we haven't passed in private data for), we must | ||
639 | * now map this buffer too. | ||
640 | */ | ||
641 | if (buffer_unwritten(bh) && !bh->b_end_io) { | ||
642 | ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); | ||
643 | xfs_map_unwritten(inode, page, head, bh, offset, | ||
644 | bbits, tmp, wbc, startio, all_bh); | ||
645 | } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { | ||
646 | xfs_map_at_offset(page, bh, offset, bbits, tmp); | ||
647 | if (buffer_unwritten(bh)) { | ||
648 | set_buffer_unwritten_io(bh); | ||
649 | bh->b_private = private; | ||
650 | ASSERT(private); | ||
651 | } | ||
652 | } | ||
653 | if (startio) { | ||
654 | bh_arr[index++] = bh; | ||
655 | } else { | ||
656 | set_buffer_dirty(bh); | ||
657 | unlock_buffer(bh); | ||
658 | mark_buffer_dirty(bh); | ||
659 | } | ||
660 | } while (i++, (bh = bh->b_this_page) != head); | ||
661 | |||
662 | if (startio) { | ||
663 | xfs_submit_page(page, wbc, bh_arr, index, 1, index == i); | ||
664 | } else { | ||
665 | unlock_page(page); | ||
666 | } | ||
667 | } | ||
668 | |||
669 | /* | ||
670 | * Convert & write out a cluster of pages in the same extent as defined | ||
671 | * by mp and following the start page. | ||
672 | */ | ||
673 | STATIC void | ||
674 | xfs_cluster_write( | ||
675 | struct inode *inode, | ||
676 | pgoff_t tindex, | ||
677 | xfs_iomap_t *iomapp, | ||
678 | struct writeback_control *wbc, | ||
679 | int startio, | ||
680 | int all_bh, | ||
681 | pgoff_t tlast) | ||
682 | { | ||
683 | struct page *page; | ||
684 | |||
685 | for (; tindex <= tlast; tindex++) { | ||
686 | page = xfs_probe_delalloc_page(inode, tindex); | ||
687 | if (!page) | ||
688 | break; | ||
689 | xfs_convert_page(inode, page, iomapp, wbc, NULL, | ||
690 | startio, all_bh); | ||
691 | } | ||
692 | } | ||
693 | |||
694 | /* | ||
695 | * Calling this without startio set means we are being asked to make a dirty | ||
696 | * page ready for freeing its buffers.  When called with startio set, we are | ||
697 | * coming from writepage. | ||
698 | * | ||
699 | * When called with startio set it is important that we write the WHOLE | ||
700 | * page if possible. | ||
701 | * The bh->b_state flags cannot tell us whether any of the blocks, or which | ||
702 | * block for that matter, are dirty due to mmap writes, and therefore bh | ||
703 | * uptodate is only valid if the page itself isn't completely uptodate. | ||
704 | * Some layers may clear the page dirty flag prior to calling writepage, | ||
705 | * under the assumption that the entire page will be written out; by not | ||
706 | * writing out the whole page, the page can be reused before all valid | ||
707 | * dirty data is written out.  Note: in the case of a page that has been | ||
708 | * dirtied by mmap write but only partially set up by block_prepare_write, | ||
709 | * the bh->b_state flags will not agree, and only the ones set up by | ||
710 | * BPW/BCW will have valid state; thus the whole page must be written out. | ||
711 | */ | ||
712 | |||
713 | STATIC int | ||
714 | xfs_page_state_convert( | ||
715 | struct inode *inode, | ||
716 | struct page *page, | ||
717 | struct writeback_control *wbc, | ||
718 | int startio, | ||
719 | int unmapped) /* also implies page uptodate */ | ||
720 | { | ||
721 | struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; | ||
722 | xfs_iomap_t *iomp, iomap; | ||
723 | loff_t offset; | ||
724 | unsigned long p_offset = 0; | ||
725 | __uint64_t end_offset; | ||
726 | pgoff_t end_index, last_index, tlast; | ||
727 | int len, err, i, cnt = 0, uptodate = 1; | ||
728 | int flags = startio ? 0 : BMAPI_TRYLOCK; | ||
729 | int page_dirty, delalloc = 0; | ||
730 | |||
731 | /* Is this page beyond the end of the file? */ | ||
732 | offset = i_size_read(inode); | ||
733 | end_index = offset >> PAGE_CACHE_SHIFT; | ||
734 | last_index = (offset - 1) >> PAGE_CACHE_SHIFT; | ||
735 | if (page->index >= end_index) { | ||
736 | if ((page->index >= end_index + 1) || | ||
737 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { | ||
738 | err = -EIO; | ||
739 | goto error; | ||
740 | } | ||
741 | } | ||
742 | |||
743 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | ||
744 | end_offset = min_t(unsigned long long, | ||
745 | offset + PAGE_CACHE_SIZE, i_size_read(inode)); | ||
746 | |||
747 | bh = head = page_buffers(page); | ||
748 | iomp = NULL; | ||
749 | |||
750 | /* | ||
751 | * page_dirty is initially a count of buffers on the page and | ||
752 | * is decremented as we move each into a cleanable state. | ||
753 | */ | ||
754 | len = bh->b_size; | ||
755 | page_dirty = PAGE_CACHE_SIZE / len; | ||
756 | |||
757 | do { | ||
758 | if (offset >= end_offset) | ||
759 | break; | ||
760 | if (!buffer_uptodate(bh)) | ||
761 | uptodate = 0; | ||
762 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) | ||
763 | continue; | ||
764 | |||
765 | if (iomp) { | ||
766 | iomp = xfs_offset_to_map(page, &iomap, p_offset); | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * First case, map an unwritten extent and prepare for | ||
771 | * extent state conversion transaction on completion. | ||
772 | */ | ||
773 | if (buffer_unwritten(bh)) { | ||
774 | if (!startio) | ||
775 | continue; | ||
776 | if (!iomp) { | ||
777 | err = xfs_map_blocks(inode, offset, len, &iomap, | ||
778 | BMAPI_READ|BMAPI_IGNSTATE); | ||
779 | if (err) { | ||
780 | goto error; | ||
781 | } | ||
782 | iomp = xfs_offset_to_map(page, &iomap, | ||
783 | p_offset); | ||
784 | } | ||
785 | if (iomp) { | ||
786 | if (!bh->b_end_io) { | ||
787 | err = xfs_map_unwritten(inode, page, | ||
788 | head, bh, p_offset, | ||
789 | inode->i_blkbits, iomp, | ||
790 | wbc, startio, unmapped); | ||
791 | if (err) { | ||
792 | goto error; | ||
793 | } | ||
794 | } else { | ||
795 | set_bit(BH_Lock, &bh->b_state); | ||
796 | } | ||
797 | BUG_ON(!buffer_locked(bh)); | ||
798 | bh_arr[cnt++] = bh; | ||
799 | page_dirty--; | ||
800 | } | ||
801 | /* | ||
802 | * Second case, allocate space for a delalloc buffer. | ||
803 | * We can return EAGAIN here in the release page case. | ||
804 | */ | ||
805 | } else if (buffer_delay(bh)) { | ||
806 | if (!iomp) { | ||
807 | delalloc = 1; | ||
808 | err = xfs_map_blocks(inode, offset, len, &iomap, | ||
809 | BMAPI_ALLOCATE | flags); | ||
810 | if (err) { | ||
811 | goto error; | ||
812 | } | ||
813 | iomp = xfs_offset_to_map(page, &iomap, | ||
814 | p_offset); | ||
815 | } | ||
816 | if (iomp) { | ||
817 | xfs_map_at_offset(page, bh, p_offset, | ||
818 | inode->i_blkbits, iomp); | ||
819 | if (startio) { | ||
820 | bh_arr[cnt++] = bh; | ||
821 | } else { | ||
822 | set_buffer_dirty(bh); | ||
823 | unlock_buffer(bh); | ||
824 | mark_buffer_dirty(bh); | ||
825 | } | ||
826 | page_dirty--; | ||
827 | } | ||
828 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && | ||
829 | (unmapped || startio)) { | ||
830 | |||
831 | if (!buffer_mapped(bh)) { | ||
832 | int size; | ||
833 | |||
834 | /* | ||
835 | * Getting here implies an unmapped buffer | ||
836 | * was found, and we are in a path where we | ||
837 | * need to write the whole page out. | ||
838 | */ | ||
839 | if (!iomp) { | ||
840 | size = xfs_probe_unmapped_cluster( | ||
841 | inode, page, bh, head); | ||
842 | err = xfs_map_blocks(inode, offset, | ||
843 | size, &iomap, | ||
844 | BMAPI_WRITE|BMAPI_MMAP); | ||
845 | if (err) { | ||
846 | goto error; | ||
847 | } | ||
848 | iomp = xfs_offset_to_map(page, &iomap, | ||
849 | p_offset); | ||
850 | } | ||
851 | if (iomp) { | ||
852 | xfs_map_at_offset(page, | ||
853 | bh, p_offset, | ||
854 | inode->i_blkbits, iomp); | ||
855 | if (startio) { | ||
856 | bh_arr[cnt++] = bh; | ||
857 | } else { | ||
858 | set_buffer_dirty(bh); | ||
859 | unlock_buffer(bh); | ||
860 | mark_buffer_dirty(bh); | ||
861 | } | ||
862 | page_dirty--; | ||
863 | } | ||
864 | } else if (startio) { | ||
865 | if (buffer_uptodate(bh) && | ||
866 | !test_and_set_bit(BH_Lock, &bh->b_state)) { | ||
867 | bh_arr[cnt++] = bh; | ||
868 | page_dirty--; | ||
869 | } | ||
870 | } | ||
871 | } | ||
872 | } while (offset += len, p_offset += len, | ||
873 | ((bh = bh->b_this_page) != head)); | ||
874 | |||
875 | if (uptodate && bh == head) | ||
876 | SetPageUptodate(page); | ||
877 | |||
878 | if (startio) | ||
879 | xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1); | ||
880 | |||
881 | if (iomp) { | ||
882 | tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> | ||
883 | PAGE_CACHE_SHIFT; | ||
884 | if (delalloc && (tlast > last_index)) | ||
885 | tlast = last_index; | ||
886 | xfs_cluster_write(inode, page->index + 1, iomp, wbc, | ||
887 | startio, unmapped, tlast); | ||
888 | } | ||
889 | |||
890 | return page_dirty; | ||
891 | |||
892 | error: | ||
893 | for (i = 0; i < cnt; i++) { | ||
894 | unlock_buffer(bh_arr[i]); | ||
895 | } | ||
896 | |||
897 | /* | ||
898 | * If it's delalloc and we have nowhere to put it, | ||
899 | * throw it away, unless the lower layers told | ||
900 | * us to try again. | ||
901 | */ | ||
902 | if (err != -EAGAIN) { | ||
903 | if (!unmapped) { | ||
904 | block_invalidatepage(page, 0); | ||
905 | } | ||
906 | ClearPageUptodate(page); | ||
907 | } | ||
908 | return err; | ||
909 | } | ||
910 | |||
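The comment above xfs_page_state_convert describes the whole-page requirement; the function itself classifies each buffer_head and picks a mapping request accordingly. The condensed sketch below is a reading aid only, not part of the patch: the enum and helper name are invented, and the returned strings simply name the BMAPI flag combinations visible in the function body above.

    /* Reading aid (invented names): which mapping request
     * xfs_page_state_convert makes for each buffer_head state. */
    enum bh_kind {
        BH_UNWRITTEN,              /* unwritten extent under the buffer */
        BH_DELALLOC,               /* delayed-allocation reservation */
        BH_UNMAPPED_UPTODATE,      /* mmap write over a hole */
        BH_MAPPED                  /* already mapped to disk */
    };

    static const char *
    bmapi_request_for(enum bh_kind kind, int startio)
    {
        switch (kind) {
        case BH_UNWRITTEN:         /* map now, convert the extent at I/O completion */
            return "BMAPI_READ|BMAPI_IGNSTATE";
        case BH_DELALLOC:          /* allocate real blocks; trylock when not doing I/O */
            return startio ? "BMAPI_ALLOCATE"
                           : "BMAPI_ALLOCATE|BMAPI_TRYLOCK";
        case BH_UNMAPPED_UPTODATE: /* allocate a whole probed cluster */
            return "BMAPI_WRITE|BMAPI_MMAP";
        default:                   /* nothing to map, just submit the buffer */
            return "(no mapping call)";
        }
    }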
911 | STATIC int | ||
912 | __linvfs_get_block( | ||
913 | struct inode *inode, | ||
914 | sector_t iblock, | ||
915 | unsigned long blocks, | ||
916 | struct buffer_head *bh_result, | ||
917 | int create, | ||
918 | int direct, | ||
919 | bmapi_flags_t flags) | ||
920 | { | ||
921 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
922 | xfs_iomap_t iomap; | ||
923 | int retpbbm = 1; | ||
924 | int error; | ||
925 | ssize_t size; | ||
926 | loff_t offset = (loff_t)iblock << inode->i_blkbits; | ||
927 | |||
928 | if (blocks) | ||
929 | size = blocks << inode->i_blkbits; | ||
930 | else | ||
931 | size = 1 << inode->i_blkbits; | ||
932 | |||
933 | VOP_BMAP(vp, offset, size, | ||
934 | create ? flags : BMAPI_READ, &iomap, &retpbbm, error); | ||
935 | if (error) | ||
936 | return -error; | ||
937 | |||
938 | if (retpbbm == 0) | ||
939 | return 0; | ||
940 | |||
941 | if (iomap.iomap_bn != IOMAP_DADDR_NULL) { | ||
942 | xfs_daddr_t bn; | ||
943 | loff_t delta; | ||
944 | |||
945 | /* For unwritten extents do not report a disk address for | ||
946 | * the read case (treat as if we're reading into a hole). | ||
947 | */ | ||
948 | if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { | ||
949 | delta = offset - iomap.iomap_offset; | ||
950 | delta >>= inode->i_blkbits; | ||
951 | |||
952 | bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT); | ||
953 | bn += delta; | ||
954 | BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME)); | ||
955 | bh_result->b_blocknr = bn; | ||
956 | set_buffer_mapped(bh_result); | ||
957 | } | ||
958 | if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { | ||
959 | if (direct) | ||
960 | bh_result->b_private = inode; | ||
961 | set_buffer_unwritten(bh_result); | ||
962 | set_buffer_delay(bh_result); | ||
963 | } | ||
964 | } | ||
965 | |||
966 | /* If this is a realtime file, data might be on a new device */ | ||
967 | bh_result->b_bdev = iomap.iomap_target->pbr_bdev; | ||
968 | |||
969 | /* If we previously allocated a block out beyond eof and | ||
970 | * we are now coming back to use it then we will need to | ||
971 | * flag it as new even if it has a disk address. | ||
972 | */ | ||
973 | if (create && | ||
974 | ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || | ||
975 | (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) { | ||
976 | set_buffer_new(bh_result); | ||
977 | } | ||
978 | |||
979 | if (iomap.iomap_flags & IOMAP_DELAY) { | ||
980 | BUG_ON(direct); | ||
981 | if (create) { | ||
982 | set_buffer_uptodate(bh_result); | ||
983 | set_buffer_mapped(bh_result); | ||
984 | set_buffer_delay(bh_result); | ||
985 | } | ||
986 | } | ||
987 | |||
988 | if (blocks) { | ||
989 | bh_result->b_size = (ssize_t)min( | ||
990 | (loff_t)(iomap.iomap_bsize - iomap.iomap_delta), | ||
991 | (loff_t)(blocks << inode->i_blkbits)); | ||
992 | } | ||
993 | |||
994 | return 0; | ||
995 | } | ||
996 | |||
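__linvfs_get_block converts the iomap's disk address, expressed in 512-byte basic blocks (BBSHIFT == 9), into a buffer_head block number expressed in filesystem blocks, then adds how many filesystem blocks the requested offset lies into the mapping. The standalone sketch below reproduces only that arithmetic; the helper name and the example values (4 KiB blocks, a mapping at byte 0 starting at sector 2048) are assumptions for illustration.

    /* Illustrative sketch, not from the XFS source: convert a 512-byte-unit
     * disk address plus a byte offset within the mapping into a block
     * number in filesystem-block units, as __linvfs_get_block does. */
    #include <stdio.h>

    #define BBSHIFT 9                        /* 512-byte basic blocks */

    static long long
    daddr_to_blocknr(long long iomap_bn,     /* mapping start, in 512-byte units */
                     long long iomap_offset, /* mapping start, in bytes */
                     long long offset,       /* requested position, in bytes */
                     unsigned blkbits)       /* log2 of the fs block size */
    {
        long long delta = (offset - iomap_offset) >> blkbits;

        return (iomap_bn >> (blkbits - BBSHIFT)) + delta;
    }

    int main(void)
    {
        /* 4 KiB blocks, mapping starts at byte 0 / sector 2048,
         * caller asks for byte 8192: (2048 >> 3) + 2 == 258 */
        printf("%lld\n", daddr_to_blocknr(2048, 0, 8192, 12));
        return 0;
    }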
997 | int | ||
998 | linvfs_get_block( | ||
999 | struct inode *inode, | ||
1000 | sector_t iblock, | ||
1001 | struct buffer_head *bh_result, | ||
1002 | int create) | ||
1003 | { | ||
1004 | return __linvfs_get_block(inode, iblock, 0, bh_result, | ||
1005 | create, 0, BMAPI_WRITE); | ||
1006 | } | ||
1007 | |||
1008 | STATIC int | ||
1009 | linvfs_get_blocks_direct( | ||
1010 | struct inode *inode, | ||
1011 | sector_t iblock, | ||
1012 | unsigned long max_blocks, | ||
1013 | struct buffer_head *bh_result, | ||
1014 | int create) | ||
1015 | { | ||
1016 | return __linvfs_get_block(inode, iblock, max_blocks, bh_result, | ||
1017 | create, 1, BMAPI_WRITE|BMAPI_DIRECT); | ||
1018 | } | ||
1019 | |||
1020 | STATIC ssize_t | ||
1021 | linvfs_direct_IO( | ||
1022 | int rw, | ||
1023 | struct kiocb *iocb, | ||
1024 | const struct iovec *iov, | ||
1025 | loff_t offset, | ||
1026 | unsigned long nr_segs) | ||
1027 | { | ||
1028 | struct file *file = iocb->ki_filp; | ||
1029 | struct inode *inode = file->f_mapping->host; | ||
1030 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
1031 | xfs_iomap_t iomap; | ||
1032 | int maps = 1; | ||
1033 | int error; | ||
1034 | |||
1035 | VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); | ||
1036 | if (error) | ||
1037 | return -error; | ||
1038 | |||
1039 | return blockdev_direct_IO_own_locking(rw, iocb, inode, | ||
1040 | iomap.iomap_target->pbr_bdev, | ||
1041 | iov, offset, nr_segs, | ||
1042 | linvfs_get_blocks_direct, | ||
1043 | linvfs_unwritten_convert_direct); | ||
1044 | } | ||
1045 | |||
1046 | |||
1047 | STATIC sector_t | ||
1048 | linvfs_bmap( | ||
1049 | struct address_space *mapping, | ||
1050 | sector_t block) | ||
1051 | { | ||
1052 | struct inode *inode = (struct inode *)mapping->host; | ||
1053 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
1054 | int error; | ||
1055 | |||
1056 | vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address); | ||
1057 | |||
1058 | VOP_RWLOCK(vp, VRWLOCK_READ); | ||
1059 | VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); | ||
1060 | VOP_RWUNLOCK(vp, VRWLOCK_READ); | ||
1061 | return generic_block_bmap(mapping, block, linvfs_get_block); | ||
1062 | } | ||
1063 | |||
1064 | STATIC int | ||
1065 | linvfs_readpage( | ||
1066 | struct file *unused, | ||
1067 | struct page *page) | ||
1068 | { | ||
1069 | return mpage_readpage(page, linvfs_get_block); | ||
1070 | } | ||
1071 | |||
1072 | STATIC int | ||
1073 | linvfs_readpages( | ||
1074 | struct file *unused, | ||
1075 | struct address_space *mapping, | ||
1076 | struct list_head *pages, | ||
1077 | unsigned nr_pages) | ||
1078 | { | ||
1079 | return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); | ||
1080 | } | ||
1081 | |||
1082 | STATIC void | ||
1083 | xfs_count_page_state( | ||
1084 | struct page *page, | ||
1085 | int *delalloc, | ||
1086 | int *unmapped, | ||
1087 | int *unwritten) | ||
1088 | { | ||
1089 | struct buffer_head *bh, *head; | ||
1090 | |||
1091 | *delalloc = *unmapped = *unwritten = 0; | ||
1092 | |||
1093 | bh = head = page_buffers(page); | ||
1094 | do { | ||
1095 | if (buffer_uptodate(bh) && !buffer_mapped(bh)) | ||
1096 | (*unmapped) = 1; | ||
1097 | else if (buffer_unwritten(bh) && !buffer_delay(bh)) | ||
1098 | clear_buffer_unwritten(bh); | ||
1099 | else if (buffer_unwritten(bh)) | ||
1100 | (*unwritten) = 1; | ||
1101 | else if (buffer_delay(bh)) | ||
1102 | (*delalloc) = 1; | ||
1103 | } while ((bh = bh->b_this_page) != head); | ||
1104 | } | ||
1105 | |||
1106 | |||
1107 | /* | ||
1108 | * writepage: Called from one of two places: | ||
1109 | * | ||
1110 | * 1. we are flushing a delalloc buffer head. | ||
1111 | * | ||
1112 | * 2. we are writing out a dirty page. Typically the page dirty | ||
1113 | * state is cleared before we get here. In this case it is | ||
1114 | * conceivable we have no buffer heads. | ||
1115 | * | ||
1116 | * For delalloc space on the page we need to allocate space and | ||
1117 | * flush it. For unmapped buffer heads on the page we should | ||
1118 | * allocate space if the page is uptodate. For any other dirty | ||
1119 | * buffer heads on the page we should flush them. | ||
1120 | * | ||
1121 | * If we detect that a transaction would be required to flush | ||
1122 | * the page, we have to check the process flags first, if we | ||
1123 | * are already in a transaction or disk I/O during allocations | ||
1124 | * is off, we need to fail the writepage and redirty the page. | ||
1125 | */ | ||
1126 | |||
1127 | STATIC int | ||
1128 | linvfs_writepage( | ||
1129 | struct page *page, | ||
1130 | struct writeback_control *wbc) | ||
1131 | { | ||
1132 | int error; | ||
1133 | int need_trans; | ||
1134 | int delalloc, unmapped, unwritten; | ||
1135 | struct inode *inode = page->mapping->host; | ||
1136 | |||
1137 | xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); | ||
1138 | |||
1139 | /* | ||
1140 | * We need a transaction if: | ||
1141 | * 1. There are delalloc buffers on the page | ||
1142 | * 2. The page is uptodate and we have unmapped buffers | ||
1143 | * 3. The page is uptodate and we have no buffers | ||
1144 | * 4. There are unwritten buffers on the page | ||
1145 | */ | ||
1146 | |||
1147 | if (!page_has_buffers(page)) { | ||
1148 | unmapped = 1; | ||
1149 | need_trans = 1; | ||
1150 | } else { | ||
1151 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1152 | if (!PageUptodate(page)) | ||
1153 | unmapped = 0; | ||
1154 | need_trans = delalloc + unmapped + unwritten; | ||
1155 | } | ||
1156 | |||
1157 | /* | ||
1158 | * If we need a transaction and the process flags say | ||
1159 | * we are already in a transaction, or no IO is allowed | ||
1160 | * then mark the page dirty again and leave the page | ||
1161 | * as is. | ||
1162 | */ | ||
1163 | if (PFLAGS_TEST_FSTRANS() && need_trans) | ||
1164 | goto out_fail; | ||
1165 | |||
1166 | /* | ||
1167 | * Delay hooking up buffer heads until we have | ||
1168 | * made our go/no-go decision. | ||
1169 | */ | ||
1170 | if (!page_has_buffers(page)) | ||
1171 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); | ||
1172 | |||
1173 | /* | ||
1174 | * Convert delayed allocate, unwritten or unmapped space | ||
1175 | * to real space and flush out to disk. | ||
1176 | */ | ||
1177 | error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); | ||
1178 | if (error == -EAGAIN) | ||
1179 | goto out_fail; | ||
1180 | if (unlikely(error < 0)) | ||
1181 | goto out_unlock; | ||
1182 | |||
1183 | return 0; | ||
1184 | |||
1185 | out_fail: | ||
1186 | redirty_page_for_writepage(wbc, page); | ||
1187 | unlock_page(page); | ||
1188 | return 0; | ||
1189 | out_unlock: | ||
1190 | unlock_page(page); | ||
1191 | return error; | ||
1192 | } | ||
1193 | |||
1194 | /* | ||
1195 | * Called to move a page into cleanable state - and from there | ||
1196 | * to be released. Possibly the page is already clean. We always | ||
1197 | * have buffer heads in this call. | ||
1198 | * | ||
1199 | * Returns 0 if the page is ok to release, 1 otherwise. | ||
1200 | * | ||
1201 | * Possible scenarios are: | ||
1202 | * | ||
1203 | * 1. We are being called to release a page which has been written | ||
1204 | * to via regular I/O. The buffer heads will be dirty and possibly | ||
1205 | * delalloc. If there are no delalloc buffer heads in this case | ||
1206 | * then we can just return zero. | ||
1207 | * | ||
1208 | * 2. We are called to release a page which has been written via | ||
1209 | * mmap; all we need to do is ensure there is no delalloc | ||
1210 | * state in the buffer heads. If there is none, we can let the | ||
1211 | * caller free them, and we should come back later via writepage. | ||
1212 | */ | ||
1213 | STATIC int | ||
1214 | linvfs_release_page( | ||
1215 | struct page *page, | ||
1216 | int gfp_mask) | ||
1217 | { | ||
1218 | struct inode *inode = page->mapping->host; | ||
1219 | int dirty, delalloc, unmapped, unwritten; | ||
1220 | struct writeback_control wbc = { | ||
1221 | .sync_mode = WB_SYNC_ALL, | ||
1222 | .nr_to_write = 1, | ||
1223 | }; | ||
1224 | |||
1225 | xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); | ||
1226 | |||
1227 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1228 | if (!delalloc && !unwritten) | ||
1229 | goto free_buffers; | ||
1230 | |||
1231 | if (!(gfp_mask & __GFP_FS)) | ||
1232 | return 0; | ||
1233 | |||
1234 | /* If we are already inside a transaction or the thread cannot | ||
1235 | * do I/O, we cannot release this page. | ||
1236 | */ | ||
1237 | if (PFLAGS_TEST_FSTRANS()) | ||
1238 | return 0; | ||
1239 | |||
1240 | /* | ||
1241 | * Convert delalloc space to real space; do not flush the | ||
1242 | * data out to disk, that will be done by the caller. | ||
1243 | * We never need to allocate space here - we will always | ||
1244 | * come back to writepage in that case. | ||
1245 | */ | ||
1246 | dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); | ||
1247 | if (dirty == 0 && !unwritten) | ||
1248 | goto free_buffers; | ||
1249 | return 0; | ||
1250 | |||
1251 | free_buffers: | ||
1252 | return try_to_free_buffers(page); | ||
1253 | } | ||
1254 | |||
1255 | STATIC int | ||
1256 | linvfs_prepare_write( | ||
1257 | struct file *file, | ||
1258 | struct page *page, | ||
1259 | unsigned int from, | ||
1260 | unsigned int to) | ||
1261 | { | ||
1262 | return block_prepare_write(page, from, to, linvfs_get_block); | ||
1263 | } | ||
1264 | |||
1265 | struct address_space_operations linvfs_aops = { | ||
1266 | .readpage = linvfs_readpage, | ||
1267 | .readpages = linvfs_readpages, | ||
1268 | .writepage = linvfs_writepage, | ||
1269 | .sync_page = block_sync_page, | ||
1270 | .releasepage = linvfs_release_page, | ||
1271 | .prepare_write = linvfs_prepare_write, | ||
1272 | .commit_write = generic_commit_write, | ||
1273 | .bmap = linvfs_bmap, | ||
1274 | .direct_IO = linvfs_direct_IO, | ||
1275 | }; | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c new file mode 100644 index 000000000000..23e0eb67fc25 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -0,0 +1,1980 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * The xfs_buf.c code provides an abstract buffer cache model on top | ||
35 | * of the Linux page cache. Cached metadata blocks for a file system | ||
36 | * are hashed to the inode for the block device. xfs_buf.c assembles | ||
37 | * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O. | ||
38 | * | ||
39 | * Written by Steve Lord, Jim Mostek, Russell Cattelan | ||
40 | * and Rajagopal Ananthanarayanan ("ananth") at SGI. | ||
41 | * | ||
42 | */ | ||
43 | |||
44 | #include <linux/stddef.h> | ||
45 | #include <linux/errno.h> | ||
46 | #include <linux/slab.h> | ||
47 | #include <linux/pagemap.h> | ||
48 | #include <linux/init.h> | ||
49 | #include <linux/vmalloc.h> | ||
50 | #include <linux/bio.h> | ||
51 | #include <linux/sysctl.h> | ||
52 | #include <linux/proc_fs.h> | ||
53 | #include <linux/workqueue.h> | ||
54 | #include <linux/percpu.h> | ||
55 | #include <linux/blkdev.h> | ||
56 | #include <linux/hash.h> | ||
57 | |||
58 | #include "xfs_linux.h" | ||
59 | |||
60 | /* | ||
61 | * File wide globals | ||
62 | */ | ||
63 | |||
64 | STATIC kmem_cache_t *pagebuf_cache; | ||
65 | STATIC kmem_shaker_t pagebuf_shake; | ||
66 | STATIC int pagebuf_daemon_wakeup(int, unsigned int); | ||
67 | STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); | ||
68 | STATIC struct workqueue_struct *pagebuf_logio_workqueue; | ||
69 | STATIC struct workqueue_struct *pagebuf_dataio_workqueue; | ||
70 | |||
71 | /* | ||
72 | * Pagebuf debugging | ||
73 | */ | ||
74 | |||
75 | #ifdef PAGEBUF_TRACE | ||
76 | void | ||
77 | pagebuf_trace( | ||
78 | xfs_buf_t *pb, | ||
79 | char *id, | ||
80 | void *data, | ||
81 | void *ra) | ||
82 | { | ||
83 | ktrace_enter(pagebuf_trace_buf, | ||
84 | pb, id, | ||
85 | (void *)(unsigned long)pb->pb_flags, | ||
86 | (void *)(unsigned long)pb->pb_hold.counter, | ||
87 | (void *)(unsigned long)pb->pb_sema.count.counter, | ||
88 | (void *)current, | ||
89 | data, ra, | ||
90 | (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), | ||
91 | (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), | ||
92 | (void *)(unsigned long)pb->pb_buffer_length, | ||
93 | NULL, NULL, NULL, NULL, NULL); | ||
94 | } | ||
95 | ktrace_t *pagebuf_trace_buf; | ||
96 | #define PAGEBUF_TRACE_SIZE 4096 | ||
97 | #define PB_TRACE(pb, id, data) \ | ||
98 | pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) | ||
99 | #else | ||
100 | #define PB_TRACE(pb, id, data) do { } while (0) | ||
101 | #endif | ||
102 | |||
103 | #ifdef PAGEBUF_LOCK_TRACKING | ||
104 | # define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) | ||
105 | # define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) | ||
106 | # define PB_GET_OWNER(pb) ((pb)->pb_last_holder) | ||
107 | #else | ||
108 | # define PB_SET_OWNER(pb) do { } while (0) | ||
109 | # define PB_CLEAR_OWNER(pb) do { } while (0) | ||
110 | # define PB_GET_OWNER(pb) do { } while (0) | ||
111 | #endif | ||
112 | |||
113 | /* | ||
114 | * Pagebuf allocation / freeing. | ||
115 | */ | ||
116 | |||
117 | #define pb_to_gfp(flags) \ | ||
118 | ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ | ||
119 | ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) | ||
120 | |||
121 | #define pb_to_km(flags) \ | ||
122 | (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) | ||
123 | |||
124 | |||
125 | #define pagebuf_allocate(flags) \ | ||
126 | kmem_zone_alloc(pagebuf_cache, pb_to_km(flags)) | ||
127 | #define pagebuf_deallocate(pb) \ | ||
128 | kmem_zone_free(pagebuf_cache, (pb)); | ||
129 | |||
130 | /* | ||
131 | * Page Region interfaces. | ||
132 | * | ||
133 | * For pages in filesystems where the blocksize is smaller than the | ||
134 | * pagesize, we use the page->private field (long) to hold a bitmap | ||
135 | * of uptodate regions within the page. | ||
136 | * | ||
137 | * Each such region is "bytes per page / bits per long" bytes long. | ||
138 | * | ||
139 | * NBPPR == number-of-bytes-per-page-region | ||
140 | * BTOPR == bytes-to-page-region (rounded up) | ||
141 | * BTOPRT == bytes-to-page-region-truncated (rounded down) | ||
142 | */ | ||
143 | #if (BITS_PER_LONG == 32) | ||
144 | #define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ | ||
145 | #elif (BITS_PER_LONG == 64) | ||
146 | #define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ | ||
147 | #else | ||
148 | #error BITS_PER_LONG must be 32 or 64 | ||
149 | #endif | ||
150 | #define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) | ||
151 | #define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) | ||
152 | #define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) | ||
153 | |||
154 | STATIC unsigned long | ||
155 | page_region_mask( | ||
156 | size_t offset, | ||
157 | size_t length) | ||
158 | { | ||
159 | unsigned long mask; | ||
160 | int first, final; | ||
161 | |||
162 | first = BTOPR(offset); | ||
163 | final = BTOPRT(offset + length - 1); | ||
164 | first = min(first, final); | ||
165 | |||
166 | mask = ~0UL; | ||
167 | mask <<= BITS_PER_LONG - (final - first); | ||
168 | mask >>= BITS_PER_LONG - (final); | ||
169 | |||
170 | ASSERT(offset + length <= PAGE_CACHE_SIZE); | ||
171 | ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); | ||
172 | |||
173 | return mask; | ||
174 | } | ||
175 | |||
176 | STATIC inline void | ||
177 | set_page_region( | ||
178 | struct page *page, | ||
179 | size_t offset, | ||
180 | size_t length) | ||
181 | { | ||
182 | page->private |= page_region_mask(offset, length); | ||
183 | if (page->private == ~0UL) | ||
184 | SetPageUptodate(page); | ||
185 | } | ||
186 | |||
187 | STATIC inline int | ||
188 | test_page_region( | ||
189 | struct page *page, | ||
190 | size_t offset, | ||
191 | size_t length) | ||
192 | { | ||
193 | unsigned long mask = page_region_mask(offset, length); | ||
194 | |||
195 | return (mask && (page->private & mask) == mask); | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * Mapping of multi-page buffers into contiguous virtual space | ||
200 | */ | ||
201 | |||
202 | typedef struct a_list { | ||
203 | void *vm_addr; | ||
204 | struct a_list *next; | ||
205 | } a_list_t; | ||
206 | |||
207 | STATIC a_list_t *as_free_head; | ||
208 | STATIC int as_list_len; | ||
209 | STATIC DEFINE_SPINLOCK(as_lock); | ||
210 | |||
211 | /* | ||
212 | * Try to batch vunmaps because they are costly. | ||
213 | */ | ||
214 | STATIC void | ||
215 | free_address( | ||
216 | void *addr) | ||
217 | { | ||
218 | a_list_t *aentry; | ||
219 | |||
220 | aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH); | ||
221 | if (likely(aentry)) { | ||
222 | spin_lock(&as_lock); | ||
223 | aentry->next = as_free_head; | ||
224 | aentry->vm_addr = addr; | ||
225 | as_free_head = aentry; | ||
226 | as_list_len++; | ||
227 | spin_unlock(&as_lock); | ||
228 | } else { | ||
229 | vunmap(addr); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | STATIC void | ||
234 | purge_addresses(void) | ||
235 | { | ||
236 | a_list_t *aentry, *old; | ||
237 | |||
238 | if (as_free_head == NULL) | ||
239 | return; | ||
240 | |||
241 | spin_lock(&as_lock); | ||
242 | aentry = as_free_head; | ||
243 | as_free_head = NULL; | ||
244 | as_list_len = 0; | ||
245 | spin_unlock(&as_lock); | ||
246 | |||
247 | while ((old = aentry) != NULL) { | ||
248 | vunmap(aentry->vm_addr); | ||
249 | aentry = aentry->next; | ||
250 | kfree(old); | ||
251 | } | ||
252 | } | ||
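free_address() and purge_addresses() above batch vunmap() calls because, as the comment says, they are costly: addresses are queued under a spinlock and released later in one pass, with an immediate vunmap() as the fallback when a queue entry cannot be allocated. The userspace sketch below mirrors that batching pattern; the pthread mutex and free() stand in for the kernel spinlock and vunmap() and are not the kernel API.

    /* Userspace analogue (illustrative only) of the deferred-free batching
     * used by free_address()/purge_addresses(). */
    #include <pthread.h>
    #include <stdlib.h>

    struct deferred { void *addr; struct deferred *next; };

    static struct deferred *free_head;
    static pthread_mutex_t  free_lock = PTHREAD_MUTEX_INITIALIZER;

    static void defer_free(void *addr)
    {
        struct deferred *e = malloc(sizeof(*e));

        if (!e) {                  /* cannot queue it: release immediately */
            free(addr);
            return;
        }
        pthread_mutex_lock(&free_lock);
        e->addr = addr;
        e->next = free_head;
        free_head = e;
        pthread_mutex_unlock(&free_lock);
    }

    static void purge_deferred(void)
    {
        struct deferred *e, *old;

        pthread_mutex_lock(&free_lock);
        e = free_head;
        free_head = NULL;
        pthread_mutex_unlock(&free_lock);

        while (e) {                /* do the expensive releases in one batch */
            old = e;
            free(e->addr);
            e = e->next;
            free(old);
        }
    }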
253 | |||
254 | /* | ||
255 | * Internal pagebuf object manipulation | ||
256 | */ | ||
257 | |||
258 | STATIC void | ||
259 | _pagebuf_initialize( | ||
260 | xfs_buf_t *pb, | ||
261 | xfs_buftarg_t *target, | ||
262 | loff_t range_base, | ||
263 | size_t range_length, | ||
264 | page_buf_flags_t flags) | ||
265 | { | ||
266 | /* | ||
267 | * We don't want certain flags to appear in pb->pb_flags. | ||
268 | */ | ||
269 | flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); | ||
270 | |||
271 | memset(pb, 0, sizeof(xfs_buf_t)); | ||
272 | atomic_set(&pb->pb_hold, 1); | ||
273 | init_MUTEX_LOCKED(&pb->pb_iodonesema); | ||
274 | INIT_LIST_HEAD(&pb->pb_list); | ||
275 | INIT_LIST_HEAD(&pb->pb_hash_list); | ||
276 | init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ | ||
277 | PB_SET_OWNER(pb); | ||
278 | pb->pb_target = target; | ||
279 | pb->pb_file_offset = range_base; | ||
280 | /* | ||
281 | * Set buffer_length and count_desired to the same value initially. | ||
282 | * I/O routines should use count_desired, which will be the same in | ||
283 | * most cases but may be reset (e.g. XFS recovery). | ||
284 | */ | ||
285 | pb->pb_buffer_length = pb->pb_count_desired = range_length; | ||
286 | pb->pb_flags = flags | PBF_NONE; | ||
287 | pb->pb_bn = XFS_BUF_DADDR_NULL; | ||
288 | atomic_set(&pb->pb_pin_count, 0); | ||
289 | init_waitqueue_head(&pb->pb_waiters); | ||
290 | |||
291 | XFS_STATS_INC(pb_create); | ||
292 | PB_TRACE(pb, "initialize", target); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * Allocate a page array capable of holding a specified number | ||
297 | * of pages, and point the page buf at it. | ||
298 | */ | ||
299 | STATIC int | ||
300 | _pagebuf_get_pages( | ||
301 | xfs_buf_t *pb, | ||
302 | int page_count, | ||
303 | page_buf_flags_t flags) | ||
304 | { | ||
305 | /* Make sure that we have a page list */ | ||
306 | if (pb->pb_pages == NULL) { | ||
307 | pb->pb_offset = page_buf_poff(pb->pb_file_offset); | ||
308 | pb->pb_page_count = page_count; | ||
309 | if (page_count <= PB_PAGES) { | ||
310 | pb->pb_pages = pb->pb_page_array; | ||
311 | } else { | ||
312 | pb->pb_pages = kmem_alloc(sizeof(struct page *) * | ||
313 | page_count, pb_to_km(flags)); | ||
314 | if (pb->pb_pages == NULL) | ||
315 | return -ENOMEM; | ||
316 | } | ||
317 | memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); | ||
318 | } | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Frees pb_pages if it was malloced. | ||
324 | */ | ||
325 | STATIC void | ||
326 | _pagebuf_free_pages( | ||
327 | xfs_buf_t *bp) | ||
328 | { | ||
329 | if (bp->pb_pages != bp->pb_page_array) { | ||
330 | kmem_free(bp->pb_pages, | ||
331 | bp->pb_page_count * sizeof(struct page *)); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Releases the specified buffer. | ||
337 | * | ||
338 | * The modification state of any associated pages is left unchanged. | ||
339 | * The buffer must not be on any hash - use pagebuf_rele instead for | ||
340 | * hashed and refcounted buffers. | ||
341 | */ | ||
342 | void | ||
343 | pagebuf_free( | ||
344 | xfs_buf_t *bp) | ||
345 | { | ||
346 | PB_TRACE(bp, "free", 0); | ||
347 | |||
348 | ASSERT(list_empty(&bp->pb_hash_list)); | ||
349 | |||
350 | if (bp->pb_flags & _PBF_PAGE_CACHE) { | ||
351 | uint i; | ||
352 | |||
353 | if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) | ||
354 | free_address(bp->pb_addr - bp->pb_offset); | ||
355 | |||
356 | for (i = 0; i < bp->pb_page_count; i++) | ||
357 | page_cache_release(bp->pb_pages[i]); | ||
358 | _pagebuf_free_pages(bp); | ||
359 | } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { | ||
360 | /* | ||
361 | * XXX(hch): bp->pb_count_desired might be incorrect (see | ||
362 | * pagebuf_associate_memory for details), but fortunately | ||
363 | * the Linux version of kmem_free ignores the len argument.. | ||
364 | */ | ||
365 | kmem_free(bp->pb_addr, bp->pb_count_desired); | ||
366 | _pagebuf_free_pages(bp); | ||
367 | } | ||
368 | |||
369 | pagebuf_deallocate(bp); | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Finds all pages for the buffer in question and builds its page list. | ||
374 | */ | ||
375 | STATIC int | ||
376 | _pagebuf_lookup_pages( | ||
377 | xfs_buf_t *bp, | ||
378 | uint flags) | ||
379 | { | ||
380 | struct address_space *mapping = bp->pb_target->pbr_mapping; | ||
381 | size_t blocksize = bp->pb_target->pbr_bsize; | ||
382 | size_t size = bp->pb_count_desired; | ||
383 | size_t nbytes, offset; | ||
384 | int gfp_mask = pb_to_gfp(flags); | ||
385 | unsigned short page_count, i; | ||
386 | pgoff_t first; | ||
387 | loff_t end; | ||
388 | int error; | ||
389 | |||
390 | end = bp->pb_file_offset + bp->pb_buffer_length; | ||
391 | page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); | ||
392 | |||
393 | error = _pagebuf_get_pages(bp, page_count, flags); | ||
394 | if (unlikely(error)) | ||
395 | return error; | ||
396 | bp->pb_flags |= _PBF_PAGE_CACHE; | ||
397 | |||
398 | offset = bp->pb_offset; | ||
399 | first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; | ||
400 | |||
401 | for (i = 0; i < bp->pb_page_count; i++) { | ||
402 | struct page *page; | ||
403 | uint retries = 0; | ||
404 | |||
405 | retry: | ||
406 | page = find_or_create_page(mapping, first + i, gfp_mask); | ||
407 | if (unlikely(page == NULL)) { | ||
408 | if (flags & PBF_READ_AHEAD) { | ||
409 | bp->pb_page_count = i; | ||
410 | for (i = 0; i < bp->pb_page_count; i++) | ||
411 | unlock_page(bp->pb_pages[i]); | ||
412 | return -ENOMEM; | ||
413 | } | ||
414 | |||
415 | /* | ||
416 | * This could deadlock. | ||
417 | * | ||
418 | * But until all the XFS lowlevel code is revamped to | ||
419 | * handle buffer allocation failures we can't do much. | ||
420 | */ | ||
421 | if (!(++retries % 100)) | ||
422 | printk(KERN_ERR | ||
423 | "XFS: possible memory allocation " | ||
424 | "deadlock in %s (mode:0x%x)\n", | ||
425 | __FUNCTION__, gfp_mask); | ||
426 | |||
427 | XFS_STATS_INC(pb_page_retries); | ||
428 | pagebuf_daemon_wakeup(0, gfp_mask); | ||
429 | blk_congestion_wait(WRITE, HZ/50); | ||
430 | goto retry; | ||
431 | } | ||
432 | |||
433 | XFS_STATS_INC(pb_page_found); | ||
434 | |||
435 | nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); | ||
436 | size -= nbytes; | ||
437 | |||
438 | if (!PageUptodate(page)) { | ||
439 | page_count--; | ||
440 | if (blocksize >= PAGE_CACHE_SIZE) { | ||
441 | if (flags & PBF_READ) | ||
442 | bp->pb_locked = 1; | ||
443 | } else if (!PagePrivate(page)) { | ||
444 | if (test_page_region(page, offset, nbytes)) | ||
445 | page_count++; | ||
446 | } | ||
447 | } | ||
448 | |||
449 | bp->pb_pages[i] = page; | ||
450 | offset = 0; | ||
451 | } | ||
452 | |||
453 | if (!bp->pb_locked) { | ||
454 | for (i = 0; i < bp->pb_page_count; i++) | ||
455 | unlock_page(bp->pb_pages[i]); | ||
456 | } | ||
457 | |||
458 | if (page_count) { | ||
459 | /* if we have any uptodate pages, mark that in the buffer */ | ||
460 | bp->pb_flags &= ~PBF_NONE; | ||
461 | |||
462 | /* if some pages aren't uptodate, mark that in the buffer */ | ||
463 | if (page_count != bp->pb_page_count) | ||
464 | bp->pb_flags |= PBF_PARTIAL; | ||
465 | } | ||
466 | |||
467 | PB_TRACE(bp, "lookup_pages", (long)page_count); | ||
468 | return error; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Map buffer into kernel address-space if necessary. | ||
473 | */ | ||
474 | STATIC int | ||
475 | _pagebuf_map_pages( | ||
476 | xfs_buf_t *bp, | ||
477 | uint flags) | ||
478 | { | ||
479 | /* A single page buffer is always mappable */ | ||
480 | if (bp->pb_page_count == 1) { | ||
481 | bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; | ||
482 | bp->pb_flags |= PBF_MAPPED; | ||
483 | } else if (flags & PBF_MAPPED) { | ||
484 | if (as_list_len > 64) | ||
485 | purge_addresses(); | ||
486 | bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, | ||
487 | VM_MAP, PAGE_KERNEL); | ||
488 | if (unlikely(bp->pb_addr == NULL)) | ||
489 | return -ENOMEM; | ||
490 | bp->pb_addr += bp->pb_offset; | ||
491 | bp->pb_flags |= PBF_MAPPED; | ||
492 | } | ||
493 | |||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * Finding and Reading Buffers | ||
499 | */ | ||
500 | |||
501 | /* | ||
502 | * _pagebuf_find | ||
503 | * | ||
504 | * Looks up, and creates if absent, a lockable buffer for | ||
505 | * a given range of an inode. The buffer is returned | ||
506 | * locked. If other overlapping buffers exist, they are | ||
507 | * released before the new buffer is created and locked, | ||
508 | * which may imply that this call will block until those buffers | ||
509 | * are unlocked. No I/O is implied by this call. | ||
510 | */ | ||
511 | xfs_buf_t * | ||
512 | _pagebuf_find( | ||
513 | xfs_buftarg_t *btp, /* block device target */ | ||
514 | loff_t ioff, /* starting offset of range */ | ||
515 | size_t isize, /* length of range */ | ||
516 | page_buf_flags_t flags, /* PBF_TRYLOCK */ | ||
517 | xfs_buf_t *new_pb)/* newly allocated buffer */ | ||
518 | { | ||
519 | loff_t range_base; | ||
520 | size_t range_length; | ||
521 | xfs_bufhash_t *hash; | ||
522 | xfs_buf_t *pb, *n; | ||
523 | |||
524 | range_base = (ioff << BBSHIFT); | ||
525 | range_length = (isize << BBSHIFT); | ||
526 | |||
527 | /* Check for IOs smaller than the sector size / not sector aligned */ | ||
528 | ASSERT(!(range_length < (1 << btp->pbr_sshift))); | ||
529 | ASSERT(!(range_base & (loff_t)btp->pbr_smask)); | ||
530 | |||
531 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; | ||
532 | |||
533 | spin_lock(&hash->bh_lock); | ||
534 | |||
535 | list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) { | ||
536 | ASSERT(btp == pb->pb_target); | ||
537 | if (pb->pb_file_offset == range_base && | ||
538 | pb->pb_buffer_length == range_length) { | ||
539 | /* | ||
540 | * If we look at something, bring it to the | ||
541 | * front of the list for next time. | ||
542 | */ | ||
543 | atomic_inc(&pb->pb_hold); | ||
544 | list_move(&pb->pb_hash_list, &hash->bh_list); | ||
545 | goto found; | ||
546 | } | ||
547 | } | ||
548 | |||
549 | /* No match found */ | ||
550 | if (new_pb) { | ||
551 | _pagebuf_initialize(new_pb, btp, range_base, | ||
552 | range_length, flags); | ||
553 | new_pb->pb_hash = hash; | ||
554 | list_add(&new_pb->pb_hash_list, &hash->bh_list); | ||
555 | } else { | ||
556 | XFS_STATS_INC(pb_miss_locked); | ||
557 | } | ||
558 | |||
559 | spin_unlock(&hash->bh_lock); | ||
560 | return new_pb; | ||
561 | |||
562 | found: | ||
563 | spin_unlock(&hash->bh_lock); | ||
564 | |||
565 | /* Attempt to get the semaphore without sleeping, | ||
566 | * if this does not work then we need to drop the | ||
567 | * spinlock and do a hard attempt on the semaphore. | ||
568 | */ | ||
569 | if (down_trylock(&pb->pb_sema)) { | ||
570 | if (!(flags & PBF_TRYLOCK)) { | ||
571 | /* wait for buffer ownership */ | ||
572 | PB_TRACE(pb, "get_lock", 0); | ||
573 | pagebuf_lock(pb); | ||
574 | XFS_STATS_INC(pb_get_locked_waited); | ||
575 | } else { | ||
576 | /* We asked for a trylock and failed, no need | ||
577 | * to look at file offset and length here, we | ||
578 | * know that this pagebuf at least overlaps our | ||
579 | * pagebuf and is locked; therefore the buffer | ||
580 | * we want either does not exist, or is this one. | ||
581 | */ | ||
582 | |||
583 | pagebuf_rele(pb); | ||
584 | XFS_STATS_INC(pb_busy_locked); | ||
585 | return (NULL); | ||
586 | } | ||
587 | } else { | ||
588 | /* trylock worked */ | ||
589 | PB_SET_OWNER(pb); | ||
590 | } | ||
591 | |||
592 | if (pb->pb_flags & PBF_STALE) | ||
593 | pb->pb_flags &= PBF_MAPPED; | ||
594 | PB_TRACE(pb, "got_lock", 0); | ||
595 | XFS_STATS_INC(pb_get_locked); | ||
596 | return (pb); | ||
597 | } | ||
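_pagebuf_find above keys the cache on the byte range of the target device: the 512-byte-unit (ioff, isize) pair is shifted by BBSHIFT into (range_base, range_length), a bucket is chosen by hashing the starting offset, and a match requires both offset and length to be equal. The sketch below shows only that lookup shape; the structures, the bucket count, and the modulo hash are invented simplifications (the real code uses hash_long() and a per-bucket spinlock).

    /* Minimal sketch (invented names, no locking) of the hashed lookup
     * that _pagebuf_find performs: hash the start offset, walk the
     * bucket, and match on both offset and length. */
    #include <stddef.h>

    #define BBSHIFT  9
    #define NBUCKETS 64             /* assumed bucket count */

    struct cached_buf {
        long long          offset;  /* range_base, in bytes */
        size_t             length;  /* range_length, in bytes */
        struct cached_buf *next;
    };

    static struct cached_buf *buckets[NBUCKETS];

    static struct cached_buf *
    cache_lookup(long long ioff /* 512-byte units */, size_t isize)
    {
        long long range_base   = ioff << BBSHIFT;
        size_t    range_length = isize << BBSHIFT;
        struct cached_buf *b   = buckets[(unsigned long long)ioff % NBUCKETS];

        for (; b != NULL; b = b->next)
            if (b->offset == range_base && b->length == range_length)
                return b;           /* caller still has to take the buffer lock */
        return NULL;
    }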
598 | |||
599 | /* | ||
600 | * xfs_buf_get_flags assembles a buffer covering the specified range. | ||
601 | * | ||
602 | * Storage in memory for all portions of the buffer will be allocated, | ||
603 | * although backing storage may not be. | ||
604 | */ | ||
605 | xfs_buf_t * | ||
606 | xfs_buf_get_flags( /* allocate a buffer */ | ||
607 | xfs_buftarg_t *target,/* target for buffer */ | ||
608 | loff_t ioff, /* starting offset of range */ | ||
609 | size_t isize, /* length of range */ | ||
610 | page_buf_flags_t flags) /* PBF_TRYLOCK */ | ||
611 | { | ||
612 | xfs_buf_t *pb, *new_pb; | ||
613 | int error = 0, i; | ||
614 | |||
615 | new_pb = pagebuf_allocate(flags); | ||
616 | if (unlikely(!new_pb)) | ||
617 | return NULL; | ||
618 | |||
619 | pb = _pagebuf_find(target, ioff, isize, flags, new_pb); | ||
620 | if (pb == new_pb) { | ||
621 | error = _pagebuf_lookup_pages(pb, flags); | ||
622 | if (error) | ||
623 | goto no_buffer; | ||
624 | } else { | ||
625 | pagebuf_deallocate(new_pb); | ||
626 | if (unlikely(pb == NULL)) | ||
627 | return NULL; | ||
628 | } | ||
629 | |||
630 | for (i = 0; i < pb->pb_page_count; i++) | ||
631 | mark_page_accessed(pb->pb_pages[i]); | ||
632 | |||
633 | if (!(pb->pb_flags & PBF_MAPPED)) { | ||
634 | error = _pagebuf_map_pages(pb, flags); | ||
635 | if (unlikely(error)) { | ||
636 | printk(KERN_WARNING "%s: failed to map pages\n", | ||
637 | __FUNCTION__); | ||
638 | goto no_buffer; | ||
639 | } | ||
640 | } | ||
641 | |||
642 | XFS_STATS_INC(pb_get); | ||
643 | |||
644 | /* | ||
645 | * Always fill in the block number now, the mapped cases can do | ||
646 | * their own overlay of this later. | ||
647 | */ | ||
648 | pb->pb_bn = ioff; | ||
649 | pb->pb_count_desired = pb->pb_buffer_length; | ||
650 | |||
651 | PB_TRACE(pb, "get", (unsigned long)flags); | ||
652 | return pb; | ||
653 | |||
654 | no_buffer: | ||
655 | if (flags & (PBF_LOCK | PBF_TRYLOCK)) | ||
656 | pagebuf_unlock(pb); | ||
657 | pagebuf_rele(pb); | ||
658 | return NULL; | ||
659 | } | ||
660 | |||
661 | xfs_buf_t * | ||
662 | xfs_buf_read_flags( | ||
663 | xfs_buftarg_t *target, | ||
664 | loff_t ioff, | ||
665 | size_t isize, | ||
666 | page_buf_flags_t flags) | ||
667 | { | ||
668 | xfs_buf_t *pb; | ||
669 | |||
670 | flags |= PBF_READ; | ||
671 | |||
672 | pb = xfs_buf_get_flags(target, ioff, isize, flags); | ||
673 | if (pb) { | ||
674 | if (PBF_NOT_DONE(pb)) { | ||
675 | PB_TRACE(pb, "read", (unsigned long)flags); | ||
676 | XFS_STATS_INC(pb_get_read); | ||
677 | pagebuf_iostart(pb, flags); | ||
678 | } else if (flags & PBF_ASYNC) { | ||
679 | PB_TRACE(pb, "read_async", (unsigned long)flags); | ||
680 | /* | ||
681 | * Read ahead call which is already satisfied, | ||
682 | * drop the buffer | ||
683 | */ | ||
684 | goto no_buffer; | ||
685 | } else { | ||
686 | PB_TRACE(pb, "read_done", (unsigned long)flags); | ||
687 | /* We do not want read in the flags */ | ||
688 | pb->pb_flags &= ~PBF_READ; | ||
689 | } | ||
690 | } | ||
691 | |||
692 | return pb; | ||
693 | |||
694 | no_buffer: | ||
695 | if (flags & (PBF_LOCK | PBF_TRYLOCK)) | ||
696 | pagebuf_unlock(pb); | ||
697 | pagebuf_rele(pb); | ||
698 | return NULL; | ||
699 | } | ||
700 | |||
701 | /* | ||
702 | * Create a skeletal pagebuf (no pages associated with it). | ||
703 | */ | ||
704 | xfs_buf_t * | ||
705 | pagebuf_lookup( | ||
706 | xfs_buftarg_t *target, | ||
707 | loff_t ioff, | ||
708 | size_t isize, | ||
709 | page_buf_flags_t flags) | ||
710 | { | ||
711 | xfs_buf_t *pb; | ||
712 | |||
713 | pb = pagebuf_allocate(flags); | ||
714 | if (pb) { | ||
715 | _pagebuf_initialize(pb, target, ioff, isize, flags); | ||
716 | } | ||
717 | return pb; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * If we are not low on memory then do the readahead in a deadlock | ||
722 | * safe manner. | ||
723 | */ | ||
724 | void | ||
725 | pagebuf_readahead( | ||
726 | xfs_buftarg_t *target, | ||
727 | loff_t ioff, | ||
728 | size_t isize, | ||
729 | page_buf_flags_t flags) | ||
730 | { | ||
731 | struct backing_dev_info *bdi; | ||
732 | |||
733 | bdi = target->pbr_mapping->backing_dev_info; | ||
734 | if (bdi_read_congested(bdi)) | ||
735 | return; | ||
736 | |||
737 | flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); | ||
738 | xfs_buf_read_flags(target, ioff, isize, flags); | ||
739 | } | ||
740 | |||
741 | xfs_buf_t * | ||
742 | pagebuf_get_empty( | ||
743 | size_t len, | ||
744 | xfs_buftarg_t *target) | ||
745 | { | ||
746 | xfs_buf_t *pb; | ||
747 | |||
748 | pb = pagebuf_allocate(0); | ||
749 | if (pb) | ||
750 | _pagebuf_initialize(pb, target, 0, len, 0); | ||
751 | return pb; | ||
752 | } | ||
753 | |||
754 | static inline struct page * | ||
755 | mem_to_page( | ||
756 | void *addr) | ||
757 | { | ||
758 | if (((unsigned long)addr < VMALLOC_START) || | ||
759 | ((unsigned long)addr >= VMALLOC_END)) { | ||
760 | return virt_to_page(addr); | ||
761 | } else { | ||
762 | return vmalloc_to_page(addr); | ||
763 | } | ||
764 | } | ||
765 | |||
766 | int | ||
767 | pagebuf_associate_memory( | ||
768 | xfs_buf_t *pb, | ||
769 | void *mem, | ||
770 | size_t len) | ||
771 | { | ||
772 | int rval; | ||
773 | int i = 0; | ||
774 | size_t ptr; | ||
775 | size_t end, end_cur; | ||
776 | off_t offset; | ||
777 | int page_count; | ||
778 | |||
779 | page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; | ||
780 | offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK); | ||
781 | if (offset && (len > PAGE_CACHE_SIZE)) | ||
782 | page_count++; | ||
783 | |||
784 | /* Free any previous set of page pointers */ | ||
785 | if (pb->pb_pages) | ||
786 | _pagebuf_free_pages(pb); | ||
787 | |||
788 | pb->pb_pages = NULL; | ||
789 | pb->pb_addr = mem; | ||
790 | |||
791 | rval = _pagebuf_get_pages(pb, page_count, 0); | ||
792 | if (rval) | ||
793 | return rval; | ||
794 | |||
795 | pb->pb_offset = offset; | ||
796 | ptr = (size_t) mem & PAGE_CACHE_MASK; | ||
797 | end = PAGE_CACHE_ALIGN((size_t) mem + len); | ||
798 | end_cur = end; | ||
799 | /* set up first page */ | ||
800 | pb->pb_pages[0] = mem_to_page(mem); | ||
801 | |||
802 | ptr += PAGE_CACHE_SIZE; | ||
803 | pb->pb_page_count = ++i; | ||
804 | while (ptr < end) { | ||
805 | pb->pb_pages[i] = mem_to_page((void *)ptr); | ||
806 | pb->pb_page_count = ++i; | ||
807 | ptr += PAGE_CACHE_SIZE; | ||
808 | } | ||
809 | pb->pb_locked = 0; | ||
810 | |||
811 | pb->pb_count_desired = pb->pb_buffer_length = len; | ||
812 | pb->pb_flags |= PBF_MAPPED; | ||
813 | |||
814 | return 0; | ||
815 | } | ||
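pagebuf_associate_memory derives its page count as PAGE_CACHE_ALIGN(len) pages, plus one extra page when the start address is not page aligned and the range is longer than a page. The standalone sketch below reproduces that arithmetic under an assumed 4 KiB page size.

    /* Worked example (illustrative, assumes 4 KiB pages) of the page-count
     * calculation in pagebuf_associate_memory. */
    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define PAGE_SIZE     (1UL << PAGE_SHIFT)
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    static unsigned long
    pages_needed(unsigned long mem, unsigned long len)
    {
        unsigned long count  = PAGE_ALIGN(len) >> PAGE_SHIFT;
        unsigned long offset = mem & (PAGE_SIZE - 1);  /* offset into first page */

        if (offset && len > PAGE_SIZE)
            count++;               /* the tail spills into one extra page */
        return count;
    }

    int main(void)
    {
        /* 8 KiB starting 2096 bytes into a page spans three pages */
        printf("%lu\n", pages_needed(0x1000830UL, 8192));  /* prints 3 */
        return 0;
    }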
816 | |||
817 | xfs_buf_t * | ||
818 | pagebuf_get_no_daddr( | ||
819 | size_t len, | ||
820 | xfs_buftarg_t *target) | ||
821 | { | ||
822 | size_t malloc_len = len; | ||
823 | xfs_buf_t *bp; | ||
824 | void *data; | ||
825 | int error; | ||
826 | |||
827 | bp = pagebuf_allocate(0); | ||
828 | if (unlikely(bp == NULL)) | ||
829 | goto fail; | ||
830 | _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO); | ||
831 | |||
832 | try_again: | ||
833 | data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); | ||
834 | if (unlikely(data == NULL)) | ||
835 | goto fail_free_buf; | ||
836 | |||
837 | /* check whether alignment matches.. */ | ||
838 | if ((__psunsigned_t)data != | ||
839 | ((__psunsigned_t)data & ~target->pbr_smask)) { | ||
840 | /* .. else double the size and try again */ | ||
841 | kmem_free(data, malloc_len); | ||
842 | malloc_len <<= 1; | ||
843 | goto try_again; | ||
844 | } | ||
845 | |||
846 | error = pagebuf_associate_memory(bp, data, len); | ||
847 | if (error) | ||
848 | goto fail_free_mem; | ||
849 | bp->pb_flags |= _PBF_KMEM_ALLOC; | ||
850 | |||
851 | pagebuf_unlock(bp); | ||
852 | |||
853 | PB_TRACE(bp, "no_daddr", data); | ||
854 | return bp; | ||
855 | fail_free_mem: | ||
856 | kmem_free(data, malloc_len); | ||
857 | fail_free_buf: | ||
858 | pagebuf_free(bp); | ||
859 | fail: | ||
860 | return NULL; | ||
861 | } | ||
862 | |||
863 | /* | ||
864 | * pagebuf_hold | ||
865 | * | ||
866 | * Increment reference count on buffer, to hold the buffer concurrently | ||
867 | * with another thread which may release (free) the buffer asynchronously. | ||
868 | * | ||
869 | * Must hold the buffer already to call this function. | ||
870 | */ | ||
871 | void | ||
872 | pagebuf_hold( | ||
873 | xfs_buf_t *pb) | ||
874 | { | ||
875 | atomic_inc(&pb->pb_hold); | ||
876 | PB_TRACE(pb, "hold", 0); | ||
877 | } | ||
878 | |||
879 | /* | ||
880 | * pagebuf_rele | ||
881 | * | ||
882 | * pagebuf_rele releases a hold on the specified buffer. If the | ||
883 | * hold count is 1, pagebuf_rele calls pagebuf_free. | ||
884 | */ | ||
885 | void | ||
886 | pagebuf_rele( | ||
887 | xfs_buf_t *pb) | ||
888 | { | ||
889 | xfs_bufhash_t *hash = pb->pb_hash; | ||
890 | |||
891 | PB_TRACE(pb, "rele", pb->pb_relse); | ||
892 | |||
893 | /* | ||
894 | * pagebuf_lookup buffers are not hashed, not delayed write, | ||
895 | * and don't have their own release routines. Special case. | ||
896 | */ | ||
897 | if (unlikely(!hash)) { | ||
898 | ASSERT(!pb->pb_relse); | ||
899 | if (atomic_dec_and_test(&pb->pb_hold)) | ||
900 | xfs_buf_free(pb); | ||
901 | return; | ||
902 | } | ||
903 | |||
904 | if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { | ||
905 | int do_free = 1; | ||
906 | |||
907 | if (pb->pb_relse) { | ||
908 | atomic_inc(&pb->pb_hold); | ||
909 | spin_unlock(&hash->bh_lock); | ||
910 | (*(pb->pb_relse)) (pb); | ||
911 | spin_lock(&hash->bh_lock); | ||
912 | do_free = 0; | ||
913 | } | ||
914 | |||
915 | if (pb->pb_flags & PBF_DELWRI) { | ||
916 | pb->pb_flags |= PBF_ASYNC; | ||
917 | atomic_inc(&pb->pb_hold); | ||
918 | pagebuf_delwri_queue(pb, 0); | ||
919 | do_free = 0; | ||
920 | } else if (pb->pb_flags & PBF_FS_MANAGED) { | ||
921 | do_free = 0; | ||
922 | } | ||
923 | |||
924 | if (do_free) { | ||
925 | list_del_init(&pb->pb_hash_list); | ||
926 | spin_unlock(&hash->bh_lock); | ||
927 | pagebuf_free(pb); | ||
928 | } else { | ||
929 | spin_unlock(&hash->bh_lock); | ||
930 | } | ||
931 | } | ||
932 | } | ||
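pagebuf_rele leans on atomic_dec_and_lock(): the hash-chain lock is only taken when the hold count is about to reach zero, so ordinary releases never touch the lock. The sketch below is a userspace analogue of that primitive using C11 atomics and a pthread mutex, not the kernel implementation; a caller that gets back true would unhash and free the object and then drop the lock, much as pagebuf_rele does.

    /* Userspace analogue (illustrative) of the atomic_dec_and_lock pattern. */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    static bool
    dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
    {
        int old = atomic_load(cnt);

        /* Fast path: while more than one reference remains, just decrement. */
        while (old > 1) {
            if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                return false;           /* lock never taken */
        }

        /* Slow path: this may be the final reference; decide under the lock. */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) == 1)
            return true;                /* count hit zero: caller frees, then unlocks */
        pthread_mutex_unlock(lock);
        return false;
    }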
933 | |||
934 | |||
935 | /* | ||
936 | * Mutual exclusion on buffers. Locking model: | ||
937 | * | ||
938 | * Buffers associated with inodes for which buffer locking | ||
939 | * is not enabled are not protected by semaphores, and are | ||
940 | * assumed to be exclusively owned by the caller. There is a | ||
941 | * spinlock in the buffer, used by the caller when concurrent | ||
942 | * access is possible. | ||
943 | */ | ||
944 | |||
945 | /* | ||
946 | * pagebuf_cond_lock | ||
947 | * | ||
948 | * pagebuf_cond_lock locks a buffer object, if it is not already locked. | ||
949 | * Note that this in no way | ||
950 | * locks the underlying pages, so it is only useful for synchronizing | ||
951 | * concurrent use of page buffer objects, not for synchronizing independent | ||
952 | * access to the underlying pages. | ||
953 | */ | ||
954 | int | ||
955 | pagebuf_cond_lock( /* lock buffer, if not locked */ | ||
956 | /* returns -EBUSY if locked */ | ||
957 | xfs_buf_t *pb) | ||
958 | { | ||
959 | int locked; | ||
960 | |||
961 | locked = down_trylock(&pb->pb_sema) == 0; | ||
962 | if (locked) { | ||
963 | PB_SET_OWNER(pb); | ||
964 | } | ||
965 | PB_TRACE(pb, "cond_lock", (long)locked); | ||
966 | return(locked ? 0 : -EBUSY); | ||
967 | } | ||
968 | |||
969 | #if defined(DEBUG) || defined(XFS_BLI_TRACE) | ||
970 | /* | ||
971 | * pagebuf_lock_value | ||
972 | * | ||
973 | * Return lock value for a pagebuf | ||
974 | */ | ||
975 | int | ||
976 | pagebuf_lock_value( | ||
977 | xfs_buf_t *pb) | ||
978 | { | ||
979 | return(atomic_read(&pb->pb_sema.count)); | ||
980 | } | ||
981 | #endif | ||
982 | |||
983 | /* | ||
984 | * pagebuf_lock | ||
985 | * | ||
986 | * pagebuf_lock locks a buffer object. Note that this in no way | ||
987 | * locks the underlying pages, so it is only useful for synchronizing | ||
988 | * concurrent use of page buffer objects, not for synchronizing independent | ||
989 | * access to the underlying pages. | ||
990 | */ | ||
991 | int | ||
992 | pagebuf_lock( | ||
993 | xfs_buf_t *pb) | ||
994 | { | ||
995 | PB_TRACE(pb, "lock", 0); | ||
996 | if (atomic_read(&pb->pb_io_remaining)) | ||
997 | blk_run_address_space(pb->pb_target->pbr_mapping); | ||
998 | down(&pb->pb_sema); | ||
999 | PB_SET_OWNER(pb); | ||
1000 | PB_TRACE(pb, "locked", 0); | ||
1001 | return 0; | ||
1002 | } | ||
1003 | |||
1004 | /* | ||
1005 | * pagebuf_unlock | ||
1006 | * | ||
1007 | * pagebuf_unlock releases the lock on the buffer object created by | ||
1008 | * pagebuf_lock or pagebuf_cond_lock (not any | ||
1009 | * pinning of underlying pages created by pagebuf_pin). | ||
1010 | */ | ||
1011 | void | ||
1012 | pagebuf_unlock( /* unlock buffer */ | ||
1013 | xfs_buf_t *pb) /* buffer to unlock */ | ||
1014 | { | ||
1015 | PB_CLEAR_OWNER(pb); | ||
1016 | up(&pb->pb_sema); | ||
1017 | PB_TRACE(pb, "unlock", 0); | ||
1018 | } | ||
1019 | |||
1020 | |||
1021 | /* | ||
1022 | * Pinning Buffer Storage in Memory | ||
1023 | */ | ||
1024 | |||
1025 | /* | ||
1026 | * pagebuf_pin | ||
1027 | * | ||
1028 | * pagebuf_pin locks all of the memory represented by a buffer in | ||
1029 | * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for | ||
1030 | * the same or different buffers affecting a given page, will | ||
1031 | * properly count the number of outstanding "pin" requests. The | ||
1032 | * buffer may be released after the pagebuf_pin and a different | ||
1033 | * buffer used when calling pagebuf_unpin, if desired. | ||
1034 | * pagebuf_pin should be used by the file system when it wants to be | ||
1035 | * assured that no attempt will be made to force the affected | ||
1036 | * memory to disk. It does not assure that a given logical page | ||
1037 | * will not be moved to a different physical page. | ||
1038 | */ | ||
1039 | void | ||
1040 | pagebuf_pin( | ||
1041 | xfs_buf_t *pb) | ||
1042 | { | ||
1043 | atomic_inc(&pb->pb_pin_count); | ||
1044 | PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); | ||
1045 | } | ||
1046 | |||
1047 | /* | ||
1048 | * pagebuf_unpin | ||
1049 | * | ||
1050 | * pagebuf_unpin reverses the locking of memory performed by | ||
1051 | * pagebuf_pin. Note that both functions affect the logical | ||
1052 | * pages associated with the buffer, not the buffer itself. | ||
1053 | */ | ||
1054 | void | ||
1055 | pagebuf_unpin( | ||
1056 | xfs_buf_t *pb) | ||
1057 | { | ||
1058 | if (atomic_dec_and_test(&pb->pb_pin_count)) { | ||
1059 | wake_up_all(&pb->pb_waiters); | ||
1060 | } | ||
1061 | PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter); | ||
1062 | } | ||
1063 | |||
1064 | int | ||
1065 | pagebuf_ispin( | ||
1066 | xfs_buf_t *pb) | ||
1067 | { | ||
1068 | return atomic_read(&pb->pb_pin_count); | ||
1069 | } | ||
1070 | |||
1071 | /* | ||
1072 | * pagebuf_wait_unpin | ||
1073 | * | ||
1074 | * pagebuf_wait_unpin waits until all of the memory associated | ||
1075 | * with the buffer is no longer locked in memory. It returns | ||
1076 | * immediately if none of the affected pages are locked. | ||
1077 | */ | ||
1078 | static inline void | ||
1079 | _pagebuf_wait_unpin( | ||
1080 | xfs_buf_t *pb) | ||
1081 | { | ||
1082 | DECLARE_WAITQUEUE (wait, current); | ||
1083 | |||
1084 | if (atomic_read(&pb->pb_pin_count) == 0) | ||
1085 | return; | ||
1086 | |||
1087 | add_wait_queue(&pb->pb_waiters, &wait); | ||
1088 | for (;;) { | ||
1089 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1090 | if (atomic_read(&pb->pb_pin_count) == 0) | ||
1091 | break; | ||
1092 | if (atomic_read(&pb->pb_io_remaining)) | ||
1093 | blk_run_address_space(pb->pb_target->pbr_mapping); | ||
1094 | schedule(); | ||
1095 | } | ||
1096 | remove_wait_queue(&pb->pb_waiters, &wait); | ||
1097 | set_current_state(TASK_RUNNING); | ||
1098 | } | ||
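pagebuf_unpin wakes pb_waiters when the pin count reaches zero, and _pagebuf_wait_unpin above sleeps on that waitqueue until it does, kicking the block device queue while it waits. The sketch below is a userspace condition-variable analogue of the same wait-for-zero pattern; the structure and function names are invented for illustration.

    /* Illustrative userspace analogue of the pin count / waitqueue pattern. */
    #include <pthread.h>

    struct pin {
        pthread_mutex_t lock;
        pthread_cond_t  unpinned;
        int             count;
    };

    static void pin_get(struct pin *p)          /* pagebuf_pin analogue */
    {
        pthread_mutex_lock(&p->lock);
        p->count++;
        pthread_mutex_unlock(&p->lock);
    }

    static void pin_put(struct pin *p)          /* pagebuf_unpin analogue */
    {
        pthread_mutex_lock(&p->lock);
        if (--p->count == 0)
            pthread_cond_broadcast(&p->unpinned);   /* wake_up_all() equivalent */
        pthread_mutex_unlock(&p->lock);
    }

    static void pin_wait(struct pin *p)         /* _pagebuf_wait_unpin analogue */
    {
        pthread_mutex_lock(&p->lock);
        while (p->count != 0)
            pthread_cond_wait(&p->unpinned, &p->lock);
        pthread_mutex_unlock(&p->lock);
    }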
1099 | |||
1100 | /* | ||
1101 | * Buffer Utility Routines | ||
1102 | */ | ||
1103 | |||
1104 | /* | ||
1105 | * pagebuf_iodone | ||
1106 | * | ||
1107 | * pagebuf_iodone marks a buffer for which I/O is in progress | ||
1108 | * done with respect to that I/O. The pb_iodone routine, if | ||
1109 | * present, will be called as a side-effect. | ||
1110 | */ | ||
1111 | STATIC void | ||
1112 | pagebuf_iodone_work( | ||
1113 | void *v) | ||
1114 | { | ||
1115 | xfs_buf_t *bp = (xfs_buf_t *)v; | ||
1116 | |||
1117 | if (bp->pb_iodone) | ||
1118 | (*(bp->pb_iodone))(bp); | ||
1119 | else if (bp->pb_flags & PBF_ASYNC) | ||
1120 | xfs_buf_relse(bp); | ||
1121 | } | ||
1122 | |||
1123 | void | ||
1124 | pagebuf_iodone( | ||
1125 | xfs_buf_t *pb, | ||
1126 | int dataio, | ||
1127 | int schedule) | ||
1128 | { | ||
1129 | pb->pb_flags &= ~(PBF_READ | PBF_WRITE); | ||
1130 | if (pb->pb_error == 0) { | ||
1131 | pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE); | ||
1132 | } | ||
1133 | |||
1134 | PB_TRACE(pb, "iodone", pb->pb_iodone); | ||
1135 | |||
1136 | if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { | ||
1137 | if (schedule) { | ||
1138 | INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); | ||
1139 | queue_work(dataio ? pagebuf_dataio_workqueue : | ||
1140 | pagebuf_logio_workqueue, &pb->pb_iodone_work); | ||
1141 | } else { | ||
1142 | pagebuf_iodone_work(pb); | ||
1143 | } | ||
1144 | } else { | ||
1145 | up(&pb->pb_iodonesema); | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1149 | /* | ||
1150 | * pagebuf_ioerror | ||
1151 | * | ||
1152 | * pagebuf_ioerror sets the error code for a buffer. | ||
1153 | */ | ||
1154 | void | ||
1155 | pagebuf_ioerror( /* mark/clear buffer error flag */ | ||
1156 | xfs_buf_t *pb, /* buffer to mark */ | ||
1157 | int error) /* error to store (0 if none) */ | ||
1158 | { | ||
1159 | ASSERT(error >= 0 && error <= 0xffff); | ||
1160 | pb->pb_error = (unsigned short)error; | ||
1161 | PB_TRACE(pb, "ioerror", (unsigned long)error); | ||
1162 | } | ||
1163 | |||
1164 | /* | ||
1165 | * pagebuf_iostart | ||
1166 | * | ||
1167 | * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied. | ||
1168 | * If necessary, it will arrange for any disk space allocation required, | ||
1169 | * and it will break up the request if the block mappings require it. | ||
1170 | * The pb_iodone routine in the buffer supplied will only be called | ||
1171 | * when all of the subsidiary I/O requests, if any, have been completed. | ||
1172 | * pagebuf_iostart calls the pagebuf_ioinitiate routine or | ||
1173 | * pagebuf_iorequest, if the former routine is not defined, to start | ||
1174 | * the I/O on a given low-level request. | ||
1175 | */ | ||
1176 | int | ||
1177 | pagebuf_iostart( /* start I/O on a buffer */ | ||
1178 | xfs_buf_t *pb, /* buffer to start */ | ||
1179 | page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ | ||
1180 | /* PBF_WRITE, PBF_DELWRI, */ | ||
1181 | /* PBF_DONT_BLOCK */ | ||
1182 | { | ||
1183 | int status = 0; | ||
1184 | |||
1185 | PB_TRACE(pb, "iostart", (unsigned long)flags); | ||
1186 | |||
1187 | if (flags & PBF_DELWRI) { | ||
1188 | pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); | ||
1189 | pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); | ||
1190 | pagebuf_delwri_queue(pb, 1); | ||
1191 | return status; | ||
1192 | } | ||
1193 | |||
1194 | pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ | ||
1195 | PBF_READ_AHEAD | _PBF_RUN_QUEUES); | ||
1196 | pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ | ||
1197 | PBF_READ_AHEAD | _PBF_RUN_QUEUES); | ||
1198 | |||
1199 | BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); | ||
1200 | |||
1201 | /* For writes allow an alternate strategy routine to precede | ||
1202 | * the actual I/O request (which may not be issued at all in | ||
1203 | * a shutdown situation, for example). | ||
1204 | */ | ||
1205 | status = (flags & PBF_WRITE) ? | ||
1206 | pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); | ||
1207 | |||
1208 | /* Wait for I/O if we are not an async request. | ||
1209 | * Note: async I/O request completion will release the buffer, | ||
1210 | 	 * and that may already have happened by this point. So using the | ||
1211 | * buffer pointer from here on, after async I/O, is invalid. | ||
1212 | */ | ||
1213 | if (!status && !(flags & PBF_ASYNC)) | ||
1214 | status = pagebuf_iowait(pb); | ||
1215 | |||
1216 | return status; | ||
1217 | } | ||
1218 | |||
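pagebuf_iostart is essentially a dispatcher: delayed-write buffers are only queued, writes may go through an alternate strategy routine, and synchronous callers block in pagebuf_iowait afterwards. A hedged userspace mirror of that flag routing (the flag values are copied from the pb_flags enum later in this diff; everything else is mocked):

#include <stdio.h>

#define PBF_READ   (1 << 0)
#define PBF_WRITE  (1 << 1)
#define PBF_ASYNC  (1 << 4)
#define PBF_DELWRI (1 << 6)

/* Return a label describing what pagebuf_iostart() would do for these flags. */
static const char *iostart_action(unsigned int flags)
{
	if (flags & PBF_DELWRI)
		return "queue on delayed-write list, return immediately";
	if (flags & PBF_WRITE)
		return (flags & PBF_ASYNC) ?
			"issue via strategy routine, do not wait" :
			"issue via strategy routine, then pagebuf_iowait()";
	return (flags & PBF_ASYNC) ?
		"issue read, do not wait" :
		"issue read, then pagebuf_iowait()";
}

int main(void)
{
	printf("%s\n", iostart_action(PBF_DELWRI | PBF_ASYNC));
	printf("%s\n", iostart_action(PBF_WRITE));
	printf("%s\n", iostart_action(PBF_READ | PBF_ASYNC));
	return 0;
}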
1219 | /* | ||
1220 | * Helper routine for pagebuf_iorequest | ||
1221 | */ | ||
1222 | |||
1223 | STATIC __inline__ int | ||
1224 | _pagebuf_iolocked( | ||
1225 | xfs_buf_t *pb) | ||
1226 | { | ||
1227 | ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); | ||
1228 | if (pb->pb_flags & PBF_READ) | ||
1229 | return pb->pb_locked; | ||
1230 | return 0; | ||
1231 | } | ||
1232 | |||
1233 | STATIC __inline__ void | ||
1234 | _pagebuf_iodone( | ||
1235 | xfs_buf_t *pb, | ||
1236 | int schedule) | ||
1237 | { | ||
1238 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { | ||
1239 | pb->pb_locked = 0; | ||
1240 | pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); | ||
1241 | } | ||
1242 | } | ||
1243 | |||
1244 | STATIC int | ||
1245 | bio_end_io_pagebuf( | ||
1246 | struct bio *bio, | ||
1247 | unsigned int bytes_done, | ||
1248 | int error) | ||
1249 | { | ||
1250 | xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; | ||
1251 | unsigned int i, blocksize = pb->pb_target->pbr_bsize; | ||
1252 | struct bio_vec *bvec = bio->bi_io_vec; | ||
1253 | |||
1254 | if (bio->bi_size) | ||
1255 | return 1; | ||
1256 | |||
1257 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
1258 | pb->pb_error = EIO; | ||
1259 | |||
1260 | for (i = 0; i < bio->bi_vcnt; i++, bvec++) { | ||
1261 | struct page *page = bvec->bv_page; | ||
1262 | |||
1263 | if (pb->pb_error) { | ||
1264 | SetPageError(page); | ||
1265 | } else if (blocksize == PAGE_CACHE_SIZE) { | ||
1266 | SetPageUptodate(page); | ||
1267 | } else if (!PagePrivate(page) && | ||
1268 | (pb->pb_flags & _PBF_PAGE_CACHE)) { | ||
1269 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | ||
1270 | } | ||
1271 | |||
1272 | if (_pagebuf_iolocked(pb)) { | ||
1273 | unlock_page(page); | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | _pagebuf_iodone(pb, 1); | ||
1278 | bio_put(bio); | ||
1279 | return 0; | ||
1280 | } | ||
1281 | |||
1282 | STATIC void | ||
1283 | _pagebuf_ioapply( | ||
1284 | xfs_buf_t *pb) | ||
1285 | { | ||
1286 | int i, rw, map_i, total_nr_pages, nr_pages; | ||
1287 | struct bio *bio; | ||
1288 | int offset = pb->pb_offset; | ||
1289 | int size = pb->pb_count_desired; | ||
1290 | sector_t sector = pb->pb_bn; | ||
1291 | unsigned int blocksize = pb->pb_target->pbr_bsize; | ||
1292 | int locking = _pagebuf_iolocked(pb); | ||
1293 | |||
1294 | total_nr_pages = pb->pb_page_count; | ||
1295 | map_i = 0; | ||
1296 | |||
1297 | if (pb->pb_flags & _PBF_RUN_QUEUES) { | ||
1298 | pb->pb_flags &= ~_PBF_RUN_QUEUES; | ||
1299 | rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC; | ||
1300 | } else { | ||
1301 | rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; | ||
1302 | } | ||
1303 | |||
1304 | 	/* Special code path for reading a sub-page-sized pagebuf -- | ||
1305 | 	 * we populate the whole page, and hence the other metadata | ||
1306 | * in the same page. This optimization is only valid when the | ||
1307 | * filesystem block size and the page size are equal. | ||
1308 | */ | ||
1309 | if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && | ||
1310 | (pb->pb_flags & PBF_READ) && locking && | ||
1311 | (blocksize == PAGE_CACHE_SIZE)) { | ||
1312 | bio = bio_alloc(GFP_NOIO, 1); | ||
1313 | |||
1314 | bio->bi_bdev = pb->pb_target->pbr_bdev; | ||
1315 | bio->bi_sector = sector - (offset >> BBSHIFT); | ||
1316 | bio->bi_end_io = bio_end_io_pagebuf; | ||
1317 | bio->bi_private = pb; | ||
1318 | |||
1319 | bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); | ||
1320 | size = 0; | ||
1321 | |||
1322 | atomic_inc(&pb->pb_io_remaining); | ||
1323 | |||
1324 | goto submit_io; | ||
1325 | } | ||
1326 | |||
1327 | /* Lock down the pages which we need to for the request */ | ||
1328 | if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { | ||
1329 | for (i = 0; size; i++) { | ||
1330 | int nbytes = PAGE_CACHE_SIZE - offset; | ||
1331 | struct page *page = pb->pb_pages[i]; | ||
1332 | |||
1333 | if (nbytes > size) | ||
1334 | nbytes = size; | ||
1335 | |||
1336 | lock_page(page); | ||
1337 | |||
1338 | size -= nbytes; | ||
1339 | offset = 0; | ||
1340 | } | ||
1341 | offset = pb->pb_offset; | ||
1342 | size = pb->pb_count_desired; | ||
1343 | } | ||
1344 | |||
1345 | next_chunk: | ||
1346 | atomic_inc(&pb->pb_io_remaining); | ||
1347 | nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); | ||
1348 | if (nr_pages > total_nr_pages) | ||
1349 | nr_pages = total_nr_pages; | ||
1350 | |||
1351 | bio = bio_alloc(GFP_NOIO, nr_pages); | ||
1352 | bio->bi_bdev = pb->pb_target->pbr_bdev; | ||
1353 | bio->bi_sector = sector; | ||
1354 | bio->bi_end_io = bio_end_io_pagebuf; | ||
1355 | bio->bi_private = pb; | ||
1356 | |||
1357 | for (; size && nr_pages; nr_pages--, map_i++) { | ||
1358 | int nbytes = PAGE_CACHE_SIZE - offset; | ||
1359 | |||
1360 | if (nbytes > size) | ||
1361 | nbytes = size; | ||
1362 | |||
1363 | if (bio_add_page(bio, pb->pb_pages[map_i], | ||
1364 | nbytes, offset) < nbytes) | ||
1365 | break; | ||
1366 | |||
1367 | offset = 0; | ||
1368 | sector += nbytes >> BBSHIFT; | ||
1369 | size -= nbytes; | ||
1370 | total_nr_pages--; | ||
1371 | } | ||
1372 | |||
1373 | submit_io: | ||
1374 | if (likely(bio->bi_size)) { | ||
1375 | submit_bio(rw, bio); | ||
1376 | if (size) | ||
1377 | goto next_chunk; | ||
1378 | } else { | ||
1379 | bio_put(bio); | ||
1380 | pagebuf_ioerror(pb, EIO); | ||
1381 | } | ||
1382 | } | ||
1383 | |||
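Each bio covers at most BIO_MAX_SECTORS worth of data, so the chunking above converts that limit into a page count with BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT) and loops via next_chunk until the remaining size reaches zero. A standalone sketch of that arithmetic; BBSHIFT = 9 (512-byte basic blocks) comes from XFS, while PAGE_SHIFT = 12 and BIO_MAX_SECTORS = 2048 are assumed example values, and the first-page offset is ignored for brevity:

#include <stdio.h>

#define BBSHIFT		9	/* 512-byte basic blocks, as in XFS */
#define PAGE_SHIFT	12	/* assumed 4K pages */
#define PAGE_SIZE	(1 << PAGE_SHIFT)
#define BIO_MAX_SECTORS	2048	/* assumed; depends on the kernel's bio limits */

int main(void)
{
	int size = 600 * PAGE_SIZE;	/* pretend buffer: 600 pages */
	int pages_per_bio = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);	/* 256 */
	unsigned long long sector = 0;
	int chunk = 0;

	while (size > 0) {
		int this_pages = size >> PAGE_SHIFT;

		if (this_pages > pages_per_bio)
			this_pages = pages_per_bio;
		printf("bio %d: %d pages starting at sector %llu\n",
		       ++chunk, this_pages, sector);
		sector += (unsigned long long)(this_pages << PAGE_SHIFT) >> BBSHIFT;
		size -= this_pages << PAGE_SHIFT;
	}
	/* prints three chunks: 256, 256 and 88 pages */
	return 0;
}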
1384 | /* | ||
1385 | * pagebuf_iorequest -- the core I/O request routine. | ||
1386 | */ | ||
1387 | int | ||
1388 | pagebuf_iorequest( /* start real I/O */ | ||
1389 | xfs_buf_t *pb) /* buffer to convey to device */ | ||
1390 | { | ||
1391 | PB_TRACE(pb, "iorequest", 0); | ||
1392 | |||
1393 | if (pb->pb_flags & PBF_DELWRI) { | ||
1394 | pagebuf_delwri_queue(pb, 1); | ||
1395 | return 0; | ||
1396 | } | ||
1397 | |||
1398 | if (pb->pb_flags & PBF_WRITE) { | ||
1399 | _pagebuf_wait_unpin(pb); | ||
1400 | } | ||
1401 | |||
1402 | pagebuf_hold(pb); | ||
1403 | |||
1404 | 	/* Set the count to 1 initially; this will stop an I/O | ||
1405 | * completion callout which happens before we have started | ||
1406 | * all the I/O from calling pagebuf_iodone too early. | ||
1407 | */ | ||
1408 | atomic_set(&pb->pb_io_remaining, 1); | ||
1409 | _pagebuf_ioapply(pb); | ||
1410 | _pagebuf_iodone(pb, 0); | ||
1411 | |||
1412 | pagebuf_rele(pb); | ||
1413 | return 0; | ||
1414 | } | ||
1415 | |||
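The comment above is the key to the counting scheme: every bio submitted in _pagebuf_ioapply increments pb_io_remaining and every bio completion decrements it, while the extra count of 1 taken here is only dropped by the final _pagebuf_iodone(pb, 0) call, so the buffer cannot be marked done until all bios have both been issued and finished. A minimal userspace sketch of that guard (plain ints stand in for the atomic ops):

#include <stdio.h>

static int io_remaining;

static void submit_fake_bio(void)
{
	io_remaining++;
}

static void fake_bio_complete(void)	/* the bio_end_io path */
{
	if (--io_remaining == 0)
		printf("buffer I/O done\n");
}

int main(void)
{
	io_remaining = 1;	/* guard taken in pagebuf_iorequest() */

	submit_fake_bio();	/* count 2 */
	fake_bio_complete();	/* count 1: completion does NOT fire early */
	submit_fake_bio();	/* count 2 */
	fake_bio_complete();	/* count 1 */

	fake_bio_complete();	/* drop the guard (the _pagebuf_iodone(pb, 0) call): fires now */
	return 0;
}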
1416 | /* | ||
1417 | * pagebuf_iowait | ||
1418 | * | ||
1419 | * pagebuf_iowait waits for I/O to complete on the buffer supplied. | ||
1420 | * It returns immediately if no I/O is pending. In any case, it returns | ||
1421 | * the error code, if any, or 0 if there is no error. | ||
1422 | */ | ||
1423 | int | ||
1424 | pagebuf_iowait( | ||
1425 | xfs_buf_t *pb) | ||
1426 | { | ||
1427 | PB_TRACE(pb, "iowait", 0); | ||
1428 | if (atomic_read(&pb->pb_io_remaining)) | ||
1429 | blk_run_address_space(pb->pb_target->pbr_mapping); | ||
1430 | down(&pb->pb_iodonesema); | ||
1431 | PB_TRACE(pb, "iowaited", (long)pb->pb_error); | ||
1432 | return pb->pb_error; | ||
1433 | } | ||
1434 | |||
1435 | caddr_t | ||
1436 | pagebuf_offset( | ||
1437 | xfs_buf_t *pb, | ||
1438 | size_t offset) | ||
1439 | { | ||
1440 | struct page *page; | ||
1441 | |||
1442 | offset += pb->pb_offset; | ||
1443 | |||
1444 | page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; | ||
1445 | return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); | ||
1446 | } | ||
1447 | |||
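pagebuf_offset translates a byte offset within the buffer into a page index plus an intra-page offset. A standalone illustration of the same index/mask arithmetic, assuming 4K pages and an example first-page offset:

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12			/* assumed 4K pages */
#define PAGE_CACHE_SIZE  (1 << PAGE_CACHE_SHIFT)

int main(void)
{
	size_t pb_offset = 512;			/* offset of data within the first page */
	size_t offset = 9000;			/* caller-supplied buffer offset */

	offset += pb_offset;
	printf("page index %zu, offset in page %zu\n",
	       offset >> PAGE_CACHE_SHIFT,
	       offset & (PAGE_CACHE_SIZE - 1));	/* -> page 2, offset 1320 */
	return 0;
}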
1448 | /* | ||
1449 | * pagebuf_iomove | ||
1450 | * | ||
1451 | * Move data into or out of a buffer. | ||
1452 | */ | ||
1453 | void | ||
1454 | pagebuf_iomove( | ||
1455 | xfs_buf_t *pb, /* buffer to process */ | ||
1456 | size_t boff, /* starting buffer offset */ | ||
1457 | size_t bsize, /* length to copy */ | ||
1458 | caddr_t data, /* data address */ | ||
1459 | page_buf_rw_t mode) /* read/write flag */ | ||
1460 | { | ||
1461 | size_t bend, cpoff, csize; | ||
1462 | struct page *page; | ||
1463 | |||
1464 | bend = boff + bsize; | ||
1465 | while (boff < bend) { | ||
1466 | page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; | ||
1467 | cpoff = page_buf_poff(boff + pb->pb_offset); | ||
1468 | csize = min_t(size_t, | ||
1469 | PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); | ||
1470 | |||
1471 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); | ||
1472 | |||
1473 | switch (mode) { | ||
1474 | case PBRW_ZERO: | ||
1475 | memset(page_address(page) + cpoff, 0, csize); | ||
1476 | break; | ||
1477 | case PBRW_READ: | ||
1478 | memcpy(data, page_address(page) + cpoff, csize); | ||
1479 | break; | ||
1480 | case PBRW_WRITE: | ||
1481 | memcpy(page_address(page) + cpoff, data, csize); | ||
1482 | } | ||
1483 | |||
1484 | boff += csize; | ||
1485 | data += csize; | ||
1486 | } | ||
1487 | } | ||
1488 | |||
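pagebuf_iomove walks the buffer page by page, clamping each copy to whatever remains of both the current page and the requested range. A hedged standalone mirror of that loop using a flat array in place of the page array; 4K pages are assumed, the page_buf_btoct/page_buf_poff helpers from the header below are expanded inline, and the first-page offset (pb_offset) is left out for brevity:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096				/* assumed */
#define NPAGES    4

static char pages[NPAGES][PAGE_SIZE];		/* stands in for pb_pages[] */

/* Copy 'len' bytes into the paged buffer at 'boff', one page chunk at a time. */
static void buf_write(size_t boff, const char *data, size_t len)
{
	size_t end = boff + len;

	while (boff < end) {
		size_t page  = boff / PAGE_SIZE;	/* like page_buf_btoct() */
		size_t cpoff = boff % PAGE_SIZE;	/* like page_buf_poff() */
		size_t csize = PAGE_SIZE - cpoff;

		if (csize > end - boff)
			csize = end - boff;
		memcpy(&pages[page][cpoff], data, csize);
		boff += csize;
		data += csize;
	}
}

int main(void)
{
	char msg[8000];

	memset(msg, 'x', sizeof(msg));
	buf_write(100, msg, sizeof(msg));	/* spans pages 0, 1 and 2 */
	printf("byte 100 = %c, byte 8099 = %c\n",
	       pages[0][100], pages[1][8099 - PAGE_SIZE]);
	return 0;
}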
1489 | /* | ||
1490 | * Handling of buftargs. | ||
1491 | */ | ||
1492 | |||
1493 | /* | ||
1494 | * Wait for any bufs with callbacks that have been submitted but | ||
1495 | * have not yet returned... walk the hash list for the target. | ||
1496 | */ | ||
1497 | void | ||
1498 | xfs_wait_buftarg( | ||
1499 | xfs_buftarg_t *btp) | ||
1500 | { | ||
1501 | xfs_buf_t *bp, *n; | ||
1502 | xfs_bufhash_t *hash; | ||
1503 | uint i; | ||
1504 | |||
1505 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | ||
1506 | hash = &btp->bt_hash[i]; | ||
1507 | again: | ||
1508 | spin_lock(&hash->bh_lock); | ||
1509 | list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { | ||
1510 | ASSERT(btp == bp->pb_target); | ||
1511 | if (!(bp->pb_flags & PBF_FS_MANAGED)) { | ||
1512 | spin_unlock(&hash->bh_lock); | ||
1513 | delay(100); | ||
1514 | goto again; | ||
1515 | } | ||
1516 | } | ||
1517 | spin_unlock(&hash->bh_lock); | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | /* | ||
1522 | * Allocate buffer hash table for a given target. | ||
1523 | * For devices containing metadata (i.e. not the log/realtime devices) | ||
1524 | * we need to allocate a much larger hash table. | ||
1525 | */ | ||
1526 | STATIC void | ||
1527 | xfs_alloc_bufhash( | ||
1528 | xfs_buftarg_t *btp, | ||
1529 | int external) | ||
1530 | { | ||
1531 | unsigned int i; | ||
1532 | |||
1533 | btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ | ||
1534 | btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; | ||
1535 | btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * | ||
1536 | sizeof(xfs_bufhash_t), KM_SLEEP); | ||
1537 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | ||
1538 | spin_lock_init(&btp->bt_hash[i].bh_lock); | ||
1539 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); | ||
1540 | } | ||
1541 | } | ||
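The hash table size comes straight from bt_hashshift: 2^3 = 8 buckets for an external (log/realtime) device, 2^8 = 256 for the metadata device, with bt_hashmask used to fold a hash value into that range. A trivial standalone check of those numbers:

#include <stdio.h>

int main(void)
{
	int external;

	for (external = 0; external <= 1; external++) {
		unsigned int shift = external ? 3 : 8;
		unsigned int buckets = 1u << shift;
		unsigned int mask = buckets - 1;

		printf("external=%d: %u buckets, mask 0x%x\n",
		       external, buckets, mask);
	}
	return 0;
}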
1542 | |||
1543 | STATIC void | ||
1544 | xfs_free_bufhash( | ||
1545 | xfs_buftarg_t *btp) | ||
1546 | { | ||
1547 | kmem_free(btp->bt_hash, | ||
1548 | (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t)); | ||
1549 | btp->bt_hash = NULL; | ||
1550 | } | ||
1551 | |||
1552 | void | ||
1553 | xfs_free_buftarg( | ||
1554 | xfs_buftarg_t *btp, | ||
1555 | int external) | ||
1556 | { | ||
1557 | xfs_flush_buftarg(btp, 1); | ||
1558 | if (external) | ||
1559 | xfs_blkdev_put(btp->pbr_bdev); | ||
1560 | xfs_free_bufhash(btp); | ||
1561 | iput(btp->pbr_mapping->host); | ||
1562 | kmem_free(btp, sizeof(*btp)); | ||
1563 | } | ||
1564 | |||
1565 | void | ||
1566 | xfs_incore_relse( | ||
1567 | xfs_buftarg_t *btp, | ||
1568 | int delwri_only, | ||
1569 | int wait) | ||
1570 | { | ||
1571 | invalidate_bdev(btp->pbr_bdev, 1); | ||
1572 | truncate_inode_pages(btp->pbr_mapping, 0LL); | ||
1573 | } | ||
1574 | |||
1575 | STATIC int | ||
1576 | xfs_setsize_buftarg_flags( | ||
1577 | xfs_buftarg_t *btp, | ||
1578 | unsigned int blocksize, | ||
1579 | unsigned int sectorsize, | ||
1580 | int verbose) | ||
1581 | { | ||
1582 | btp->pbr_bsize = blocksize; | ||
1583 | btp->pbr_sshift = ffs(sectorsize) - 1; | ||
1584 | btp->pbr_smask = sectorsize - 1; | ||
1585 | |||
1586 | if (set_blocksize(btp->pbr_bdev, sectorsize)) { | ||
1587 | printk(KERN_WARNING | ||
1588 | "XFS: Cannot set_blocksize to %u on device %s\n", | ||
1589 | sectorsize, XFS_BUFTARG_NAME(btp)); | ||
1590 | return EINVAL; | ||
1591 | } | ||
1592 | |||
1593 | if (verbose && | ||
1594 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { | ||
1595 | printk(KERN_WARNING | ||
1596 | "XFS: %u byte sectors in use on device %s. " | ||
1597 | "This is suboptimal; %u or greater is ideal.\n", | ||
1598 | sectorsize, XFS_BUFTARG_NAME(btp), | ||
1599 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); | ||
1600 | } | ||
1601 | |||
1602 | return 0; | ||
1603 | } | ||
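pbr_sshift and pbr_smask are derived from the sector size with ffs(): for 512-byte sectors the shift is 9 and the mask 0x1ff, so a byte offset converts to a sector number with a shift and is alignment-checked with the mask. The warning threshold PAGE_CACHE_SIZE / BITS_PER_LONG works out to 64 on a 4K-page, 64-bit machine. A small standalone check; the page size and word size here are assumptions, not values read from any kernel:

#include <stdio.h>
#include <strings.h>		/* ffs() */

int main(void)
{
	unsigned int sectorsize = 512;		/* example value */
	unsigned int sshift = ffs(sectorsize) - 1;
	unsigned int smask  = sectorsize - 1;
	unsigned int page_cache_size = 4096;	/* assumed */
	unsigned int bits_per_long = 64;	/* assumed */

	printf("sshift=%u smask=0x%x\n", sshift, smask);
	printf("warn if sectorsize < %u\n", page_cache_size / bits_per_long);

	/* byte offset 4608 -> sector 9, and it is sector aligned */
	printf("offset 4608 -> sector %u, misalignment %u\n",
	       4608u >> sshift, 4608u & smask);
	return 0;
}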
1604 | |||
1605 | /* | ||
1606 | * When allocating the initial buffer target we have not yet | ||
1607 | * read in the superblock, so we don't know what size sectors | ||
1608 | * are being used at this early stage. Play safe. | ||
1609 | */ | ||
1610 | STATIC int | ||
1611 | xfs_setsize_buftarg_early( | ||
1612 | xfs_buftarg_t *btp, | ||
1613 | struct block_device *bdev) | ||
1614 | { | ||
1615 | return xfs_setsize_buftarg_flags(btp, | ||
1616 | PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); | ||
1617 | } | ||
1618 | |||
1619 | int | ||
1620 | xfs_setsize_buftarg( | ||
1621 | xfs_buftarg_t *btp, | ||
1622 | unsigned int blocksize, | ||
1623 | unsigned int sectorsize) | ||
1624 | { | ||
1625 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); | ||
1626 | } | ||
1627 | |||
1628 | STATIC int | ||
1629 | xfs_mapping_buftarg( | ||
1630 | xfs_buftarg_t *btp, | ||
1631 | struct block_device *bdev) | ||
1632 | { | ||
1633 | struct backing_dev_info *bdi; | ||
1634 | struct inode *inode; | ||
1635 | struct address_space *mapping; | ||
1636 | static struct address_space_operations mapping_aops = { | ||
1637 | .sync_page = block_sync_page, | ||
1638 | }; | ||
1639 | |||
1640 | inode = new_inode(bdev->bd_inode->i_sb); | ||
1641 | if (!inode) { | ||
1642 | printk(KERN_WARNING | ||
1643 | "XFS: Cannot allocate mapping inode for device %s\n", | ||
1644 | XFS_BUFTARG_NAME(btp)); | ||
1645 | return ENOMEM; | ||
1646 | } | ||
1647 | inode->i_mode = S_IFBLK; | ||
1648 | inode->i_bdev = bdev; | ||
1649 | inode->i_rdev = bdev->bd_dev; | ||
1650 | bdi = blk_get_backing_dev_info(bdev); | ||
1651 | if (!bdi) | ||
1652 | bdi = &default_backing_dev_info; | ||
1653 | mapping = &inode->i_data; | ||
1654 | mapping->a_ops = &mapping_aops; | ||
1655 | mapping->backing_dev_info = bdi; | ||
1656 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
1657 | btp->pbr_mapping = mapping; | ||
1658 | return 0; | ||
1659 | } | ||
1660 | |||
1661 | xfs_buftarg_t * | ||
1662 | xfs_alloc_buftarg( | ||
1663 | struct block_device *bdev, | ||
1664 | int external) | ||
1665 | { | ||
1666 | xfs_buftarg_t *btp; | ||
1667 | |||
1668 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); | ||
1669 | |||
1670 | btp->pbr_dev = bdev->bd_dev; | ||
1671 | btp->pbr_bdev = bdev; | ||
1672 | if (xfs_setsize_buftarg_early(btp, bdev)) | ||
1673 | goto error; | ||
1674 | if (xfs_mapping_buftarg(btp, bdev)) | ||
1675 | goto error; | ||
1676 | xfs_alloc_bufhash(btp, external); | ||
1677 | return btp; | ||
1678 | |||
1679 | error: | ||
1680 | kmem_free(btp, sizeof(*btp)); | ||
1681 | return NULL; | ||
1682 | } | ||
1683 | |||
1684 | |||
1685 | /* | ||
1686 | * Pagebuf delayed write buffer handling | ||
1687 | */ | ||
1688 | |||
1689 | STATIC LIST_HEAD(pbd_delwrite_queue); | ||
1690 | STATIC DEFINE_SPINLOCK(pbd_delwrite_lock); | ||
1691 | |||
1692 | STATIC void | ||
1693 | pagebuf_delwri_queue( | ||
1694 | xfs_buf_t *pb, | ||
1695 | int unlock) | ||
1696 | { | ||
1697 | PB_TRACE(pb, "delwri_q", (long)unlock); | ||
1698 | ASSERT(pb->pb_flags & PBF_DELWRI); | ||
1699 | |||
1700 | spin_lock(&pbd_delwrite_lock); | ||
1701 | /* If already in the queue, dequeue and place at tail */ | ||
1702 | if (!list_empty(&pb->pb_list)) { | ||
1703 | if (unlock) { | ||
1704 | atomic_dec(&pb->pb_hold); | ||
1705 | } | ||
1706 | list_del(&pb->pb_list); | ||
1707 | } | ||
1708 | |||
1709 | list_add_tail(&pb->pb_list, &pbd_delwrite_queue); | ||
1710 | pb->pb_queuetime = jiffies; | ||
1711 | spin_unlock(&pbd_delwrite_lock); | ||
1712 | |||
1713 | if (unlock) | ||
1714 | pagebuf_unlock(pb); | ||
1715 | } | ||
1716 | |||
1717 | void | ||
1718 | pagebuf_delwri_dequeue( | ||
1719 | xfs_buf_t *pb) | ||
1720 | { | ||
1721 | int dequeued = 0; | ||
1722 | |||
1723 | spin_lock(&pbd_delwrite_lock); | ||
1724 | if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { | ||
1725 | list_del_init(&pb->pb_list); | ||
1726 | dequeued = 1; | ||
1727 | } | ||
1728 | pb->pb_flags &= ~PBF_DELWRI; | ||
1729 | spin_unlock(&pbd_delwrite_lock); | ||
1730 | |||
1731 | if (dequeued) | ||
1732 | pagebuf_rele(pb); | ||
1733 | |||
1734 | PB_TRACE(pb, "delwri_dq", (long)dequeued); | ||
1735 | } | ||
1736 | |||
1737 | STATIC void | ||
1738 | pagebuf_runall_queues( | ||
1739 | struct workqueue_struct *queue) | ||
1740 | { | ||
1741 | flush_workqueue(queue); | ||
1742 | } | ||
1743 | |||
1744 | /* Defines for pagebuf daemon */ | ||
1745 | STATIC DECLARE_COMPLETION(pagebuf_daemon_done); | ||
1746 | STATIC struct task_struct *pagebuf_daemon_task; | ||
1747 | STATIC int pagebuf_daemon_active; | ||
1748 | STATIC int force_flush; | ||
1749 | |||
1750 | |||
1751 | STATIC int | ||
1752 | pagebuf_daemon_wakeup( | ||
1753 | int priority, | ||
1754 | unsigned int mask) | ||
1755 | { | ||
1756 | force_flush = 1; | ||
1757 | barrier(); | ||
1758 | wake_up_process(pagebuf_daemon_task); | ||
1759 | return 0; | ||
1760 | } | ||
1761 | |||
1762 | STATIC int | ||
1763 | pagebuf_daemon( | ||
1764 | void *data) | ||
1765 | { | ||
1766 | struct list_head tmp; | ||
1767 | unsigned long age; | ||
1768 | xfs_buftarg_t *target; | ||
1769 | xfs_buf_t *pb, *n; | ||
1770 | |||
1771 | /* Set up the thread */ | ||
1772 | daemonize("xfsbufd"); | ||
1773 | current->flags |= PF_MEMALLOC; | ||
1774 | |||
1775 | pagebuf_daemon_task = current; | ||
1776 | pagebuf_daemon_active = 1; | ||
1777 | barrier(); | ||
1778 | |||
1779 | INIT_LIST_HEAD(&tmp); | ||
1780 | do { | ||
1781 | try_to_freeze(PF_FREEZE); | ||
1782 | |||
1783 | set_current_state(TASK_INTERRUPTIBLE); | ||
1784 | schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100); | ||
1785 | |||
1786 | age = (xfs_buf_age_centisecs * HZ) / 100; | ||
1787 | spin_lock(&pbd_delwrite_lock); | ||
1788 | list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { | ||
1789 | PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); | ||
1790 | ASSERT(pb->pb_flags & PBF_DELWRI); | ||
1791 | |||
1792 | if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { | ||
1793 | if (!force_flush && | ||
1794 | time_before(jiffies, | ||
1795 | pb->pb_queuetime + age)) { | ||
1796 | pagebuf_unlock(pb); | ||
1797 | break; | ||
1798 | } | ||
1799 | |||
1800 | pb->pb_flags &= ~PBF_DELWRI; | ||
1801 | pb->pb_flags |= PBF_WRITE; | ||
1802 | list_move(&pb->pb_list, &tmp); | ||
1803 | } | ||
1804 | } | ||
1805 | spin_unlock(&pbd_delwrite_lock); | ||
1806 | |||
1807 | while (!list_empty(&tmp)) { | ||
1808 | pb = list_entry(tmp.next, xfs_buf_t, pb_list); | ||
1809 | target = pb->pb_target; | ||
1810 | |||
1811 | list_del_init(&pb->pb_list); | ||
1812 | pagebuf_iostrategy(pb); | ||
1813 | |||
1814 | blk_run_address_space(target->pbr_mapping); | ||
1815 | } | ||
1816 | |||
1817 | if (as_list_len > 0) | ||
1818 | purge_addresses(); | ||
1819 | |||
1820 | force_flush = 0; | ||
1821 | } while (pagebuf_daemon_active); | ||
1822 | |||
1823 | complete_and_exit(&pagebuf_daemon_done, 0); | ||
1824 | } | ||
1825 | |||
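The daemon sleeps for xfs_buf_timer_centisecs between passes and only pushes buffers queued longer ago than xfs_buf_age_centisecs, both converted to jiffies with (centisecs * HZ) / 100. A standalone sketch of that conversion; HZ and the two tunable values are example assumptions, not necessarily the kernel defaults:

#include <stdio.h>

#define HZ 1000				/* assumed tick rate */

int main(void)
{
	unsigned long timer_centisecs = 100;	/* example: wake up every second */
	unsigned long age_centisecs = 1500;	/* example: flush buffers older than 15s */

	unsigned long timer_jiffies = (timer_centisecs * HZ) / 100;
	unsigned long age_jiffies = (age_centisecs * HZ) / 100;

	printf("sleep %lu jiffies, flush when queued more than %lu jiffies ago\n",
	       timer_jiffies, age_jiffies);
	return 0;
}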
1826 | /* | ||
1827 | * Go through all incore buffers, and release buffers if they belong to | ||
1828 | * the given device. This is used in filesystem error handling to | ||
1829 | * preserve the consistency of its metadata. | ||
1830 | */ | ||
1831 | int | ||
1832 | xfs_flush_buftarg( | ||
1833 | xfs_buftarg_t *target, | ||
1834 | int wait) | ||
1835 | { | ||
1836 | struct list_head tmp; | ||
1837 | xfs_buf_t *pb, *n; | ||
1838 | int pincount = 0; | ||
1839 | |||
1840 | pagebuf_runall_queues(pagebuf_dataio_workqueue); | ||
1841 | pagebuf_runall_queues(pagebuf_logio_workqueue); | ||
1842 | |||
1843 | INIT_LIST_HEAD(&tmp); | ||
1844 | spin_lock(&pbd_delwrite_lock); | ||
1845 | list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { | ||
1846 | |||
1847 | if (pb->pb_target != target) | ||
1848 | continue; | ||
1849 | |||
1850 | ASSERT(pb->pb_flags & PBF_DELWRI); | ||
1851 | PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); | ||
1852 | if (pagebuf_ispin(pb)) { | ||
1853 | pincount++; | ||
1854 | continue; | ||
1855 | } | ||
1856 | |||
1857 | pb->pb_flags &= ~PBF_DELWRI; | ||
1858 | pb->pb_flags |= PBF_WRITE; | ||
1859 | list_move(&pb->pb_list, &tmp); | ||
1860 | } | ||
1861 | spin_unlock(&pbd_delwrite_lock); | ||
1862 | |||
1863 | /* | ||
1864 | * Dropped the delayed write list lock, now walk the temporary list | ||
1865 | */ | ||
1866 | list_for_each_entry_safe(pb, n, &tmp, pb_list) { | ||
1867 | if (wait) | ||
1868 | pb->pb_flags &= ~PBF_ASYNC; | ||
1869 | else | ||
1870 | list_del_init(&pb->pb_list); | ||
1871 | |||
1872 | pagebuf_lock(pb); | ||
1873 | pagebuf_iostrategy(pb); | ||
1874 | } | ||
1875 | |||
1876 | /* | ||
1877 | * Remaining list items must be flushed before returning | ||
1878 | */ | ||
1879 | while (!list_empty(&tmp)) { | ||
1880 | pb = list_entry(tmp.next, xfs_buf_t, pb_list); | ||
1881 | |||
1882 | list_del_init(&pb->pb_list); | ||
1883 | xfs_iowait(pb); | ||
1884 | xfs_buf_relse(pb); | ||
1885 | } | ||
1886 | |||
1887 | if (wait) | ||
1888 | blk_run_address_space(target->pbr_mapping); | ||
1889 | |||
1890 | return pincount; | ||
1891 | } | ||
1892 | |||
1893 | STATIC int | ||
1894 | pagebuf_daemon_start(void) | ||
1895 | { | ||
1896 | int rval; | ||
1897 | |||
1898 | pagebuf_logio_workqueue = create_workqueue("xfslogd"); | ||
1899 | if (!pagebuf_logio_workqueue) | ||
1900 | return -ENOMEM; | ||
1901 | |||
1902 | pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); | ||
1903 | if (!pagebuf_dataio_workqueue) { | ||
1904 | destroy_workqueue(pagebuf_logio_workqueue); | ||
1905 | return -ENOMEM; | ||
1906 | } | ||
1907 | |||
1908 | rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); | ||
1909 | if (rval < 0) { | ||
1910 | destroy_workqueue(pagebuf_logio_workqueue); | ||
1911 | destroy_workqueue(pagebuf_dataio_workqueue); | ||
1912 | } | ||
1913 | |||
1914 | return rval; | ||
1915 | } | ||
1916 | |||
1917 | /* | ||
1918 | * pagebuf_daemon_stop | ||
1919 | * | ||
1920 | * Note: do not mark as __exit, it is called from pagebuf_terminate. | ||
1921 | */ | ||
1922 | STATIC void | ||
1923 | pagebuf_daemon_stop(void) | ||
1924 | { | ||
1925 | pagebuf_daemon_active = 0; | ||
1926 | barrier(); | ||
1927 | wait_for_completion(&pagebuf_daemon_done); | ||
1928 | |||
1929 | destroy_workqueue(pagebuf_logio_workqueue); | ||
1930 | destroy_workqueue(pagebuf_dataio_workqueue); | ||
1931 | } | ||
1932 | |||
1933 | /* | ||
1934 | * Initialization and Termination | ||
1935 | */ | ||
1936 | |||
1937 | int __init | ||
1938 | pagebuf_init(void) | ||
1939 | { | ||
1940 | pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, | ||
1941 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
1942 | if (pagebuf_cache == NULL) { | ||
1943 | printk("XFS: couldn't init xfs_buf_t cache\n"); | ||
1944 | pagebuf_terminate(); | ||
1945 | return -ENOMEM; | ||
1946 | } | ||
1947 | |||
1948 | #ifdef PAGEBUF_TRACE | ||
1949 | pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); | ||
1950 | #endif | ||
1951 | |||
1952 | pagebuf_daemon_start(); | ||
1953 | |||
1954 | pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup); | ||
1955 | if (pagebuf_shake == NULL) { | ||
1956 | pagebuf_terminate(); | ||
1957 | return -ENOMEM; | ||
1958 | } | ||
1959 | |||
1960 | return 0; | ||
1961 | } | ||
1962 | |||
1963 | |||
1964 | /* | ||
1965 | * pagebuf_terminate. | ||
1966 | * | ||
1967 | * Note: do not mark as __exit, this is also called from the __init code. | ||
1968 | */ | ||
1969 | void | ||
1970 | pagebuf_terminate(void) | ||
1971 | { | ||
1972 | pagebuf_daemon_stop(); | ||
1973 | |||
1974 | #ifdef PAGEBUF_TRACE | ||
1975 | ktrace_free(pagebuf_trace_buf); | ||
1976 | #endif | ||
1977 | |||
1978 | kmem_zone_destroy(pagebuf_cache); | ||
1979 | kmem_shake_deregister(pagebuf_shake); | ||
1980 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h new file mode 100644 index 000000000000..74deed8e6d90 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -0,0 +1,591 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI | ||
35 | */ | ||
36 | |||
37 | #ifndef __XFS_BUF_H__ | ||
38 | #define __XFS_BUF_H__ | ||
39 | |||
40 | #include <linux/config.h> | ||
41 | #include <linux/list.h> | ||
42 | #include <linux/types.h> | ||
43 | #include <linux/spinlock.h> | ||
44 | #include <asm/system.h> | ||
45 | #include <linux/mm.h> | ||
46 | #include <linux/fs.h> | ||
47 | #include <linux/buffer_head.h> | ||
48 | #include <linux/uio.h> | ||
49 | |||
50 | /* | ||
51 | * Base types | ||
52 | */ | ||
53 | |||
54 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) | ||
55 | |||
56 | #define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) | ||
57 | #define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) | ||
58 | #define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) | ||
59 | #define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) | ||
60 | |||
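The page_buf_* helpers are byte/page conversions: ctob is pages-to-bytes, btoc rounds a byte count up to whole pages, btoct truncates, and poff extracts the offset within a page. A quick standalone check of the same expressions, assuming PAGE_CACHE_SIZE is 4096 (so PAGE_CACHE_SHIFT is 12):

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12			/* assumed 4K pages */
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)
#define PAGE_CACHE_MASK  (~(PAGE_CACHE_SIZE - 1))

#define page_buf_ctob(pp)  ((pp) * PAGE_CACHE_SIZE)
#define page_buf_btoc(dd)  (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
#define page_buf_poff(aa)  ((aa) & ~PAGE_CACHE_MASK)

int main(void)
{
	unsigned long len = 10000;

	printf("3 pages = %lu bytes\n", page_buf_ctob(3UL));			/* 12288 */
	printf("%lu bytes = %lu pages (rounded up)\n", len, page_buf_btoc(len));	/* 3 */
	printf("%lu bytes = %lu pages (truncated)\n", len, page_buf_btoct(len));	/* 2 */
	printf("offset in page = %lu\n", page_buf_poff(len));			/* 1808 */
	return 0;
}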
61 | typedef enum page_buf_rw_e { | ||
62 | PBRW_READ = 1, /* transfer into target memory */ | ||
63 | PBRW_WRITE = 2, /* transfer from target memory */ | ||
64 | PBRW_ZERO = 3 /* Zero target memory */ | ||
65 | } page_buf_rw_t; | ||
66 | |||
67 | |||
68 | typedef enum page_buf_flags_e { /* pb_flags values */ | ||
69 | PBF_READ = (1 << 0), /* buffer intended for reading from device */ | ||
70 | PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ | ||
71 | PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ | ||
72 | PBF_PARTIAL = (1 << 3), /* buffer partially read */ | ||
73 | PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ | ||
74 | PBF_NONE = (1 << 5), /* buffer not read at all */ | ||
75 | PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ | ||
76 | PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ | ||
77 | PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ | ||
78 | PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad */ | ||
79 | PBF_FORCEIO = (1 << 10), /* ignore any cache state */ | ||
80 | PBF_FLUSH = (1 << 11), /* flush disk write cache */ | ||
81 | PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ | ||
82 | |||
83 | /* flags used only as arguments to access routines */ | ||
84 | PBF_LOCK = (1 << 14), /* lock requested */ | ||
85 | PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ | ||
86 | PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ | ||
87 | |||
88 | /* flags used only internally */ | ||
89 | _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ | ||
90 | _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ | ||
91 | _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ | ||
92 | } page_buf_flags_t; | ||
93 | |||
94 | #define PBF_UPDATE (PBF_READ | PBF_WRITE) | ||
95 | #define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0) | ||
96 | #define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0) | ||
97 | |||
98 | typedef struct xfs_bufhash { | ||
99 | struct list_head bh_list; | ||
100 | spinlock_t bh_lock; | ||
101 | } xfs_bufhash_t; | ||
102 | |||
103 | typedef struct xfs_buftarg { | ||
104 | dev_t pbr_dev; | ||
105 | struct block_device *pbr_bdev; | ||
106 | struct address_space *pbr_mapping; | ||
107 | unsigned int pbr_bsize; | ||
108 | unsigned int pbr_sshift; | ||
109 | size_t pbr_smask; | ||
110 | |||
111 | /* per-device buffer hash table */ | ||
112 | uint bt_hashmask; | ||
113 | uint bt_hashshift; | ||
114 | xfs_bufhash_t *bt_hash; | ||
115 | } xfs_buftarg_t; | ||
116 | |||
117 | /* | ||
118 | * xfs_buf_t: Buffer structure for page cache-based buffers | ||
119 | * | ||
120 | * This buffer structure is used by the page cache buffer management routines | ||
121 | * to refer to an assembly of pages forming a logical buffer. The actual I/O | ||
122 | * is performed with buffer_head structures, as required by drivers. | ||
123 | * | ||
124 | * The buffer structure is used on a temporary basis only, and discarded when | ||
125 | * released. The real data storage is recorded in the page cache. Metadata is | ||
126 | * hashed to the block device on which the file system resides. | ||
127 | */ | ||
128 | |||
129 | struct xfs_buf; | ||
130 | |||
131 | /* call-back function on I/O completion */ | ||
132 | typedef void (*page_buf_iodone_t)(struct xfs_buf *); | ||
133 | /* call-back function on I/O completion */ | ||
134 | typedef void (*page_buf_relse_t)(struct xfs_buf *); | ||
135 | /* pre-write function */ | ||
136 | typedef int (*page_buf_bdstrat_t)(struct xfs_buf *); | ||
137 | |||
138 | #define PB_PAGES 2 | ||
139 | |||
140 | typedef struct xfs_buf { | ||
141 | struct semaphore pb_sema; /* semaphore for lockables */ | ||
142 | unsigned long pb_queuetime; /* time buffer was queued */ | ||
143 | atomic_t pb_pin_count; /* pin count */ | ||
144 | wait_queue_head_t pb_waiters; /* unpin waiters */ | ||
145 | struct list_head pb_list; | ||
146 | page_buf_flags_t pb_flags; /* status flags */ | ||
147 | struct list_head pb_hash_list; /* hash table list */ | ||
148 | xfs_bufhash_t *pb_hash; /* hash table list start */ | ||
149 | xfs_buftarg_t *pb_target; /* buffer target (device) */ | ||
150 | atomic_t pb_hold; /* reference count */ | ||
151 | xfs_daddr_t pb_bn; /* block number for I/O */ | ||
152 | loff_t pb_file_offset; /* offset in file */ | ||
153 | size_t pb_buffer_length; /* size of buffer in bytes */ | ||
154 | size_t pb_count_desired; /* desired transfer size */ | ||
155 | void *pb_addr; /* virtual address of buffer */ | ||
156 | struct work_struct pb_iodone_work; | ||
157 | atomic_t pb_io_remaining;/* #outstanding I/O requests */ | ||
158 | page_buf_iodone_t pb_iodone; /* I/O completion function */ | ||
159 | page_buf_relse_t pb_relse; /* releasing function */ | ||
160 | page_buf_bdstrat_t pb_strat; /* pre-write function */ | ||
161 | struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ | ||
162 | void *pb_fspriv; | ||
163 | void *pb_fspriv2; | ||
164 | void *pb_fspriv3; | ||
165 | unsigned short pb_error; /* error code on I/O */ | ||
166 | unsigned short pb_locked; /* page array is locked */ | ||
167 | unsigned int pb_page_count; /* size of page array */ | ||
168 | unsigned int pb_offset; /* page offset in first page */ | ||
169 | struct page **pb_pages; /* array of page pointers */ | ||
170 | struct page *pb_page_array[PB_PAGES]; /* inline pages */ | ||
171 | #ifdef PAGEBUF_LOCK_TRACKING | ||
172 | int pb_last_holder; | ||
173 | #endif | ||
174 | } xfs_buf_t; | ||
175 | |||
176 | |||
177 | /* Finding and Reading Buffers */ | ||
178 | |||
179 | extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */ | ||
180 | /* the block is in memory */ | ||
181 | xfs_buftarg_t *, /* inode for block */ | ||
182 | loff_t, /* starting offset of range */ | ||
183 | size_t, /* length of range */ | ||
184 | page_buf_flags_t, /* PBF_LOCK */ | ||
185 | xfs_buf_t *); /* newly allocated buffer */ | ||
186 | |||
187 | #define xfs_incore(buftarg,blkno,len,lockit) \ | ||
188 | _pagebuf_find(buftarg, blkno ,len, lockit, NULL) | ||
189 | |||
190 | extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */ | ||
191 | xfs_buftarg_t *, /* inode for buffer */ | ||
192 | loff_t, /* starting offset of range */ | ||
193 | size_t, /* length of range */ | ||
194 | page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ | ||
195 | /* PBF_ASYNC */ | ||
196 | |||
197 | #define xfs_buf_get(target, blkno, len, flags) \ | ||
198 | xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) | ||
199 | |||
200 | extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ | ||
201 | xfs_buftarg_t *, /* inode for buffer */ | ||
202 | loff_t, /* starting offset of range */ | ||
203 | size_t, /* length of range */ | ||
204 | page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */ | ||
205 | |||
206 | #define xfs_buf_read(target, blkno, len, flags) \ | ||
207 | xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) | ||
208 | |||
209 | extern xfs_buf_t *pagebuf_lookup( | ||
210 | xfs_buftarg_t *, | ||
211 | loff_t, /* starting offset of range */ | ||
212 | size_t, /* length of range */ | ||
213 | page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ | ||
214 | /* PBF_FORCEIO, */ | ||
215 | |||
216 | extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ | ||
217 | /* no memory or disk address */ | ||
218 | size_t len, | ||
219 | xfs_buftarg_t *); /* mount point "fake" inode */ | ||
220 | |||
221 | extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ | ||
222 | /* without disk address */ | ||
223 | size_t len, | ||
224 | xfs_buftarg_t *); /* mount point "fake" inode */ | ||
225 | |||
226 | extern int pagebuf_associate_memory( | ||
227 | xfs_buf_t *, | ||
228 | void *, | ||
229 | size_t); | ||
230 | |||
231 | extern void pagebuf_hold( /* increment reference count */ | ||
232 | xfs_buf_t *); /* buffer to hold */ | ||
233 | |||
234 | extern void pagebuf_readahead( /* read ahead into cache */ | ||
235 | xfs_buftarg_t *, /* target for buffer (or NULL) */ | ||
236 | loff_t, /* starting offset of range */ | ||
237 | size_t, /* length of range */ | ||
238 | page_buf_flags_t); /* additional read flags */ | ||
239 | |||
240 | /* Releasing Buffers */ | ||
241 | |||
242 | extern void pagebuf_free( /* deallocate a buffer */ | ||
243 | xfs_buf_t *); /* buffer to deallocate */ | ||
244 | |||
245 | extern void pagebuf_rele( /* release hold on a buffer */ | ||
246 | xfs_buf_t *); /* buffer to release */ | ||
247 | |||
248 | /* Locking and Unlocking Buffers */ | ||
249 | |||
250 | extern int pagebuf_cond_lock( /* lock buffer, if not locked */ | ||
251 | /* (returns -EBUSY if locked) */ | ||
252 | xfs_buf_t *); /* buffer to lock */ | ||
253 | |||
254 | extern int pagebuf_lock_value( /* return count on lock */ | ||
255 | xfs_buf_t *); /* buffer to check */ | ||
256 | |||
257 | extern int pagebuf_lock( /* lock buffer */ | ||
258 | xfs_buf_t *); /* buffer to lock */ | ||
259 | |||
260 | extern void pagebuf_unlock( /* unlock buffer */ | ||
261 | xfs_buf_t *); /* buffer to unlock */ | ||
262 | |||
263 | /* Buffer Read and Write Routines */ | ||
264 | |||
265 | extern void pagebuf_iodone( /* mark buffer I/O complete */ | ||
266 | xfs_buf_t *, /* buffer to mark */ | ||
267 | int, /* use data/log helper thread. */ | ||
268 | int); /* run completion locally, or in | ||
269 | * a helper thread. */ | ||
270 | |||
271 | extern void pagebuf_ioerror( /* mark buffer in error (or not) */ | ||
272 | xfs_buf_t *, /* buffer to mark */ | ||
273 | int); /* error to store (0 if none) */ | ||
274 | |||
275 | extern int pagebuf_iostart( /* start I/O on a buffer */ | ||
276 | xfs_buf_t *, /* buffer to start */ | ||
277 | page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ | ||
278 | /* PBF_READ, PBF_WRITE, */ | ||
279 | /* PBF_DELWRI */ | ||
280 | |||
281 | extern int pagebuf_iorequest( /* start real I/O */ | ||
282 | xfs_buf_t *); /* buffer to convey to device */ | ||
283 | |||
284 | extern int pagebuf_iowait( /* wait for buffer I/O done */ | ||
285 | xfs_buf_t *); /* buffer to wait on */ | ||
286 | |||
287 | extern void pagebuf_iomove( /* move data in/out of pagebuf */ | ||
288 | xfs_buf_t *, /* buffer to manipulate */ | ||
289 | size_t, /* starting buffer offset */ | ||
290 | size_t, /* length in buffer */ | ||
291 | caddr_t, /* data pointer */ | ||
292 | page_buf_rw_t); /* direction */ | ||
293 | |||
294 | static inline int pagebuf_iostrategy(xfs_buf_t *pb) | ||
295 | { | ||
296 | return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); | ||
297 | } | ||
298 | |||
299 | static inline int pagebuf_geterror(xfs_buf_t *pb) | ||
300 | { | ||
301 | return pb ? pb->pb_error : ENOMEM; | ||
302 | } | ||
303 | |||
304 | /* Buffer Utility Routines */ | ||
305 | |||
306 | extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ | ||
307 | xfs_buf_t *, /* buffer to offset into */ | ||
308 | size_t); /* offset */ | ||
309 | |||
310 | /* Pinning Buffer Storage in Memory */ | ||
311 | |||
312 | extern void pagebuf_pin( /* pin buffer in memory */ | ||
313 | xfs_buf_t *); /* buffer to pin */ | ||
314 | |||
315 | extern void pagebuf_unpin( /* unpin buffered data */ | ||
316 | xfs_buf_t *); /* buffer to unpin */ | ||
317 | |||
318 | extern int pagebuf_ispin( /* check if buffer is pinned */ | ||
319 | xfs_buf_t *); /* buffer to check */ | ||
320 | |||
321 | /* Delayed Write Buffer Routines */ | ||
322 | |||
323 | extern void pagebuf_delwri_dequeue(xfs_buf_t *); | ||
324 | |||
325 | /* Buffer Daemon Setup Routines */ | ||
326 | |||
327 | extern int pagebuf_init(void); | ||
328 | extern void pagebuf_terminate(void); | ||
329 | |||
330 | |||
331 | #ifdef PAGEBUF_TRACE | ||
332 | extern ktrace_t *pagebuf_trace_buf; | ||
333 | extern void pagebuf_trace( | ||
334 | xfs_buf_t *, /* buffer being traced */ | ||
335 | char *, /* description of operation */ | ||
336 | void *, /* arbitrary diagnostic value */ | ||
337 | void *); /* return address */ | ||
338 | #else | ||
339 | # define pagebuf_trace(pb, id, ptr, ra) do { } while (0) | ||
340 | #endif | ||
341 | |||
342 | #define pagebuf_target_name(target) \ | ||
343 | ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) | ||
344 | |||
345 | |||
346 | |||
347 | |||
348 | |||
349 | /* These are just for xfs_syncsub... it sets an internal variable | ||
350 | * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t | ||
351 | */ | ||
352 | #define XFS_B_ASYNC PBF_ASYNC | ||
353 | #define XFS_B_DELWRI PBF_DELWRI | ||
354 | #define XFS_B_READ PBF_READ | ||
355 | #define XFS_B_WRITE PBF_WRITE | ||
356 | #define XFS_B_STALE PBF_STALE | ||
357 | |||
358 | #define XFS_BUF_TRYLOCK PBF_TRYLOCK | ||
359 | #define XFS_INCORE_TRYLOCK PBF_TRYLOCK | ||
360 | #define XFS_BUF_LOCK PBF_LOCK | ||
361 | #define XFS_BUF_MAPPED PBF_MAPPED | ||
362 | |||
363 | #define BUF_BUSY PBF_DONT_BLOCK | ||
364 | |||
365 | #define XFS_BUF_BFLAGS(x) ((x)->pb_flags) | ||
366 | #define XFS_BUF_ZEROFLAGS(x) \ | ||
367 | ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI)) | ||
368 | |||
369 | #define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE) | ||
370 | #define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE) | ||
371 | #define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE) | ||
372 | #define XFS_BUF_SUPER_STALE(x) do { \ | ||
373 | XFS_BUF_STALE(x); \ | ||
374 | pagebuf_delwri_dequeue(x); \ | ||
375 | XFS_BUF_DONE(x); \ | ||
376 | } while (0) | ||
377 | |||
378 | #define XFS_BUF_MANAGE PBF_FS_MANAGED | ||
379 | #define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) | ||
380 | |||
381 | #define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) | ||
382 | #define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x) | ||
383 | #define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) | ||
384 | |||
385 | #define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) | ||
386 | #define XFS_BUF_GETERROR(x) pagebuf_geterror(x) | ||
387 | #define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) | ||
388 | |||
389 | #define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE)) | ||
390 | #define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE) | ||
391 | #define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x))) | ||
392 | |||
393 | #define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO) | ||
394 | #define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO) | ||
395 | #define XFS_BUF_ISBUSY(x) (1) | ||
396 | |||
397 | #define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC) | ||
398 | #define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC) | ||
399 | #define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) | ||
400 | |||
401 | #define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH) | ||
402 | #define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH) | ||
403 | #define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH) | ||
404 | |||
405 | #define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") | ||
406 | #define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") | ||
407 | #define XFS_BUF_ISSHUT(x) (0) | ||
408 | |||
409 | #define XFS_BUF_HOLD(x) pagebuf_hold(x) | ||
410 | #define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ) | ||
411 | #define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ) | ||
412 | #define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ) | ||
413 | |||
414 | #define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE) | ||
415 | #define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE) | ||
416 | #define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE) | ||
417 | |||
418 | #define XFS_BUF_ISUNINITIAL(x) (0) | ||
419 | #define XFS_BUF_UNUNINITIAL(x) (0) | ||
420 | |||
421 | #define XFS_BUF_BP_ISMAPPED(bp) 1 | ||
422 | |||
423 | #define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD) | ||
424 | #define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD) | ||
425 | |||
426 | #define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone | ||
427 | #define XFS_BUF_SET_IODONE_FUNC(buf, func) \ | ||
428 | (buf)->pb_iodone = (func) | ||
429 | #define XFS_BUF_CLR_IODONE_FUNC(buf) \ | ||
430 | (buf)->pb_iodone = NULL | ||
431 | #define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \ | ||
432 | (buf)->pb_strat = (func) | ||
433 | #define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \ | ||
434 | (buf)->pb_strat = NULL | ||
435 | |||
436 | #define XFS_BUF_FSPRIVATE(buf, type) \ | ||
437 | ((type)(buf)->pb_fspriv) | ||
438 | #define XFS_BUF_SET_FSPRIVATE(buf, value) \ | ||
439 | (buf)->pb_fspriv = (void *)(value) | ||
440 | #define XFS_BUF_FSPRIVATE2(buf, type) \ | ||
441 | ((type)(buf)->pb_fspriv2) | ||
442 | #define XFS_BUF_SET_FSPRIVATE2(buf, value) \ | ||
443 | (buf)->pb_fspriv2 = (void *)(value) | ||
444 | #define XFS_BUF_FSPRIVATE3(buf, type) \ | ||
445 | ((type)(buf)->pb_fspriv3) | ||
446 | #define XFS_BUF_SET_FSPRIVATE3(buf, value) \ | ||
447 | (buf)->pb_fspriv3 = (void *)(value) | ||
448 | #define XFS_BUF_SET_START(buf) | ||
449 | |||
450 | #define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ | ||
451 | (buf)->pb_relse = (value) | ||
452 | |||
453 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) | ||
454 | |||
455 | extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) | ||
456 | { | ||
457 | if (bp->pb_flags & PBF_MAPPED) | ||
458 | return XFS_BUF_PTR(bp) + offset; | ||
459 | return (xfs_caddr_t) pagebuf_offset(bp, offset); | ||
460 | } | ||
461 | |||
462 | #define XFS_BUF_SET_PTR(bp, val, count) \ | ||
463 | pagebuf_associate_memory(bp, val, count) | ||
464 | #define XFS_BUF_ADDR(bp) ((bp)->pb_bn) | ||
465 | #define XFS_BUF_SET_ADDR(bp, blk) \ | ||
466 | ((bp)->pb_bn = (xfs_daddr_t)(blk)) | ||
467 | #define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) | ||
468 | #define XFS_BUF_SET_OFFSET(bp, off) \ | ||
469 | ((bp)->pb_file_offset = (off)) | ||
470 | #define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) | ||
471 | #define XFS_BUF_SET_COUNT(bp, cnt) \ | ||
472 | ((bp)->pb_count_desired = (cnt)) | ||
473 | #define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) | ||
474 | #define XFS_BUF_SET_SIZE(bp, cnt) \ | ||
475 | ((bp)->pb_buffer_length = (cnt)) | ||
476 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) | ||
477 | #define XFS_BUF_SET_VTYPE(bp, type) | ||
478 | #define XFS_BUF_SET_REF(bp, ref) | ||
479 | |||
480 | #define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) | ||
481 | |||
482 | #define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) | ||
483 | #define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) | ||
484 | #define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) | ||
485 | #define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) | ||
486 | #define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); | ||
487 | |||
488 | /* setup the buffer target from a buftarg structure */ | ||
489 | #define XFS_BUF_SET_TARGET(bp, target) \ | ||
490 | (bp)->pb_target = (target) | ||
491 | #define XFS_BUF_TARGET(bp) ((bp)->pb_target) | ||
492 | #define XFS_BUFTARG_NAME(target) \ | ||
493 | pagebuf_target_name(target) | ||
494 | |||
495 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) | ||
496 | #define XFS_BUF_SET_VTYPE(bp, type) | ||
497 | #define XFS_BUF_SET_REF(bp, ref) | ||
498 | |||
499 | static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) | ||
500 | { | ||
501 | bp->pb_fspriv3 = mp; | ||
502 | bp->pb_strat = xfs_bdstrat_cb; | ||
503 | pagebuf_delwri_dequeue(bp); | ||
504 | return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); | ||
505 | } | ||
506 | |||
507 | static inline void xfs_buf_relse(xfs_buf_t *bp) | ||
508 | { | ||
509 | if (!bp->pb_relse) | ||
510 | pagebuf_unlock(bp); | ||
511 | pagebuf_rele(bp); | ||
512 | } | ||
513 | |||
514 | #define xfs_bpin(bp) pagebuf_pin(bp) | ||
515 | #define xfs_bunpin(bp) pagebuf_unpin(bp) | ||
516 | |||
517 | #define xfs_buftrace(id, bp) \ | ||
518 | pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) | ||
519 | |||
520 | #define xfs_biodone(pb) \ | ||
521 | pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0) | ||
522 | |||
523 | #define xfs_biomove(pb, off, len, data, rw) \ | ||
524 | pagebuf_iomove((pb), (off), (len), (data), \ | ||
525 | ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) | ||
526 | |||
527 | #define xfs_biozero(pb, off, len) \ | ||
528 | pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) | ||
529 | |||
530 | |||
531 | static inline int XFS_bwrite(xfs_buf_t *pb) | ||
532 | { | ||
533 | int iowait = (pb->pb_flags & PBF_ASYNC) == 0; | ||
534 | int error = 0; | ||
535 | |||
536 | if (!iowait) | ||
537 | pb->pb_flags |= _PBF_RUN_QUEUES; | ||
538 | |||
539 | pagebuf_delwri_dequeue(pb); | ||
540 | pagebuf_iostrategy(pb); | ||
541 | if (iowait) { | ||
542 | error = pagebuf_iowait(pb); | ||
543 | xfs_buf_relse(pb); | ||
544 | } | ||
545 | return error; | ||
546 | } | ||
547 | |||
548 | #define XFS_bdwrite(pb) \ | ||
549 | pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) | ||
550 | |||
551 | static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) | ||
552 | { | ||
553 | bp->pb_strat = xfs_bdstrat_cb; | ||
554 | bp->pb_fspriv3 = mp; | ||
555 | |||
556 | return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC); | ||
557 | } | ||
558 | |||
559 | #define XFS_bdstrat(bp) pagebuf_iorequest(bp) | ||
560 | |||
561 | #define xfs_iowait(pb) pagebuf_iowait(pb) | ||
562 | |||
563 | #define xfs_baread(target, rablkno, ralen) \ | ||
564 | pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) | ||
565 | |||
566 | #define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target)) | ||
567 | #define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) | ||
568 | #define xfs_buf_free(bp) pagebuf_free(bp) | ||
569 | |||
570 | |||
571 | /* | ||
572 | * Handling of buftargs. | ||
573 | */ | ||
574 | |||
575 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); | ||
576 | extern void xfs_free_buftarg(xfs_buftarg_t *, int); | ||
577 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | ||
578 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | ||
579 | extern void xfs_incore_relse(xfs_buftarg_t *, int, int); | ||
580 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); | ||
581 | |||
582 | #define xfs_getsize_buftarg(buftarg) \ | ||
583 | block_size((buftarg)->pbr_bdev) | ||
584 | #define xfs_readonly_buftarg(buftarg) \ | ||
585 | bdev_read_only((buftarg)->pbr_bdev) | ||
586 | #define xfs_binval(buftarg) \ | ||
587 | xfs_flush_buftarg(buftarg, 1) | ||
588 | #define XFS_bflush(buftarg) \ | ||
589 | xfs_flush_buftarg(buftarg, 1) | ||
590 | |||
591 | #endif /* __XFS_BUF_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h new file mode 100644 index 000000000000..00c45849d41a --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_cred.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_CRED_H__ | ||
33 | #define __XFS_CRED_H__ | ||
34 | |||
35 | /* | ||
36 | * Credentials | ||
37 | */ | ||
38 | typedef struct cred { | ||
39 | /* EMPTY */ | ||
40 | } cred_t; | ||
41 | |||
42 | extern struct cred *sys_cred; | ||
43 | |||
44 | /* this is a hack (assumes sys_cred is the only cred_t in the system) */ | ||
45 | static __inline int capable_cred(cred_t *cr, int cid) | ||
46 | { | ||
47 | return (cr == sys_cred) ? 1 : capable(cid); | ||
48 | } | ||
49 | |||
50 | #endif /* __XFS_CRED_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c new file mode 100644 index 000000000000..f372a1a5e168 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -0,0 +1,205 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include "xfs_types.h" | ||
35 | #include "xfs_dmapi.h" | ||
36 | #include "xfs_log.h" | ||
37 | #include "xfs_trans.h" | ||
38 | #include "xfs_sb.h" | ||
39 | #include "xfs_dir.h" | ||
40 | #include "xfs_mount.h" | ||
41 | #include "xfs_export.h" | ||
42 | |||
43 | /* | ||
44 | * XFS encode and decodes the fileid portion of NFS filehandles | ||
45 | * itself instead of letting the generic NFS code do it. This | ||
46 | * allows filesystems with 64 bit inode numbers to be exported. | ||
47 | * | ||
48 | * Note that a side effect is that xfs_vget() won't be passed a | ||
49 | * zero inode/generation pair under normal circumstances. As | ||
50 | * however a malicious client could send us such data, the check | ||
51 | * remains in that code. | ||
52 | */ | ||
53 | |||
54 | |||
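The point of the custom encoding is that XFS inode numbers can exceed 32 bits, so packing one into a generic 32-bit NFS fileid would silently truncate it; the XFS_FILEID_TYPE_64FLAG path avoids that. A tiny standalone illustration of the truncation problem (the inode number is an arbitrary example):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ino = 0x123456789aULL;		/* example 64-bit XFS inode number */
	uint32_t truncated = (uint32_t)ino;	/* what a 32-bit fileid would keep */

	printf("inode %#llx -> 32-bit fileid %#x (%s)\n",
	       (unsigned long long)ino, (unsigned int)truncated,
	       truncated == ino ? "ok" : "truncated");
	return 0;
}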
55 | STATIC struct dentry * | ||
56 | linvfs_decode_fh( | ||
57 | struct super_block *sb, | ||
58 | __u32 *fh, | ||
59 | int fh_len, | ||
60 | int fileid_type, | ||
61 | int (*acceptable)( | ||
62 | void *context, | ||
63 | struct dentry *de), | ||
64 | void *context) | ||
65 | { | ||
66 | xfs_fid2_t ifid; | ||
67 | xfs_fid2_t pfid; | ||
68 | void *parent = NULL; | ||
69 | int is64 = 0; | ||
70 | __u32 *p = fh; | ||
71 | |||
72 | #if XFS_BIG_INUMS | ||
73 | is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG); | ||
74 | fileid_type &= ~XFS_FILEID_TYPE_64FLAG; | ||
75 | #endif | ||
76 | |||
77 | /* | ||
78 | * Note that we only accept fileids which are long enough | ||
79 | * rather than allow the parent generation number to default | ||
80 | * to zero. XFS considers zero a valid generation number not | ||
81 | * an invalid/wildcard value. There's little point printk'ing | ||
82 | * a warning here as we don't have the client information | ||
83 | * which would make such a warning useful. | ||
84 | */ | ||
85 | if (fileid_type > 2 || | ||
86 | fh_len < xfs_fileid_length((fileid_type == 2), is64)) | ||
87 | return NULL; | ||
88 | |||
89 | p = xfs_fileid_decode_fid2(p, &ifid, is64); | ||
90 | |||
91 | if (fileid_type == 2) { | ||
92 | p = xfs_fileid_decode_fid2(p, &pfid, is64); | ||
93 | parent = &pfid; | ||
94 | } | ||
95 | |||
96 | fh = (__u32 *)&ifid; | ||
97 | return find_exported_dentry(sb, fh, parent, acceptable, context); | ||
98 | } | ||
99 | |||
100 | |||
101 | STATIC int | ||
102 | linvfs_encode_fh( | ||
103 | struct dentry *dentry, | ||
104 | __u32 *fh, | ||
105 | int *max_len, | ||
106 | int connectable) | ||
107 | { | ||
108 | struct inode *inode = dentry->d_inode; | ||
109 | int type = 1; | ||
110 | __u32 *p = fh; | ||
111 | int len; | ||
112 | int is64 = 0; | ||
113 | #if XFS_BIG_INUMS | ||
114 | vfs_t *vfs = LINVFS_GET_VFS(inode->i_sb); | ||
115 | xfs_mount_t *mp = XFS_VFSTOM(vfs); | ||
116 | |||
117 | if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) { | ||
118 | /* filesystem may contain 64bit inode numbers */ | ||
119 | is64 = XFS_FILEID_TYPE_64FLAG; | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | /* Directories don't need their parent encoded, they have ".." */ | ||
124 | if (S_ISDIR(inode->i_mode)) | ||
125 | connectable = 0; | ||
126 | |||
127 | /* | ||
128 | * Only encode if there is enough space given. In practice | ||
129 | * this means we can't export a filesystem with 64bit inodes | ||
130 | * over NFSv2 with the subtree_check export option; the other | ||
131 | * seven combinations work. The real answer is "don't use v2". | ||
132 | */ | ||
133 | len = xfs_fileid_length(connectable, is64); | ||
134 | if (*max_len < len) | ||
135 | return 255; | ||
136 | *max_len = len; | ||
137 | |||
138 | p = xfs_fileid_encode_inode(p, inode, is64); | ||
139 | if (connectable) { | ||
140 | spin_lock(&dentry->d_lock); | ||
141 | p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64); | ||
142 | spin_unlock(&dentry->d_lock); | ||
143 | type = 2; | ||
144 | } | ||
145 | BUG_ON((p - fh) != len); | ||
146 | return type | is64; | ||
147 | } | ||
148 | |||
149 | STATIC struct dentry * | ||
150 | linvfs_get_dentry( | ||
151 | struct super_block *sb, | ||
152 | void *data) | ||
153 | { | ||
154 | vnode_t *vp; | ||
155 | struct inode *inode; | ||
156 | struct dentry *result; | ||
157 | vfs_t *vfsp = LINVFS_GET_VFS(sb); | ||
158 | int error; | ||
159 | |||
160 | VFS_VGET(vfsp, &vp, (fid_t *)data, error); | ||
161 | if (error || vp == NULL) | ||
162 | return ERR_PTR(-ESTALE) ; | ||
163 | |||
164 | inode = LINVFS_GET_IP(vp); | ||
165 | result = d_alloc_anon(inode); | ||
166 | if (!result) { | ||
167 | iput(inode); | ||
168 | return ERR_PTR(-ENOMEM); | ||
169 | } | ||
170 | return result; | ||
171 | } | ||
172 | |||
173 | STATIC struct dentry * | ||
174 | linvfs_get_parent( | ||
175 | struct dentry *child) | ||
176 | { | ||
177 | int error; | ||
178 | vnode_t *vp, *cvp; | ||
179 | struct dentry *parent; | ||
180 | struct dentry dotdot; | ||
181 | |||
182 | dotdot.d_name.name = ".."; | ||
183 | dotdot.d_name.len = 2; | ||
184 | dotdot.d_inode = NULL; | ||
185 | |||
186 | cvp = NULL; | ||
187 | vp = LINVFS_GET_VP(child->d_inode); | ||
188 | VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); | ||
189 | if (unlikely(error)) | ||
190 | return ERR_PTR(-error); | ||
191 | |||
192 | parent = d_alloc_anon(LINVFS_GET_IP(cvp)); | ||
193 | if (unlikely(!parent)) { | ||
194 | VN_RELE(cvp); | ||
195 | return ERR_PTR(-ENOMEM); | ||
196 | } | ||
197 | return parent; | ||
198 | } | ||
199 | |||
200 | struct export_operations linvfs_export_ops = { | ||
201 | .decode_fh = linvfs_decode_fh, | ||
202 | .encode_fh = linvfs_encode_fh, | ||
203 | .get_parent = linvfs_get_parent, | ||
204 | .get_dentry = linvfs_get_dentry, | ||
205 | }; | ||
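The decode path above reduces to a small amount of pointer arithmetic, which can be easier to follow outside the kernel. The sketch below is a hypothetical userspace rework of the same logic, assuming the five wire formats documented in xfs_export.h; sketch_fid and decode_fid are made-up names standing in for xfs_fid2_t and xfs_fileid_decode_fid2(), not kernel API.

/*
 * Minimal userspace sketch (hypothetical, not kernel code): decode the
 * fileid words of an XFS NFS filehandle the same way linvfs_decode_fh()
 * does, assuming the wire formats documented in xfs_export.h.
 */
#include <stdint.h>
#include <stdio.h>

struct sketch_fid {                     /* stand-in for xfs_fid2_t */
	uint64_t ino;
	uint32_t gen;
};

/* advance over one fid; 'is64' selects the lo32/hi32 inode layout */
static const uint32_t *decode_fid(const uint32_t *p, struct sketch_fid *fid, int is64)
{
	fid->ino = *p++;
	if (is64)
		fid->ino |= ((uint64_t)*p++) << 32;
	fid->gen = *p++;
	return p;
}

int main(void)
{
	/* a made-up type 0x82 handle: 64-bit inode plus parent */
	uint32_t fh[6] = { 0x89abcdef, 0x01234567, 7, 0x100, 0, 3 };
	struct sketch_fid self, parent;
	const uint32_t *p = fh;

	p = decode_fid(p, &self, 1);
	p = decode_fid(p, &parent, 1);
	printf("ino=%#llx gen=%u parent=%#llx pgen=%u\n",
	       (unsigned long long)self.ino, self.gen,
	       (unsigned long long)parent.ino, parent.gen);
	return 0;
}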
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h new file mode 100644 index 000000000000..60b2abac1c18 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_export.h | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_EXPORT_H__ | ||
33 | #define __XFS_EXPORT_H__ | ||
34 | |||
35 | /* | ||
36 | * Common defines for code related to exporting XFS filesystems over NFS. | ||
37 | * | ||
38 | * The NFS fileid goes out on the wire as an array of | ||
39 | * 32bit unsigned ints in host order. There are 5 possible | ||
40 | * formats. | ||
41 | * | ||
42 | * (1) fileid_type=0x00 | ||
43 | * (no fileid data; handled by the generic code) | ||
44 | * | ||
45 | * (2) fileid_type=0x01 | ||
46 | * inode-num | ||
47 | * generation | ||
48 | * | ||
49 | * (3) fileid_type=0x02 | ||
50 | * inode-num | ||
51 | * generation | ||
52 | * parent-inode-num | ||
53 | * parent-generation | ||
54 | * | ||
55 | * (4) fileid_type=0x81 | ||
56 | * inode-num-lo32 | ||
57 | * inode-num-hi32 | ||
58 | * generation | ||
59 | * | ||
60 | * (5) fileid_type=0x82 | ||
61 | * inode-num-lo32 | ||
62 | * inode-num-hi32 | ||
63 | * generation | ||
64 | * parent-inode-num-lo32 | ||
65 | * parent-inode-num-hi32 | ||
66 | * parent-generation | ||
67 | * | ||
68 | * Note, the NFS filehandle also includes an fsid portion which | ||
69 | * may have an inode number in it. That number is hardcoded to | ||
70 | * 32bits and there is no way for XFS to intercept it. In | ||
71 | * practice this means when exporting an XFS filesystem with 64bit | ||
72 | * inodes you should either export the mountpoint (rather than | ||
73 | * a subdirectory) or use the "fsid" export option. | ||
74 | */ | ||
75 | |||
76 | /* This flag goes on the wire. Don't play with it. */ | ||
77 | #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ | ||
78 | |||
79 | /* Calculate the length in u32 units of the fileid data */ | ||
80 | static inline int | ||
81 | xfs_fileid_length(int hasparent, int is64) | ||
82 | { | ||
83 | return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Decode encoded inode information (either for the inode itself | ||
88 | * or the parent) into an xfs_fid2_t structure. Advances and | ||
89 | * returns the new data pointer | ||
90 | */ | ||
91 | static inline __u32 * | ||
92 | xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64) | ||
93 | { | ||
94 | fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len); | ||
95 | fid->fid_pad = 0; | ||
96 | fid->fid_ino = *p++; | ||
97 | #if XFS_BIG_INUMS | ||
98 | if (is64) | ||
99 | fid->fid_ino |= (((__u64)(*p++)) << 32); | ||
100 | #endif | ||
101 | fid->fid_gen = *p++; | ||
102 | return p; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Encode inode information (either for the inode itself or the | ||
107 | * parent) into a fileid buffer. Advances and returns the new | ||
108 | * data pointer. | ||
109 | */ | ||
110 | static inline __u32 * | ||
111 | xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64) | ||
112 | { | ||
113 | *p++ = (__u32)inode->i_ino; | ||
114 | #if XFS_BIG_INUMS | ||
115 | if (is64) | ||
116 | *p++ = (__u32)(inode->i_ino >> 32); | ||
117 | #endif | ||
118 | *p++ = inode->i_generation; | ||
119 | return p; | ||
120 | } | ||
121 | |||
122 | #endif /* __XFS_EXPORT_H__ */ | ||
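The length arithmetic above pairs off directly with the wire formats listed earlier: each fid is two words, a parent fid doubles the count, and 64-bit inodes add one word per fid. A minimal check of that arithmetic, assuming nothing beyond the inline helper itself:

/* Standalone check of xfs_fileid_length() for the four fid combinations. */
#include <assert.h>

static int fileid_length(int hasparent, int is64)
{
	return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2);
}

int main(void)
{
	assert(fileid_length(0, 0) == 2);   /* fileid_type 0x01 */
	assert(fileid_length(0, 1) == 3);   /* fileid_type 0x81 */
	assert(fileid_length(1, 0) == 4);   /* fileid_type 0x02 */
	assert(fileid_length(1, 1) == 6);   /* fileid_type 0x82 */
	return 0;
}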
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c new file mode 100644 index 000000000000..9f057a4a5b06 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -0,0 +1,573 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include "xfs_inum.h" | ||
35 | #include "xfs_log.h" | ||
36 | #include "xfs_sb.h" | ||
37 | #include "xfs_dir.h" | ||
38 | #include "xfs_dir2.h" | ||
39 | #include "xfs_trans.h" | ||
40 | #include "xfs_dmapi.h" | ||
41 | #include "xfs_mount.h" | ||
42 | #include "xfs_bmap_btree.h" | ||
43 | #include "xfs_alloc_btree.h" | ||
44 | #include "xfs_ialloc_btree.h" | ||
45 | #include "xfs_alloc.h" | ||
46 | #include "xfs_btree.h" | ||
47 | #include "xfs_attr_sf.h" | ||
48 | #include "xfs_dir_sf.h" | ||
49 | #include "xfs_dir2_sf.h" | ||
50 | #include "xfs_dinode.h" | ||
51 | #include "xfs_inode.h" | ||
52 | #include "xfs_error.h" | ||
53 | #include "xfs_rw.h" | ||
54 | #include "xfs_ioctl32.h" | ||
55 | |||
56 | #include <linux/dcache.h> | ||
57 | #include <linux/smp_lock.h> | ||
58 | |||
59 | static struct vm_operations_struct linvfs_file_vm_ops; | ||
60 | |||
61 | |||
62 | STATIC inline ssize_t | ||
63 | __linvfs_read( | ||
64 | struct kiocb *iocb, | ||
65 | char __user *buf, | ||
66 | int ioflags, | ||
67 | size_t count, | ||
68 | loff_t pos) | ||
69 | { | ||
70 | struct iovec iov = {buf, count}; | ||
71 | struct file *file = iocb->ki_filp; | ||
72 | vnode_t *vp = LINVFS_GET_VP(file->f_dentry->d_inode); | ||
73 | ssize_t rval; | ||
74 | |||
75 | BUG_ON(iocb->ki_pos != pos); | ||
76 | |||
77 | if (unlikely(file->f_flags & O_DIRECT)) | ||
78 | ioflags |= IO_ISDIRECT; | ||
79 | VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); | ||
80 | return rval; | ||
81 | } | ||
82 | |||
83 | |||
84 | STATIC ssize_t | ||
85 | linvfs_aio_read( | ||
86 | struct kiocb *iocb, | ||
87 | char __user *buf, | ||
88 | size_t count, | ||
89 | loff_t pos) | ||
90 | { | ||
91 | return __linvfs_read(iocb, buf, IO_ISAIO, count, pos); | ||
92 | } | ||
93 | |||
94 | STATIC ssize_t | ||
95 | linvfs_aio_read_invis( | ||
96 | struct kiocb *iocb, | ||
97 | char __user *buf, | ||
98 | size_t count, | ||
99 | loff_t pos) | ||
100 | { | ||
101 | return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); | ||
102 | } | ||
103 | |||
104 | |||
105 | STATIC inline ssize_t | ||
106 | __linvfs_write( | ||
107 | struct kiocb *iocb, | ||
108 | const char __user *buf, | ||
109 | int ioflags, | ||
110 | size_t count, | ||
111 | loff_t pos) | ||
112 | { | ||
113 | struct iovec iov = {(void __user *)buf, count}; | ||
114 | struct file *file = iocb->ki_filp; | ||
115 | struct inode *inode = file->f_mapping->host; | ||
116 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
117 | ssize_t rval; | ||
118 | |||
119 | BUG_ON(iocb->ki_pos != pos); | ||
120 | if (unlikely(file->f_flags & O_DIRECT)) | ||
121 | ioflags |= IO_ISDIRECT; | ||
122 | |||
123 | VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); | ||
124 | return rval; | ||
125 | } | ||
126 | |||
127 | |||
128 | STATIC ssize_t | ||
129 | linvfs_aio_write( | ||
130 | struct kiocb *iocb, | ||
131 | const char __user *buf, | ||
132 | size_t count, | ||
133 | loff_t pos) | ||
134 | { | ||
135 | return __linvfs_write(iocb, buf, IO_ISAIO, count, pos); | ||
136 | } | ||
137 | |||
138 | STATIC ssize_t | ||
139 | linvfs_aio_write_invis( | ||
140 | struct kiocb *iocb, | ||
141 | const char __user *buf, | ||
142 | size_t count, | ||
143 | loff_t pos) | ||
144 | { | ||
145 | return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); | ||
146 | } | ||
147 | |||
148 | |||
149 | STATIC inline ssize_t | ||
150 | __linvfs_readv( | ||
151 | struct file *file, | ||
152 | const struct iovec *iov, | ||
153 | int ioflags, | ||
154 | unsigned long nr_segs, | ||
155 | loff_t *ppos) | ||
156 | { | ||
157 | struct inode *inode = file->f_mapping->host; | ||
158 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
159 | struct kiocb kiocb; | ||
160 | ssize_t rval; | ||
161 | |||
162 | init_sync_kiocb(&kiocb, file); | ||
163 | kiocb.ki_pos = *ppos; | ||
164 | |||
165 | if (unlikely(file->f_flags & O_DIRECT)) | ||
166 | ioflags |= IO_ISDIRECT; | ||
167 | VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); | ||
168 | |||
169 | *ppos = kiocb.ki_pos; | ||
170 | return rval; | ||
171 | } | ||
172 | |||
173 | STATIC ssize_t | ||
174 | linvfs_readv( | ||
175 | struct file *file, | ||
176 | const struct iovec *iov, | ||
177 | unsigned long nr_segs, | ||
178 | loff_t *ppos) | ||
179 | { | ||
180 | return __linvfs_readv(file, iov, 0, nr_segs, ppos); | ||
181 | } | ||
182 | |||
183 | STATIC ssize_t | ||
184 | linvfs_readv_invis( | ||
185 | struct file *file, | ||
186 | const struct iovec *iov, | ||
187 | unsigned long nr_segs, | ||
188 | loff_t *ppos) | ||
189 | { | ||
190 | return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos); | ||
191 | } | ||
192 | |||
193 | |||
194 | STATIC inline ssize_t | ||
195 | __linvfs_writev( | ||
196 | struct file *file, | ||
197 | const struct iovec *iov, | ||
198 | int ioflags, | ||
199 | unsigned long nr_segs, | ||
200 | loff_t *ppos) | ||
201 | { | ||
202 | struct inode *inode = file->f_mapping->host; | ||
203 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
204 | struct kiocb kiocb; | ||
205 | ssize_t rval; | ||
206 | |||
207 | init_sync_kiocb(&kiocb, file); | ||
208 | kiocb.ki_pos = *ppos; | ||
209 | if (unlikely(file->f_flags & O_DIRECT)) | ||
210 | ioflags |= IO_ISDIRECT; | ||
211 | |||
212 | VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); | ||
213 | |||
214 | *ppos = kiocb.ki_pos; | ||
215 | return rval; | ||
216 | } | ||
217 | |||
218 | |||
219 | STATIC ssize_t | ||
220 | linvfs_writev( | ||
221 | struct file *file, | ||
222 | const struct iovec *iov, | ||
223 | unsigned long nr_segs, | ||
224 | loff_t *ppos) | ||
225 | { | ||
226 | return __linvfs_writev(file, iov, 0, nr_segs, ppos); | ||
227 | } | ||
228 | |||
229 | STATIC ssize_t | ||
230 | linvfs_writev_invis( | ||
231 | struct file *file, | ||
232 | const struct iovec *iov, | ||
233 | unsigned long nr_segs, | ||
234 | loff_t *ppos) | ||
235 | { | ||
236 | return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos); | ||
237 | } | ||
238 | |||
239 | STATIC ssize_t | ||
240 | linvfs_sendfile( | ||
241 | struct file *filp, | ||
242 | loff_t *ppos, | ||
243 | size_t count, | ||
244 | read_actor_t actor, | ||
245 | void *target) | ||
246 | { | ||
247 | vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode); | ||
248 | ssize_t rval; | ||
249 | |||
250 | VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval); | ||
251 | return rval; | ||
252 | } | ||
253 | |||
254 | |||
255 | STATIC int | ||
256 | linvfs_open( | ||
257 | struct inode *inode, | ||
258 | struct file *filp) | ||
259 | { | ||
260 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
261 | int error; | ||
262 | |||
263 | if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | ||
264 | return -EFBIG; | ||
265 | |||
266 | ASSERT(vp); | ||
267 | VOP_OPEN(vp, NULL, error); | ||
268 | return -error; | ||
269 | } | ||
270 | |||
271 | |||
272 | STATIC int | ||
273 | linvfs_release( | ||
274 | struct inode *inode, | ||
275 | struct file *filp) | ||
276 | { | ||
277 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
278 | int error = 0; | ||
279 | |||
280 | if (vp) | ||
281 | VOP_RELEASE(vp, error); | ||
282 | return -error; | ||
283 | } | ||
284 | |||
285 | |||
286 | STATIC int | ||
287 | linvfs_fsync( | ||
288 | struct file *filp, | ||
289 | struct dentry *dentry, | ||
290 | int datasync) | ||
291 | { | ||
292 | struct inode *inode = dentry->d_inode; | ||
293 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
294 | int error; | ||
295 | int flags = FSYNC_WAIT; | ||
296 | |||
297 | if (datasync) | ||
298 | flags |= FSYNC_DATA; | ||
299 | |||
300 | ASSERT(vp); | ||
301 | VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); | ||
302 | return -error; | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * linvfs_readdir maps to VOP_READDIR(). | ||
307 | * We need to build a uio, cred, ... | ||
308 | */ | ||
309 | |||
310 | #define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) | ||
311 | |||
312 | STATIC int | ||
313 | linvfs_readdir( | ||
314 | struct file *filp, | ||
315 | void *dirent, | ||
316 | filldir_t filldir) | ||
317 | { | ||
318 | int error = 0; | ||
319 | vnode_t *vp; | ||
320 | uio_t uio; | ||
321 | iovec_t iov; | ||
322 | int eof = 0; | ||
323 | caddr_t read_buf; | ||
324 | int namelen, size = 0; | ||
325 | size_t rlen = PAGE_CACHE_SIZE; | ||
326 | xfs_off_t start_offset, curr_offset; | ||
327 | xfs_dirent_t *dbp = NULL; | ||
328 | |||
329 | vp = LINVFS_GET_VP(filp->f_dentry->d_inode); | ||
330 | ASSERT(vp); | ||
331 | |||
332 | /* Try fairly hard to get memory */ | ||
333 | do { | ||
334 | if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL))) | ||
335 | break; | ||
336 | rlen >>= 1; | ||
337 | } while (rlen >= 1024); | ||
338 | |||
339 | if (read_buf == NULL) | ||
340 | return -ENOMEM; | ||
341 | |||
342 | uio.uio_iov = &iov; | ||
343 | uio.uio_segflg = UIO_SYSSPACE; | ||
344 | curr_offset = filp->f_pos; | ||
345 | if (filp->f_pos != 0x7fffffff) | ||
346 | uio.uio_offset = filp->f_pos; | ||
347 | else | ||
348 | uio.uio_offset = 0xffffffff; | ||
349 | |||
350 | while (!eof) { | ||
351 | uio.uio_resid = iov.iov_len = rlen; | ||
352 | iov.iov_base = read_buf; | ||
353 | uio.uio_iovcnt = 1; | ||
354 | |||
355 | start_offset = uio.uio_offset; | ||
356 | |||
357 | VOP_READDIR(vp, &uio, NULL, &eof, error); | ||
358 | if ((uio.uio_offset == start_offset) || error) { | ||
359 | size = 0; | ||
360 | break; | ||
361 | } | ||
362 | |||
363 | size = rlen - uio.uio_resid; | ||
364 | dbp = (xfs_dirent_t *)read_buf; | ||
365 | while (size > 0) { | ||
366 | namelen = strlen(dbp->d_name); | ||
367 | |||
368 | if (filldir(dirent, dbp->d_name, namelen, | ||
369 | (loff_t) curr_offset & 0x7fffffff, | ||
370 | (ino_t) dbp->d_ino, | ||
371 | DT_UNKNOWN)) { | ||
372 | goto done; | ||
373 | } | ||
374 | size -= dbp->d_reclen; | ||
375 | curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */; | ||
376 | dbp = nextdp(dbp); | ||
377 | } | ||
378 | } | ||
379 | done: | ||
380 | if (!error) { | ||
381 | if (size == 0) | ||
382 | filp->f_pos = uio.uio_offset & 0x7fffffff; | ||
383 | else if (dbp) | ||
384 | filp->f_pos = curr_offset; | ||
385 | } | ||
386 | |||
387 | kfree(read_buf); | ||
388 | return -error; | ||
389 | } | ||
390 | |||
391 | |||
392 | STATIC int | ||
393 | linvfs_file_mmap( | ||
394 | struct file *filp, | ||
395 | struct vm_area_struct *vma) | ||
396 | { | ||
397 | struct inode *ip = filp->f_dentry->d_inode; | ||
398 | vnode_t *vp = LINVFS_GET_VP(ip); | ||
399 | vattr_t va = { .va_mask = XFS_AT_UPDATIME }; | ||
400 | int error; | ||
401 | |||
402 | if (vp->v_vfsp->vfs_flag & VFS_DMI) { | ||
403 | xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); | ||
404 | |||
405 | error = -XFS_SEND_MMAP(mp, vma, 0); | ||
406 | if (error) | ||
407 | return error; | ||
408 | } | ||
409 | |||
410 | vma->vm_ops = &linvfs_file_vm_ops; | ||
411 | |||
412 | VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); | ||
413 | if (!error) | ||
414 | vn_revalidate(vp); /* update Linux inode flags */ | ||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | |||
419 | STATIC long | ||
420 | linvfs_ioctl( | ||
421 | struct file *filp, | ||
422 | unsigned int cmd, | ||
423 | unsigned long arg) | ||
424 | { | ||
425 | int error; | ||
426 | struct inode *inode = filp->f_dentry->d_inode; | ||
427 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
428 | |||
429 | VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error); | ||
430 | VMODIFY(vp); | ||
431 | |||
432 | /* NOTE: some of the ioctls return positive #'s as a | ||
433 | * byte count indicating success, such as | ||
434 | * readlink_by_handle. So we don't "sign flip" | ||
435 | * like most other routines. This means true | ||
436 | * errors need to be returned as a negative value. | ||
437 | */ | ||
438 | return error; | ||
439 | } | ||
440 | |||
441 | STATIC long | ||
442 | linvfs_ioctl_invis( | ||
443 | struct file *filp, | ||
444 | unsigned int cmd, | ||
445 | unsigned long arg) | ||
446 | { | ||
447 | int error; | ||
448 | struct inode *inode = filp->f_dentry->d_inode; | ||
449 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
450 | |||
451 | ASSERT(vp); | ||
452 | VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); | ||
453 | VMODIFY(vp); | ||
454 | |||
455 | /* NOTE: some of the ioctls return positive #'s as a | ||
456 | * byte count indicating success, such as | ||
457 | * readlink_by_handle. So we don't "sign flip" | ||
458 | * like most other routines. This means true | ||
459 | * errors need to be returned as a negative value. | ||
460 | */ | ||
461 | return error; | ||
462 | } | ||
463 | |||
464 | #ifdef HAVE_VMOP_MPROTECT | ||
465 | STATIC int | ||
466 | linvfs_mprotect( | ||
467 | struct vm_area_struct *vma, | ||
468 | unsigned int newflags) | ||
469 | { | ||
470 | vnode_t *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode); | ||
471 | int error = 0; | ||
472 | |||
473 | if (vp->v_vfsp->vfs_flag & VFS_DMI) { | ||
474 | if ((vma->vm_flags & VM_MAYSHARE) && | ||
475 | (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) { | ||
476 | xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); | ||
477 | |||
478 | error = XFS_SEND_MMAP(mp, vma, VM_WRITE); | ||
479 | } | ||
480 | } | ||
481 | return error; | ||
482 | } | ||
483 | #endif /* HAVE_VMOP_MPROTECT */ | ||
484 | |||
485 | #ifdef HAVE_FOP_OPEN_EXEC | ||
486 | /* If the user is attempting to execute a file that is offline, then | ||
487 | * we have to trigger a DMAPI READ event before the file is marked as busy; | ||
488 | * otherwise the invisible I/O will not be able to write to the file to bring | ||
489 | * it back online. | ||
490 | */ | ||
491 | STATIC int | ||
492 | linvfs_open_exec( | ||
493 | struct inode *inode) | ||
494 | { | ||
495 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
496 | xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); | ||
497 | int error = 0; | ||
498 | bhv_desc_t *bdp; | ||
499 | xfs_inode_t *ip; | ||
500 | |||
501 | if (vp->v_vfsp->vfs_flag & VFS_DMI) { | ||
502 | bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); | ||
503 | if (!bdp) { | ||
504 | error = -EINVAL; | ||
505 | goto open_exec_out; | ||
506 | } | ||
507 | ip = XFS_BHVTOI(bdp); | ||
508 | if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { | ||
509 | error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, | ||
510 | 0, 0, 0, NULL); | ||
511 | } | ||
512 | } | ||
513 | open_exec_out: | ||
514 | return error; | ||
515 | } | ||
516 | #endif /* HAVE_FOP_OPEN_EXEC */ | ||
517 | |||
518 | struct file_operations linvfs_file_operations = { | ||
519 | .llseek = generic_file_llseek, | ||
520 | .read = do_sync_read, | ||
521 | .write = do_sync_write, | ||
522 | .readv = linvfs_readv, | ||
523 | .writev = linvfs_writev, | ||
524 | .aio_read = linvfs_aio_read, | ||
525 | .aio_write = linvfs_aio_write, | ||
526 | .sendfile = linvfs_sendfile, | ||
527 | .unlocked_ioctl = linvfs_ioctl, | ||
528 | #ifdef CONFIG_COMPAT | ||
529 | .compat_ioctl = xfs_compat_ioctl, | ||
530 | #endif | ||
531 | .mmap = linvfs_file_mmap, | ||
532 | .open = linvfs_open, | ||
533 | .release = linvfs_release, | ||
534 | .fsync = linvfs_fsync, | ||
535 | #ifdef HAVE_FOP_OPEN_EXEC | ||
536 | .open_exec = linvfs_open_exec, | ||
537 | #endif | ||
538 | }; | ||
539 | |||
540 | struct file_operations linvfs_invis_file_operations = { | ||
541 | .llseek = generic_file_llseek, | ||
542 | .read = do_sync_read, | ||
543 | .write = do_sync_write, | ||
544 | .readv = linvfs_readv_invis, | ||
545 | .writev = linvfs_writev_invis, | ||
546 | .aio_read = linvfs_aio_read_invis, | ||
547 | .aio_write = linvfs_aio_write_invis, | ||
548 | .sendfile = linvfs_sendfile, | ||
549 | .unlocked_ioctl = linvfs_ioctl_invis, | ||
550 | #ifdef CONFIG_COMPAT | ||
551 | .compat_ioctl = xfs_compat_invis_ioctl, | ||
552 | #endif | ||
553 | .mmap = linvfs_file_mmap, | ||
554 | .open = linvfs_open, | ||
555 | .release = linvfs_release, | ||
556 | .fsync = linvfs_fsync, | ||
557 | }; | ||
558 | |||
559 | |||
560 | struct file_operations linvfs_dir_operations = { | ||
561 | .read = generic_read_dir, | ||
562 | .readdir = linvfs_readdir, | ||
563 | .unlocked_ioctl = linvfs_ioctl, | ||
564 | .fsync = linvfs_fsync, | ||
565 | }; | ||
566 | |||
567 | static struct vm_operations_struct linvfs_file_vm_ops = { | ||
568 | .nopage = filemap_nopage, | ||
569 | .populate = filemap_populate, | ||
570 | #ifdef HAVE_VMOP_MPROTECT | ||
571 | .mprotect = linvfs_mprotect, | ||
572 | #endif | ||
573 | }; | ||
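The one allocation strategy worth calling out in this file is the readdir buffer sizing: linvfs_readdir() asks for a page worth of directory entries and, on failure, keeps halving the request down to 1 KiB before giving up. A hypothetical userspace sketch of the same back-off, with malloc standing in for kmalloc and SKETCH_PAGE_SIZE for PAGE_CACHE_SIZE:

/*
 * Userspace sketch (hypothetical) of the "try fairly hard to get memory"
 * loop in linvfs_readdir(): halve the request until an allocation
 * succeeds or the size drops below 1 KiB, then fail with ENOMEM.
 */
#include <errno.h>
#include <stdlib.h>

#define SKETCH_PAGE_SIZE 4096u          /* stands in for PAGE_CACHE_SIZE */

static void *alloc_dir_buf(size_t *lenp)
{
	size_t rlen = SKETCH_PAGE_SIZE;
	void *buf = NULL;

	do {
		buf = malloc(rlen);     /* kmalloc(rlen, GFP_KERNEL) in the kernel */
		if (buf)
			break;
		rlen >>= 1;             /* back off and try a smaller buffer */
	} while (rlen >= 1024);

	if (!buf) {
		errno = ENOMEM;
		return NULL;
	}
	*lenp = rlen;
	return buf;
}

int main(void)
{
	size_t len;
	void *buf = alloc_dir_buf(&len);

	free(buf);
	return buf ? 0 : 1;
}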
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c new file mode 100644 index 000000000000..05ebd30ec96f --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | |||
35 | /* | ||
36 | * Stub for no-op vnode operations that return success (zero). | ||
37 | */ | ||
38 | int | ||
39 | fs_noerr(void) | ||
40 | { | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | /* | ||
45 | * Operation unsupported under this file system. | ||
46 | */ | ||
47 | int | ||
48 | fs_nosys(void) | ||
49 | { | ||
50 | return ENOSYS; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Stub for inactive, strategy, and read/write lock/unlock. Does nothing. | ||
55 | */ | ||
56 | /* ARGSUSED */ | ||
57 | void | ||
58 | fs_noval(void) | ||
59 | { | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * vnode pcache layer for vnode_tosspages. | ||
64 | * 'last' parameter unused but left in for IRIX compatibility | ||
65 | */ | ||
66 | void | ||
67 | fs_tosspages( | ||
68 | bhv_desc_t *bdp, | ||
69 | xfs_off_t first, | ||
70 | xfs_off_t last, | ||
71 | int fiopt) | ||
72 | { | ||
73 | vnode_t *vp = BHV_TO_VNODE(bdp); | ||
74 | struct inode *ip = LINVFS_GET_IP(vp); | ||
75 | |||
76 | if (VN_CACHED(vp)) | ||
77 | truncate_inode_pages(ip->i_mapping, first); | ||
78 | } | ||
79 | |||
80 | |||
81 | /* | ||
82 | * vnode pcache layer for vnode_flushinval_pages. | ||
83 | * 'last' parameter unused but left in for IRIX compatibility | ||
84 | */ | ||
85 | void | ||
86 | fs_flushinval_pages( | ||
87 | bhv_desc_t *bdp, | ||
88 | xfs_off_t first, | ||
89 | xfs_off_t last, | ||
90 | int fiopt) | ||
91 | { | ||
92 | vnode_t *vp = BHV_TO_VNODE(bdp); | ||
93 | struct inode *ip = LINVFS_GET_IP(vp); | ||
94 | |||
95 | if (VN_CACHED(vp)) { | ||
96 | filemap_fdatawrite(ip->i_mapping); | ||
97 | filemap_fdatawait(ip->i_mapping); | ||
98 | |||
99 | truncate_inode_pages(ip->i_mapping, first); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * vnode pcache layer for vnode_flush_pages. | ||
105 | * 'last' parameter unused but left in for IRIX compatibility | ||
106 | */ | ||
107 | int | ||
108 | fs_flush_pages( | ||
109 | bhv_desc_t *bdp, | ||
110 | xfs_off_t first, | ||
111 | xfs_off_t last, | ||
112 | uint64_t flags, | ||
113 | int fiopt) | ||
114 | { | ||
115 | vnode_t *vp = BHV_TO_VNODE(bdp); | ||
116 | struct inode *ip = LINVFS_GET_IP(vp); | ||
117 | |||
118 | if (VN_CACHED(vp)) { | ||
119 | filemap_fdatawrite(ip->i_mapping); | ||
120 | filemap_fdatawait(ip->i_mapping); | ||
121 | } | ||
122 | |||
123 | return 0; | ||
124 | } | ||
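The three helpers differ only in whether they write dirty pages back, drop the cached pages, or both, and in which order. A rough userspace analogue is sketched below, using fsync(2) in place of filemap_fdatawrite()/filemap_fdatawait() and posix_fadvise(POSIX_FADV_DONTNEED) in place of truncate_inode_pages(); it illustrates the ordering only and is not a behavioural equivalent.

/*
 * Hypothetical userspace analogue of fs_flush_pages(), fs_tosspages()
 * and fs_flushinval_pages(): "flushinval" means flush first, then drop.
 */
#include <fcntl.h>
#include <unistd.h>

static void sketch_flush_pages(int fd)                      /* fs_flush_pages()      */
{
	fsync(fd);                                          /* write back, keep cached */
}

static void sketch_tosspages(int fd, off_t first)           /* fs_tosspages()        */
{
	posix_fadvise(fd, first, 0, POSIX_FADV_DONTNEED);   /* drop, no writeback      */
}

static void sketch_flushinval_pages(int fd, off_t first)    /* fs_flushinval_pages() */
{
	fsync(fd);                                          /* flush dirty data first  */
	posix_fadvise(fd, first, 0, POSIX_FADV_DONTNEED);   /* then invalidate cache   */
}

int main(void)
{
	int fd = open("/tmp/xfs_flush_sketch", O_CREAT | O_RDWR, 0600);

	if (fd >= 0) {
		sketch_flush_pages(fd);
		sketch_flushinval_pages(fd, 0);
		sketch_tosspages(fd, 0);
		close(fd);
	}
	return 0;
}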
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h new file mode 100644 index 000000000000..2db9ddbd4567 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_fs_subr.h | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUBR_H__ | ||
33 | #define __XFS_SUBR_H__ | ||
34 | |||
35 | /* | ||
36 | * Utilities shared among file system implementations. | ||
37 | */ | ||
38 | |||
39 | struct cred; | ||
40 | |||
41 | extern int fs_noerr(void); | ||
42 | extern int fs_nosys(void); | ||
43 | extern void fs_noval(void); | ||
44 | extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); | ||
45 | extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); | ||
46 | extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); | ||
47 | |||
48 | #endif /* __XFS_SUBR_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c new file mode 100644 index 000000000000..a6da5b4fd240 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_globals.c | |||
@@ -0,0 +1,74 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * This file contains globals needed by XFS that were normally defined | ||
35 | * somewhere else in IRIX. | ||
36 | */ | ||
37 | |||
38 | #include "xfs.h" | ||
39 | #include "xfs_cred.h" | ||
40 | #include "xfs_sysctl.h" | ||
41 | |||
42 | /* | ||
43 | * System memory size - used to scale certain data structures in XFS. | ||
44 | */ | ||
45 | unsigned long xfs_physmem; | ||
46 | |||
47 | /* | ||
48 | * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n | ||
49 | * because other XFS code uses these values. Times are measured in centisecs | ||
50 | * (i.e. 100ths of a second). | ||
51 | */ | ||
52 | xfs_param_t xfs_params = { | ||
53 | /* MIN DFLT MAX */ | ||
54 | .restrict_chown = { 0, 1, 1 }, | ||
55 | .sgid_inherit = { 0, 0, 1 }, | ||
56 | .symlink_mode = { 0, 0, 1 }, | ||
57 | .panic_mask = { 0, 0, 127 }, | ||
58 | .error_level = { 0, 3, 11 }, | ||
59 | .syncd_timer = { 1*100, 30*100, 7200*100}, | ||
60 | .stats_clear = { 0, 0, 1 }, | ||
61 | .inherit_sync = { 0, 1, 1 }, | ||
62 | .inherit_nodump = { 0, 1, 1 }, | ||
63 | .inherit_noatim = { 0, 1, 1 }, | ||
64 | .xfs_buf_timer = { 100/2, 1*100, 30*100 }, | ||
65 | .xfs_buf_age = { 1*100, 15*100, 7200*100}, | ||
66 | .inherit_nosym = { 0, 0, 1 }, | ||
67 | .rotorstep = { 1, 1, 255 }, | ||
68 | }; | ||
69 | |||
70 | /* | ||
71 | * Global system credential structure. | ||
72 | */ | ||
73 | cred_t sys_cred_val, *sys_cred = &sys_cred_val; | ||
74 | |||
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h new file mode 100644 index 000000000000..e81e2f38a853 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_globals.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_GLOBALS_H__ | ||
33 | #define __XFS_GLOBALS_H__ | ||
34 | |||
35 | /* | ||
36 | * This file declares globals needed by XFS that were normally defined | ||
37 | * somewhere else in IRIX. | ||
38 | */ | ||
39 | |||
40 | extern uint64_t xfs_panic_mask; /* set to cause more panics */ | ||
41 | extern unsigned long xfs_physmem; | ||
42 | extern struct cred *sys_cred; | ||
43 | |||
44 | #endif /* __XFS_GLOBALS_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c new file mode 100644 index 000000000000..69809eef8a54 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -0,0 +1,1336 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | |||
35 | #include "xfs_fs.h" | ||
36 | #include "xfs_inum.h" | ||
37 | #include "xfs_log.h" | ||
38 | #include "xfs_trans.h" | ||
39 | #include "xfs_sb.h" | ||
40 | #include "xfs_dir.h" | ||
41 | #include "xfs_dir2.h" | ||
42 | #include "xfs_alloc.h" | ||
43 | #include "xfs_dmapi.h" | ||
44 | #include "xfs_mount.h" | ||
45 | #include "xfs_alloc_btree.h" | ||
46 | #include "xfs_bmap_btree.h" | ||
47 | #include "xfs_ialloc_btree.h" | ||
48 | #include "xfs_btree.h" | ||
49 | #include "xfs_ialloc.h" | ||
50 | #include "xfs_attr_sf.h" | ||
51 | #include "xfs_dir_sf.h" | ||
52 | #include "xfs_dir2_sf.h" | ||
53 | #include "xfs_dinode.h" | ||
54 | #include "xfs_inode.h" | ||
55 | #include "xfs_bmap.h" | ||
56 | #include "xfs_bit.h" | ||
57 | #include "xfs_rtalloc.h" | ||
58 | #include "xfs_error.h" | ||
59 | #include "xfs_itable.h" | ||
60 | #include "xfs_rw.h" | ||
61 | #include "xfs_acl.h" | ||
62 | #include "xfs_cap.h" | ||
63 | #include "xfs_mac.h" | ||
64 | #include "xfs_attr.h" | ||
65 | #include "xfs_buf_item.h" | ||
66 | #include "xfs_utils.h" | ||
67 | #include "xfs_dfrag.h" | ||
68 | #include "xfs_fsops.h" | ||
69 | |||
70 | #include <linux/dcache.h> | ||
71 | #include <linux/mount.h> | ||
72 | #include <linux/namei.h> | ||
73 | #include <linux/pagemap.h> | ||
74 | |||
75 | /* | ||
76 | * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to | ||
77 | * a file or fs handle. | ||
78 | * | ||
79 | * XFS_IOC_PATH_TO_FSHANDLE | ||
80 | * returns fs handle for a mount point or path within that mount point | ||
81 | * XFS_IOC_FD_TO_HANDLE | ||
82 | * returns full handle for a FD opened in user space | ||
83 | * XFS_IOC_PATH_TO_HANDLE | ||
84 | * returns full handle for a path | ||
85 | */ | ||
86 | STATIC int | ||
87 | xfs_find_handle( | ||
88 | unsigned int cmd, | ||
89 | void __user *arg) | ||
90 | { | ||
91 | int hsize; | ||
92 | xfs_handle_t handle; | ||
93 | xfs_fsop_handlereq_t hreq; | ||
94 | struct inode *inode; | ||
95 | struct vnode *vp; | ||
96 | |||
97 | if (copy_from_user(&hreq, arg, sizeof(hreq))) | ||
98 | return -XFS_ERROR(EFAULT); | ||
99 | |||
100 | memset((char *)&handle, 0, sizeof(handle)); | ||
101 | |||
102 | switch (cmd) { | ||
103 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
104 | case XFS_IOC_PATH_TO_HANDLE: { | ||
105 | struct nameidata nd; | ||
106 | int error; | ||
107 | |||
108 | error = user_path_walk_link((const char __user *)hreq.path, &nd); | ||
109 | if (error) | ||
110 | return error; | ||
111 | |||
112 | ASSERT(nd.dentry); | ||
113 | ASSERT(nd.dentry->d_inode); | ||
114 | inode = igrab(nd.dentry->d_inode); | ||
115 | path_release(&nd); | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | case XFS_IOC_FD_TO_HANDLE: { | ||
120 | struct file *file; | ||
121 | |||
122 | file = fget(hreq.fd); | ||
123 | if (!file) | ||
124 | return -EBADF; | ||
125 | |||
126 | ASSERT(file->f_dentry); | ||
127 | ASSERT(file->f_dentry->d_inode); | ||
128 | inode = igrab(file->f_dentry->d_inode); | ||
129 | fput(file); | ||
130 | break; | ||
131 | } | ||
132 | |||
133 | default: | ||
134 | ASSERT(0); | ||
135 | return -XFS_ERROR(EINVAL); | ||
136 | } | ||
137 | |||
138 | if (inode->i_sb->s_magic != XFS_SB_MAGIC) { | ||
139 | /* we're not in XFS anymore, Toto */ | ||
140 | iput(inode); | ||
141 | return -XFS_ERROR(EINVAL); | ||
142 | } | ||
143 | |||
144 | /* we need the vnode */ | ||
145 | vp = LINVFS_GET_VP(inode); | ||
146 | if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { | ||
147 | iput(inode); | ||
148 | return -XFS_ERROR(EBADF); | ||
149 | } | ||
150 | |||
151 | /* now we can grab the fsid */ | ||
152 | memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); | ||
153 | hsize = sizeof(xfs_fsid_t); | ||
154 | |||
155 | if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { | ||
156 | xfs_inode_t *ip; | ||
157 | bhv_desc_t *bhv; | ||
158 | int lock_mode; | ||
159 | |||
160 | /* need to get access to the xfs_inode to read the generation */ | ||
161 | bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); | ||
162 | ASSERT(bhv); | ||
163 | ip = XFS_BHVTOI(bhv); | ||
164 | ASSERT(ip); | ||
165 | lock_mode = xfs_ilock_map_shared(ip); | ||
166 | |||
167 | /* fill in fid section of handle from inode */ | ||
168 | handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) - | ||
169 | sizeof(handle.ha_fid.xfs_fid_len); | ||
170 | handle.ha_fid.xfs_fid_pad = 0; | ||
171 | handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen; | ||
172 | handle.ha_fid.xfs_fid_ino = ip->i_ino; | ||
173 | |||
174 | xfs_iunlock_map_shared(ip, lock_mode); | ||
175 | |||
176 | hsize = XFS_HSIZE(handle); | ||
177 | } | ||
178 | |||
179 | /* now copy our handle into the user buffer & write out the size */ | ||
180 | if (copy_to_user(hreq.ohandle, &handle, hsize) || | ||
181 | copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { | ||
182 | iput(inode); | ||
183 | return -XFS_ERROR(EFAULT); | ||
184 | } | ||
185 | |||
186 | iput(inode); | ||
187 | return 0; | ||
188 | } | ||
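From userspace, xfs_find_handle() is reached through ioctl(2) with one of the three handle requests listed above. A hedged example of the XFS_IOC_PATH_TO_FSHANDLE case follows; it assumes the xfs_fsop_handlereq layout and ioctl numbers exported through the xfsprogs <xfs/xfs.h> header, and trims most error handling.

/*
 * Userspace sketch: ask XFS for the filesystem handle of a path via
 * XFS_IOC_PATH_TO_FSHANDLE.  Header path and types are assumptions
 * based on the xfsprogs userspace headers.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xfs/xfs.h>            /* xfs_fsop_handlereq_t, XFS_IOC_* */

int main(int argc, char **argv)
{
	char handle[64];
	__u32 hlen = 0;
	xfs_fsop_handlereq_t hreq;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);   /* any object on the XFS filesystem */
	if (fd < 0)
		return 1;

	memset(&hreq, 0, sizeof(hreq));
	hreq.path = argv[1];
	hreq.ohandle = handle;
	hreq.ohandlen = &hlen;

	if (ioctl(fd, XFS_IOC_PATH_TO_FSHANDLE, &hreq) < 0)
		perror("XFS_IOC_PATH_TO_FSHANDLE");
	else
		printf("got %u byte fshandle for %s\n", (unsigned)hlen, argv[1]);
	close(fd);
	return 0;
}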
189 | |||
190 | |||
191 | /* | ||
192 | * Convert userspace handle data into vnode (and inode). | ||
193 | * We [ab]use the fact that all the fsop_handlereq ioctl calls | ||
194 | * have a data structure argument whose first component is always | ||
195 | * an xfs_fsop_handlereq_t, so we can cast to and from this type. | ||
196 | * This allows us to optimise the copy_from_user calls and gives | ||
197 | * a handy, shared routine. | ||
198 | * | ||
199 | * If no error, caller must always VN_RELE the returned vp. | ||
200 | */ | ||
201 | STATIC int | ||
202 | xfs_vget_fsop_handlereq( | ||
203 | xfs_mount_t *mp, | ||
204 | struct inode *parinode, /* parent inode pointer */ | ||
205 | xfs_fsop_handlereq_t *hreq, | ||
206 | vnode_t **vp, | ||
207 | struct inode **inode) | ||
208 | { | ||
209 | void __user *hanp; | ||
210 | size_t hlen; | ||
211 | xfs_fid_t *xfid; | ||
212 | xfs_handle_t *handlep; | ||
213 | xfs_handle_t handle; | ||
214 | xfs_inode_t *ip; | ||
215 | struct inode *inodep; | ||
216 | vnode_t *vpp; | ||
217 | xfs_ino_t ino; | ||
218 | __u32 igen; | ||
219 | int error; | ||
220 | |||
221 | /* | ||
222 | * Only allow handle opens under a directory. | ||
223 | */ | ||
224 | if (!S_ISDIR(parinode->i_mode)) | ||
225 | return XFS_ERROR(ENOTDIR); | ||
226 | |||
227 | hanp = hreq->ihandle; | ||
228 | hlen = hreq->ihandlen; | ||
229 | handlep = &handle; | ||
230 | |||
231 | if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) | ||
232 | return XFS_ERROR(EINVAL); | ||
233 | if (copy_from_user(handlep, hanp, hlen)) | ||
234 | return XFS_ERROR(EFAULT); | ||
235 | if (hlen < sizeof(*handlep)) | ||
236 | memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); | ||
237 | if (hlen > sizeof(handlep->ha_fsid)) { | ||
238 | if (handlep->ha_fid.xfs_fid_len != | ||
239 | (hlen - sizeof(handlep->ha_fsid) | ||
240 | - sizeof(handlep->ha_fid.xfs_fid_len)) | ||
241 | || handlep->ha_fid.xfs_fid_pad) | ||
242 | return XFS_ERROR(EINVAL); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Crack the handle, obtain the inode # & generation # | ||
247 | */ | ||
248 | xfid = (struct xfs_fid *)&handlep->ha_fid; | ||
249 | if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) { | ||
250 | ino = xfid->xfs_fid_ino; | ||
251 | igen = xfid->xfs_fid_gen; | ||
252 | } else { | ||
253 | return XFS_ERROR(EINVAL); | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Get the XFS inode, building a vnode to go with it. | ||
258 | */ | ||
259 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); | ||
260 | if (error) | ||
261 | return error; | ||
262 | if (ip == NULL) | ||
263 | return XFS_ERROR(EIO); | ||
264 | if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { | ||
265 | xfs_iput_new(ip, XFS_ILOCK_SHARED); | ||
266 | return XFS_ERROR(ENOENT); | ||
267 | } | ||
268 | |||
269 | vpp = XFS_ITOV(ip); | ||
270 | inodep = LINVFS_GET_IP(vpp); | ||
271 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
272 | |||
273 | *vp = vpp; | ||
274 | *inode = inodep; | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | STATIC int | ||
279 | xfs_open_by_handle( | ||
280 | xfs_mount_t *mp, | ||
281 | void __user *arg, | ||
282 | struct file *parfilp, | ||
283 | struct inode *parinode) | ||
284 | { | ||
285 | int error; | ||
286 | int new_fd; | ||
287 | int permflag; | ||
288 | struct file *filp; | ||
289 | struct inode *inode; | ||
290 | struct dentry *dentry; | ||
291 | vnode_t *vp; | ||
292 | xfs_fsop_handlereq_t hreq; | ||
293 | |||
294 | if (!capable(CAP_SYS_ADMIN)) | ||
295 | return -XFS_ERROR(EPERM); | ||
296 | if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) | ||
297 | return -XFS_ERROR(EFAULT); | ||
298 | |||
299 | error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); | ||
300 | if (error) | ||
301 | return -error; | ||
302 | |||
303 | /* Restrict xfs_open_by_handle to directories & regular files. */ | ||
304 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { | ||
305 | iput(inode); | ||
306 | return -XFS_ERROR(EINVAL); | ||
307 | } | ||
308 | |||
309 | #if BITS_PER_LONG != 32 | ||
310 | hreq.oflags |= O_LARGEFILE; | ||
311 | #endif | ||
312 | /* Put open permission in namei format. */ | ||
313 | permflag = hreq.oflags; | ||
314 | if ((permflag+1) & O_ACCMODE) | ||
315 | permflag++; | ||
316 | if (permflag & O_TRUNC) | ||
317 | permflag |= 2; | ||
318 | |||
319 | if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && | ||
320 | (permflag & FMODE_WRITE) && IS_APPEND(inode)) { | ||
321 | iput(inode); | ||
322 | return -XFS_ERROR(EPERM); | ||
323 | } | ||
324 | |||
325 | if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { | ||
326 | iput(inode); | ||
327 | return -XFS_ERROR(EACCES); | ||
328 | } | ||
329 | |||
330 | /* Can't write directories. */ | ||
331 | if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { | ||
332 | iput(inode); | ||
333 | return -XFS_ERROR(EISDIR); | ||
334 | } | ||
335 | |||
336 | if ((new_fd = get_unused_fd()) < 0) { | ||
337 | iput(inode); | ||
338 | return new_fd; | ||
339 | } | ||
340 | |||
341 | dentry = d_alloc_anon(inode); | ||
342 | if (dentry == NULL) { | ||
343 | iput(inode); | ||
344 | put_unused_fd(new_fd); | ||
345 | return -XFS_ERROR(ENOMEM); | ||
346 | } | ||
347 | |||
348 | /* Ensure umount returns EBUSY on umounts while this file is open. */ | ||
349 | mntget(parfilp->f_vfsmnt); | ||
350 | |||
351 | /* Create file pointer. */ | ||
352 | filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags); | ||
353 | if (IS_ERR(filp)) { | ||
354 | put_unused_fd(new_fd); | ||
355 | return -XFS_ERROR(-PTR_ERR(filp)); | ||
356 | } | ||
357 | if (inode->i_mode & S_IFREG) | ||
358 | filp->f_op = &linvfs_invis_file_operations; | ||
359 | |||
360 | fd_install(new_fd, filp); | ||
361 | return new_fd; | ||
362 | } | ||
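The permflag manipulation above is the usual open-flags-to-FMODE conversion: O_RDONLY/O_WRONLY/O_RDWR encode as 0/1/2, so adding one (except for the reserved accmode value 3) yields the FMODE_READ/FMODE_WRITE bit pair, and O_TRUNC is additionally treated as needing write permission. A small standalone check, assuming the conventional Linux values FMODE_READ=1 and FMODE_WRITE=2:

/*
 * Sketch of the "put open permission in namei format" step from
 * xfs_open_by_handle(); the SK_* macros mirror the conventional Linux
 * encodings and are local stand-ins, not kernel definitions.
 */
#include <assert.h>

#define SK_O_ACCMODE   3
#define SK_FMODE_READ  1
#define SK_FMODE_WRITE 2

static int open_to_fmode(int oflags)
{
	int permflag = oflags;

	if ((permflag + 1) & SK_O_ACCMODE)      /* 0,1,2 -> 1,2,3 */
		permflag++;
	return permflag & (SK_FMODE_READ | SK_FMODE_WRITE);
}

int main(void)
{
	assert(open_to_fmode(0) == SK_FMODE_READ);                    /* O_RDONLY */
	assert(open_to_fmode(1) == SK_FMODE_WRITE);                   /* O_WRONLY */
	assert(open_to_fmode(2) == (SK_FMODE_READ | SK_FMODE_WRITE)); /* O_RDWR   */
	return 0;
}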
363 | |||
364 | STATIC int | ||
365 | xfs_readlink_by_handle( | ||
366 | xfs_mount_t *mp, | ||
367 | void __user *arg, | ||
368 | struct file *parfilp, | ||
369 | struct inode *parinode) | ||
370 | { | ||
371 | int error; | ||
372 | struct iovec aiov; | ||
373 | struct uio auio; | ||
374 | struct inode *inode; | ||
375 | xfs_fsop_handlereq_t hreq; | ||
376 | vnode_t *vp; | ||
377 | __u32 olen; | ||
378 | |||
379 | if (!capable(CAP_SYS_ADMIN)) | ||
380 | return -XFS_ERROR(EPERM); | ||
381 | if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) | ||
382 | return -XFS_ERROR(EFAULT); | ||
383 | |||
384 | error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); | ||
385 | if (error) | ||
386 | return -error; | ||
387 | |||
388 | /* Restrict this handle operation to symlinks only. */ | ||
389 | if (vp->v_type != VLNK) { | ||
390 | VN_RELE(vp); | ||
391 | return -XFS_ERROR(EINVAL); | ||
392 | } | ||
393 | |||
394 | if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { | ||
395 | VN_RELE(vp); | ||
396 | return -XFS_ERROR(EFAULT); | ||
397 | } | ||
398 | aiov.iov_len = olen; | ||
399 | aiov.iov_base = hreq.ohandle; | ||
400 | |||
401 | auio.uio_iov = &aiov; | ||
402 | auio.uio_iovcnt = 1; | ||
403 | auio.uio_offset = 0; | ||
404 | auio.uio_segflg = UIO_USERSPACE; | ||
405 | auio.uio_resid = olen; | ||
406 | |||
407 | VOP_READLINK(vp, &auio, IO_INVIS, NULL, error); | ||
408 | |||
409 | VN_RELE(vp); | ||
410 | return (olen - auio.uio_resid); | ||
411 | } | ||
412 | |||
413 | STATIC int | ||
414 | xfs_fssetdm_by_handle( | ||
415 | xfs_mount_t *mp, | ||
416 | void __user *arg, | ||
417 | struct file *parfilp, | ||
418 | struct inode *parinode) | ||
419 | { | ||
420 | int error; | ||
421 | struct fsdmidata fsd; | ||
422 | xfs_fsop_setdm_handlereq_t dmhreq; | ||
423 | struct inode *inode; | ||
424 | bhv_desc_t *bdp; | ||
425 | vnode_t *vp; | ||
426 | |||
427 | if (!capable(CAP_MKNOD)) | ||
428 | return -XFS_ERROR(EPERM); | ||
429 | if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) | ||
430 | return -XFS_ERROR(EFAULT); | ||
431 | |||
432 | error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode); | ||
433 | if (error) | ||
434 | return -error; | ||
435 | |||
436 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { | ||
437 | VN_RELE(vp); | ||
438 | return -XFS_ERROR(EPERM); | ||
439 | } | ||
440 | |||
441 | if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { | ||
442 | VN_RELE(vp); | ||
443 | return -XFS_ERROR(EFAULT); | ||
444 | } | ||
445 | |||
446 | bdp = bhv_base_unlocked(VN_BHV_HEAD(vp)); | ||
447 | error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL); | ||
448 | |||
449 | VN_RELE(vp); | ||
450 | if (error) | ||
451 | return -error; | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | STATIC int | ||
456 | xfs_attrlist_by_handle( | ||
457 | xfs_mount_t *mp, | ||
458 | void __user *arg, | ||
459 | struct file *parfilp, | ||
460 | struct inode *parinode) | ||
461 | { | ||
462 | int error; | ||
463 | attrlist_cursor_kern_t *cursor; | ||
464 | xfs_fsop_attrlist_handlereq_t al_hreq; | ||
465 | struct inode *inode; | ||
466 | vnode_t *vp; | ||
467 | char *kbuf; | ||
468 | |||
469 | if (!capable(CAP_SYS_ADMIN)) | ||
470 | return -XFS_ERROR(EPERM); | ||
471 | if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) | ||
472 | return -XFS_ERROR(EFAULT); | ||
473 | if (al_hreq.buflen > XATTR_LIST_MAX) | ||
474 | return -XFS_ERROR(EINVAL); | ||
475 | |||
476 | error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, | ||
477 | &vp, &inode); | ||
478 | if (error) | ||
479 | goto out; | ||
480 | |||
481 | kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); | ||
482 | if (!kbuf) | ||
483 | goto out_vn_rele; | ||
484 | |||
485 | cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; | ||
486 | VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags, | ||
487 | cursor, NULL, error); | ||
488 | if (error) | ||
489 | goto out_kfree; | ||
490 | |||
491 | if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) | ||
492 | error = -EFAULT; | ||
493 | |||
494 | out_kfree: | ||
495 | kfree(kbuf); | ||
496 | out_vn_rele: | ||
497 | VN_RELE(vp); | ||
498 | out: | ||
499 | return -error; | ||
500 | } | ||
501 | |||
502 | STATIC int | ||
503 | xfs_attrmulti_attr_get( | ||
504 | struct vnode *vp, | ||
505 | char *name, | ||
506 | char __user *ubuf, | ||
507 | __uint32_t *len, | ||
508 | __uint32_t flags) | ||
509 | { | ||
510 | char *kbuf; | ||
511 | int error = EFAULT; | ||
512 | |||
513 | if (*len > XATTR_SIZE_MAX) | ||
514 | return EINVAL; | ||
515 | kbuf = kmalloc(*len, GFP_KERNEL); | ||
516 | if (!kbuf) | ||
517 | return ENOMEM; | ||
518 | |||
519 | VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error); | ||
520 | if (error) | ||
521 | goto out_kfree; | ||
522 | |||
523 | if (copy_to_user(ubuf, kbuf, *len)) | ||
524 | error = EFAULT; | ||
525 | |||
526 | out_kfree: | ||
527 | kfree(kbuf); | ||
528 | return error; | ||
529 | } | ||
530 | |||
531 | STATIC int | ||
532 | xfs_attrmulti_attr_set( | ||
533 | struct vnode *vp, | ||
534 | char *name, | ||
535 | const char __user *ubuf, | ||
536 | __uint32_t len, | ||
537 | __uint32_t flags) | ||
538 | { | ||
539 | char *kbuf; | ||
540 | int error = EFAULT; | ||
541 | |||
542 | if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) | ||
543 | return EPERM; | ||
544 | if (len > XATTR_SIZE_MAX) | ||
545 | return EINVAL; | ||
546 | |||
547 | kbuf = kmalloc(len, GFP_KERNEL); | ||
548 | if (!kbuf) | ||
549 | return ENOMEM; | ||
550 | |||
551 | if (copy_from_user(kbuf, ubuf, len)) | ||
552 | goto out_kfree; | ||
553 | |||
554 | VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error); | ||
555 | |||
556 | out_kfree: | ||
557 | kfree(kbuf); | ||
558 | return error; | ||
559 | } | ||
560 | |||
561 | STATIC int | ||
562 | xfs_attrmulti_attr_remove( | ||
563 | struct vnode *vp, | ||
564 | char *name, | ||
565 | __uint32_t flags) | ||
566 | { | ||
567 | int error; | ||
568 | |||
569 | if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) | ||
570 | return EPERM; | ||
571 | |||
572 | VOP_ATTR_REMOVE(vp, name, flags, NULL, error); | ||
573 | return error; | ||
574 | } | ||
575 | |||
576 | STATIC int | ||
577 | xfs_attrmulti_by_handle( | ||
578 | xfs_mount_t *mp, | ||
579 | void __user *arg, | ||
580 | struct file *parfilp, | ||
581 | struct inode *parinode) | ||
582 | { | ||
583 | int error; | ||
584 | xfs_attr_multiop_t *ops; | ||
585 | xfs_fsop_attrmulti_handlereq_t am_hreq; | ||
586 | struct inode *inode; | ||
587 | vnode_t *vp; | ||
588 | unsigned int i, size; | ||
589 | char *attr_name; | ||
590 | |||
591 | if (!capable(CAP_SYS_ADMIN)) | ||
592 | return -XFS_ERROR(EPERM); | ||
593 | if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) | ||
594 | return -XFS_ERROR(EFAULT); | ||
595 | |||
596 | error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode); | ||
597 | if (error) | ||
598 | goto out; | ||
599 | |||
600 | error = E2BIG; | ||
601 | size = am_hreq.opcount * sizeof(attr_multiop_t); | ||
602 | if (!size || size > 16 * PAGE_SIZE) | ||
603 | goto out_vn_rele; | ||
604 | |||
605 | error = ENOMEM; | ||
606 | ops = kmalloc(size, GFP_KERNEL); | ||
607 | if (!ops) | ||
608 | goto out_vn_rele; | ||
609 | |||
610 | error = EFAULT; | ||
611 | if (copy_from_user(ops, am_hreq.ops, size)) | ||
612 | goto out_kfree_ops; | ||
613 | |||
614 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | ||
615 | if (!attr_name) | ||
616 | goto out_kfree_ops; | ||
617 | |||
618 | |||
619 | error = 0; | ||
620 | for (i = 0; i < am_hreq.opcount; i++) { | ||
621 | ops[i].am_error = strncpy_from_user(attr_name, | ||
622 | ops[i].am_attrname, MAXNAMELEN); | ||
623 | if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) | ||
624 | error = -ERANGE; | ||
625 | if (ops[i].am_error < 0) | ||
626 | break; | ||
627 | |||
628 | switch (ops[i].am_opcode) { | ||
629 | case ATTR_OP_GET: | ||
630 | ops[i].am_error = xfs_attrmulti_attr_get(vp, | ||
631 | attr_name, ops[i].am_attrvalue, | ||
632 | &ops[i].am_length, ops[i].am_flags); | ||
633 | break; | ||
634 | case ATTR_OP_SET: | ||
635 | ops[i].am_error = xfs_attrmulti_attr_set(vp, | ||
636 | attr_name, ops[i].am_attrvalue, | ||
637 | ops[i].am_length, ops[i].am_flags); | ||
638 | break; | ||
639 | case ATTR_OP_REMOVE: | ||
640 | ops[i].am_error = xfs_attrmulti_attr_remove(vp, | ||
641 | attr_name, ops[i].am_flags); | ||
642 | break; | ||
643 | default: | ||
644 | ops[i].am_error = EINVAL; | ||
645 | } | ||
646 | } | ||
647 | |||
648 | if (copy_to_user(am_hreq.ops, ops, size)) | ||
649 | error = XFS_ERROR(EFAULT); | ||
650 | |||
651 | kfree(attr_name); | ||
652 | out_kfree_ops: | ||
653 | kfree(ops); | ||
654 | out_vn_rele: | ||
655 | VN_RELE(vp); | ||
656 | out: | ||
657 | return -error; | ||
658 | } | ||
659 | |||
660 | /* prototypes for a few of the stack-hungry cases that have | ||
661 | * their own functions. Functions are defined after their use | ||
662 | * so gcc doesn't get fancy and inline them with -O3 */ | ||
663 | |||
664 | STATIC int | ||
665 | xfs_ioc_space( | ||
666 | bhv_desc_t *bdp, | ||
667 | vnode_t *vp, | ||
668 | struct file *filp, | ||
669 | int flags, | ||
670 | unsigned int cmd, | ||
671 | void __user *arg); | ||
672 | |||
673 | STATIC int | ||
674 | xfs_ioc_bulkstat( | ||
675 | xfs_mount_t *mp, | ||
676 | unsigned int cmd, | ||
677 | void __user *arg); | ||
678 | |||
679 | STATIC int | ||
680 | xfs_ioc_fsgeometry_v1( | ||
681 | xfs_mount_t *mp, | ||
682 | void __user *arg); | ||
683 | |||
684 | STATIC int | ||
685 | xfs_ioc_fsgeometry( | ||
686 | xfs_mount_t *mp, | ||
687 | void __user *arg); | ||
688 | |||
689 | STATIC int | ||
690 | xfs_ioc_xattr( | ||
691 | vnode_t *vp, | ||
692 | xfs_inode_t *ip, | ||
693 | struct file *filp, | ||
694 | unsigned int cmd, | ||
695 | void __user *arg); | ||
696 | |||
697 | STATIC int | ||
698 | xfs_ioc_getbmap( | ||
699 | bhv_desc_t *bdp, | ||
700 | struct file *filp, | ||
701 | int flags, | ||
702 | unsigned int cmd, | ||
703 | void __user *arg); | ||
704 | |||
705 | STATIC int | ||
706 | xfs_ioc_getbmapx( | ||
707 | bhv_desc_t *bdp, | ||
708 | void __user *arg); | ||
709 | |||
710 | int | ||
711 | xfs_ioctl( | ||
712 | bhv_desc_t *bdp, | ||
713 | struct inode *inode, | ||
714 | struct file *filp, | ||
715 | int ioflags, | ||
716 | unsigned int cmd, | ||
717 | void __user *arg) | ||
718 | { | ||
719 | int error; | ||
720 | vnode_t *vp; | ||
721 | xfs_inode_t *ip; | ||
722 | xfs_mount_t *mp; | ||
723 | |||
724 | vp = LINVFS_GET_VP(inode); | ||
725 | |||
726 | vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address); | ||
727 | |||
728 | ip = XFS_BHVTOI(bdp); | ||
729 | mp = ip->i_mount; | ||
730 | |||
731 | switch (cmd) { | ||
732 | |||
733 | case XFS_IOC_ALLOCSP: | ||
734 | case XFS_IOC_FREESP: | ||
735 | case XFS_IOC_RESVSP: | ||
736 | case XFS_IOC_UNRESVSP: | ||
737 | case XFS_IOC_ALLOCSP64: | ||
738 | case XFS_IOC_FREESP64: | ||
739 | case XFS_IOC_RESVSP64: | ||
740 | case XFS_IOC_UNRESVSP64: | ||
741 | /* | ||
742 | * Only allow the sys admin to reserve space unless | ||
743 | * unwritten extents are enabled. | ||
744 | */ | ||
745 | if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && | ||
746 | !capable(CAP_SYS_ADMIN)) | ||
747 | return -EPERM; | ||
748 | |||
749 | return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); | ||
750 | |||
751 | case XFS_IOC_DIOINFO: { | ||
752 | struct dioattr da; | ||
753 | xfs_buftarg_t *target = | ||
754 | (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? | ||
755 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
756 | |||
757 | da.d_mem = da.d_miniosz = 1 << target->pbr_sshift; | ||
758 | /* The size dio will do in one go */ | ||
759 | da.d_maxiosz = 64 * PAGE_CACHE_SIZE; | ||
760 | |||
761 | if (copy_to_user(arg, &da, sizeof(da))) | ||
762 | return -XFS_ERROR(EFAULT); | ||
763 | return 0; | ||
764 | } | ||
765 | |||
766 | case XFS_IOC_FSBULKSTAT_SINGLE: | ||
767 | case XFS_IOC_FSBULKSTAT: | ||
768 | case XFS_IOC_FSINUMBERS: | ||
769 | return xfs_ioc_bulkstat(mp, cmd, arg); | ||
770 | |||
771 | case XFS_IOC_FSGEOMETRY_V1: | ||
772 | return xfs_ioc_fsgeometry_v1(mp, arg); | ||
773 | |||
774 | case XFS_IOC_FSGEOMETRY: | ||
775 | return xfs_ioc_fsgeometry(mp, arg); | ||
776 | |||
777 | case XFS_IOC_GETVERSION: | ||
778 | case XFS_IOC_GETXFLAGS: | ||
779 | case XFS_IOC_SETXFLAGS: | ||
780 | case XFS_IOC_FSGETXATTR: | ||
781 | case XFS_IOC_FSSETXATTR: | ||
782 | case XFS_IOC_FSGETXATTRA: | ||
783 | return xfs_ioc_xattr(vp, ip, filp, cmd, arg); | ||
784 | |||
785 | case XFS_IOC_FSSETDM: { | ||
786 | struct fsdmidata dmi; | ||
787 | |||
788 | if (copy_from_user(&dmi, arg, sizeof(dmi))) | ||
789 | return -XFS_ERROR(EFAULT); | ||
790 | |||
791 | error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate, | ||
792 | NULL); | ||
793 | return -error; | ||
794 | } | ||
795 | |||
796 | case XFS_IOC_GETBMAP: | ||
797 | case XFS_IOC_GETBMAPA: | ||
798 | return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); | ||
799 | |||
800 | case XFS_IOC_GETBMAPX: | ||
801 | return xfs_ioc_getbmapx(bdp, arg); | ||
802 | |||
803 | case XFS_IOC_FD_TO_HANDLE: | ||
804 | case XFS_IOC_PATH_TO_HANDLE: | ||
805 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
806 | return xfs_find_handle(cmd, arg); | ||
807 | |||
808 | case XFS_IOC_OPEN_BY_HANDLE: | ||
809 | return xfs_open_by_handle(mp, arg, filp, inode); | ||
810 | |||
811 | case XFS_IOC_FSSETDM_BY_HANDLE: | ||
812 | return xfs_fssetdm_by_handle(mp, arg, filp, inode); | ||
813 | |||
814 | case XFS_IOC_READLINK_BY_HANDLE: | ||
815 | return xfs_readlink_by_handle(mp, arg, filp, inode); | ||
816 | |||
817 | case XFS_IOC_ATTRLIST_BY_HANDLE: | ||
818 | return xfs_attrlist_by_handle(mp, arg, filp, inode); | ||
819 | |||
820 | case XFS_IOC_ATTRMULTI_BY_HANDLE: | ||
821 | return xfs_attrmulti_by_handle(mp, arg, filp, inode); | ||
822 | |||
823 | case XFS_IOC_SWAPEXT: { | ||
824 | error = xfs_swapext((struct xfs_swapext __user *)arg); | ||
825 | return -error; | ||
826 | } | ||
827 | |||
828 | case XFS_IOC_FSCOUNTS: { | ||
829 | xfs_fsop_counts_t out; | ||
830 | |||
831 | error = xfs_fs_counts(mp, &out); | ||
832 | if (error) | ||
833 | return -error; | ||
834 | |||
835 | if (copy_to_user(arg, &out, sizeof(out))) | ||
836 | return -XFS_ERROR(EFAULT); | ||
837 | return 0; | ||
838 | } | ||
839 | |||
840 | case XFS_IOC_SET_RESBLKS: { | ||
841 | xfs_fsop_resblks_t inout; | ||
842 | __uint64_t in; | ||
843 | |||
844 | if (!capable(CAP_SYS_ADMIN)) | ||
845 | return -EPERM; | ||
846 | |||
847 | if (copy_from_user(&inout, arg, sizeof(inout))) | ||
848 | return -XFS_ERROR(EFAULT); | ||
849 | |||
850 | /* input parameter is passed in resblks field of structure */ | ||
851 | in = inout.resblks; | ||
852 | error = xfs_reserve_blocks(mp, &in, &inout); | ||
853 | if (error) | ||
854 | return -error; | ||
855 | |||
856 | if (copy_to_user(arg, &inout, sizeof(inout))) | ||
857 | return -XFS_ERROR(EFAULT); | ||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | case XFS_IOC_GET_RESBLKS: { | ||
862 | xfs_fsop_resblks_t out; | ||
863 | |||
864 | if (!capable(CAP_SYS_ADMIN)) | ||
865 | return -EPERM; | ||
866 | |||
867 | error = xfs_reserve_blocks(mp, NULL, &out); | ||
868 | if (error) | ||
869 | return -error; | ||
870 | |||
871 | if (copy_to_user(arg, &out, sizeof(out))) | ||
872 | return -XFS_ERROR(EFAULT); | ||
873 | |||
874 | return 0; | ||
875 | } | ||
876 | |||
877 | case XFS_IOC_FSGROWFSDATA: { | ||
878 | xfs_growfs_data_t in; | ||
879 | |||
880 | if (!capable(CAP_SYS_ADMIN)) | ||
881 | return -EPERM; | ||
882 | |||
883 | if (copy_from_user(&in, arg, sizeof(in))) | ||
884 | return -XFS_ERROR(EFAULT); | ||
885 | |||
886 | error = xfs_growfs_data(mp, &in); | ||
887 | return -error; | ||
888 | } | ||
889 | |||
890 | case XFS_IOC_FSGROWFSLOG: { | ||
891 | xfs_growfs_log_t in; | ||
892 | |||
893 | if (!capable(CAP_SYS_ADMIN)) | ||
894 | return -EPERM; | ||
895 | |||
896 | if (copy_from_user(&in, arg, sizeof(in))) | ||
897 | return -XFS_ERROR(EFAULT); | ||
898 | |||
899 | error = xfs_growfs_log(mp, &in); | ||
900 | return -error; | ||
901 | } | ||
902 | |||
903 | case XFS_IOC_FSGROWFSRT: { | ||
904 | xfs_growfs_rt_t in; | ||
905 | |||
906 | if (!capable(CAP_SYS_ADMIN)) | ||
907 | return -EPERM; | ||
908 | |||
909 | if (copy_from_user(&in, arg, sizeof(in))) | ||
910 | return -XFS_ERROR(EFAULT); | ||
911 | |||
912 | error = xfs_growfs_rt(mp, &in); | ||
913 | return -error; | ||
914 | } | ||
915 | |||
916 | case XFS_IOC_FREEZE: | ||
917 | if (!capable(CAP_SYS_ADMIN)) | ||
918 | return -EPERM; | ||
919 | |||
920 | if (inode->i_sb->s_frozen == SB_UNFROZEN) | ||
921 | freeze_bdev(inode->i_sb->s_bdev); | ||
922 | return 0; | ||
923 | |||
924 | case XFS_IOC_THAW: | ||
925 | if (!capable(CAP_SYS_ADMIN)) | ||
926 | return -EPERM; | ||
927 | if (inode->i_sb->s_frozen != SB_UNFROZEN) | ||
928 | thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); | ||
929 | return 0; | ||
930 | |||
931 | case XFS_IOC_GOINGDOWN: { | ||
932 | __uint32_t in; | ||
933 | |||
934 | if (!capable(CAP_SYS_ADMIN)) | ||
935 | return -EPERM; | ||
936 | |||
937 | if (get_user(in, (__uint32_t __user *)arg)) | ||
938 | return -XFS_ERROR(EFAULT); | ||
939 | |||
940 | error = xfs_fs_goingdown(mp, in); | ||
941 | return -error; | ||
942 | } | ||
943 | |||
944 | case XFS_IOC_ERROR_INJECTION: { | ||
945 | xfs_error_injection_t in; | ||
946 | |||
947 | if (!capable(CAP_SYS_ADMIN)) | ||
948 | return -EPERM; | ||
949 | |||
950 | if (copy_from_user(&in, arg, sizeof(in))) | ||
951 | return -XFS_ERROR(EFAULT); | ||
952 | |||
953 | error = xfs_errortag_add(in.errtag, mp); | ||
954 | return -error; | ||
955 | } | ||
956 | |||
957 | case XFS_IOC_ERROR_CLEARALL: | ||
958 | if (!capable(CAP_SYS_ADMIN)) | ||
959 | return -EPERM; | ||
960 | |||
961 | error = xfs_errortag_clearall(mp); | ||
962 | return -error; | ||
963 | |||
964 | default: | ||
965 | return -ENOTTY; | ||
966 | } | ||
967 | } | ||
968 | |||
969 | STATIC int | ||
970 | xfs_ioc_space( | ||
971 | bhv_desc_t *bdp, | ||
972 | vnode_t *vp, | ||
973 | struct file *filp, | ||
974 | int ioflags, | ||
975 | unsigned int cmd, | ||
976 | void __user *arg) | ||
977 | { | ||
978 | xfs_flock64_t bf; | ||
979 | int attr_flags = 0; | ||
980 | int error; | ||
981 | |||
982 | if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) | ||
983 | return -XFS_ERROR(EPERM); | ||
984 | |||
985 | if (!(filp->f_flags & FMODE_WRITE)) | ||
986 | return -XFS_ERROR(EBADF); | ||
987 | |||
988 | if (vp->v_type != VREG) | ||
989 | return -XFS_ERROR(EINVAL); | ||
990 | |||
991 | if (copy_from_user(&bf, arg, sizeof(bf))) | ||
992 | return -XFS_ERROR(EFAULT); | ||
993 | |||
994 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | ||
995 | attr_flags |= ATTR_NONBLOCK; | ||
996 | if (ioflags & IO_INVIS) | ||
997 | attr_flags |= ATTR_DMI; | ||
998 | |||
999 | error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos, | ||
1000 | NULL, attr_flags); | ||
1001 | return -error; | ||
1002 | } | ||
1003 | |||
1004 | STATIC int | ||
1005 | xfs_ioc_bulkstat( | ||
1006 | xfs_mount_t *mp, | ||
1007 | unsigned int cmd, | ||
1008 | void __user *arg) | ||
1009 | { | ||
1010 | xfs_fsop_bulkreq_t bulkreq; | ||
1011 | int count; /* # of records returned */ | ||
1012 | xfs_ino_t inlast; /* last inode number */ | ||
1013 | int done; | ||
1014 | int error; | ||
1015 | |||
1016 | /* done = 1 if there are more stats to get and if bulkstat */ | ||
1017 | /* should be called again (unused here, but used in dmapi) */ | ||
1018 | |||
1019 | if (!capable(CAP_SYS_ADMIN)) | ||
1020 | return -EPERM; | ||
1021 | |||
1022 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1023 | return -XFS_ERROR(EIO); | ||
1024 | |||
1025 | if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) | ||
1026 | return -XFS_ERROR(EFAULT); | ||
1027 | |||
1028 | if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) | ||
1029 | return -XFS_ERROR(EFAULT); | ||
1030 | |||
1031 | if ((count = bulkreq.icount) <= 0) | ||
1032 | return -XFS_ERROR(EINVAL); | ||
1033 | |||
1034 | if (cmd == XFS_IOC_FSINUMBERS) | ||
1035 | error = xfs_inumbers(mp, &inlast, &count, | ||
1036 | bulkreq.ubuffer); | ||
1037 | else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) | ||
1038 | error = xfs_bulkstat_single(mp, &inlast, | ||
1039 | bulkreq.ubuffer, &done); | ||
1040 | else { /* XFS_IOC_FSBULKSTAT */ | ||
1041 | if (count == 1 && inlast != 0) { | ||
1042 | inlast++; | ||
1043 | error = xfs_bulkstat_single(mp, &inlast, | ||
1044 | bulkreq.ubuffer, &done); | ||
1045 | } else { | ||
1046 | error = xfs_bulkstat(mp, &inlast, &count, | ||
1047 | (bulkstat_one_pf)xfs_bulkstat_one, NULL, | ||
1048 | sizeof(xfs_bstat_t), bulkreq.ubuffer, | ||
1049 | BULKSTAT_FG_QUICK, &done); | ||
1050 | } | ||
1051 | } | ||
1052 | |||
1053 | if (error) | ||
1054 | return -error; | ||
1055 | |||
1056 | if (bulkreq.ocount != NULL) { | ||
1057 | if (copy_to_user(bulkreq.lastip, &inlast, | ||
1058 | sizeof(xfs_ino_t))) | ||
1059 | return -XFS_ERROR(EFAULT); | ||
1060 | |||
1061 | if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) | ||
1062 | return -XFS_ERROR(EFAULT); | ||
1063 | } | ||
1064 | |||
1065 | return 0; | ||
1066 | } | ||
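
For reference, a minimal userspace sketch of how the bulkstat handler above is typically driven: XFS_IOC_FSBULKSTAT takes an xfs_fsop_bulkreq whose lastip/icount/ubuffer/ocount fields match what the handler copies in, and the handler requires CAP_SYS_ADMIN, so this must run privileged. The header name and the xfs_bstat fields printed here (bs_ino, bs_size) are assumptions based on the userspace xfs_fs.h of this era, not taken from this diff.

/* Illustrative sketch only -- walks every inode on an XFS filesystem
 * by repeatedly calling the XFS_IOC_FSBULKSTAT ioctl serviced by
 * xfs_ioc_bulkstat() above.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs_fs.h>		/* assumed userspace header for the XFS ioctls */

int main(int argc, char **argv)
{
	__u64 lastino = 0;		/* resume cookie, 0 = start of filesystem */
	__s32 count = 0;
	struct xfs_bstat buf[64];
	struct xfs_fsop_bulkreq req = {
		.lastip  = &lastino,
		.icount  = 64,
		.ubuffer = buf,
		.ocount  = &count,
	};
	int i, fd = open(argc > 1 ? argv[1] : "/mnt/xfs", O_RDONLY);

	if (fd < 0)
		return 1;
	/* The kernel updates lastino and *ocount on each round; a zero
	 * count means the inode space has been exhausted. */
	while (ioctl(fd, XFS_IOC_FSBULKSTAT, &req) == 0 && count > 0)
		for (i = 0; i < count; i++)
			printf("ino %llu size %lld\n",
			       (unsigned long long)buf[i].bs_ino,
			       (long long)buf[i].bs_size);
	close(fd);
	return 0;
}
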
1067 | |||
1068 | STATIC int | ||
1069 | xfs_ioc_fsgeometry_v1( | ||
1070 | xfs_mount_t *mp, | ||
1071 | void __user *arg) | ||
1072 | { | ||
1073 | xfs_fsop_geom_v1_t fsgeo; | ||
1074 | int error; | ||
1075 | |||
1076 | error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); | ||
1077 | if (error) | ||
1078 | return -error; | ||
1079 | |||
1080 | if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) | ||
1081 | return -XFS_ERROR(EFAULT); | ||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
1085 | STATIC int | ||
1086 | xfs_ioc_fsgeometry( | ||
1087 | xfs_mount_t *mp, | ||
1088 | void __user *arg) | ||
1089 | { | ||
1090 | xfs_fsop_geom_t fsgeo; | ||
1091 | int error; | ||
1092 | |||
1093 | error = xfs_fs_geometry(mp, &fsgeo, 4); | ||
1094 | if (error) | ||
1095 | return -error; | ||
1096 | |||
1097 | if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) | ||
1098 | return -XFS_ERROR(EFAULT); | ||
1099 | return 0; | ||
1100 | } | ||
1101 | |||
1102 | /* | ||
1103 | * Linux extended inode flags interface. | ||
1104 | */ | ||
1105 | #define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */ | ||
1106 | #define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */ | ||
1107 | #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ | ||
1108 | #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ | ||
1109 | #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ | ||
1110 | |||
1111 | STATIC unsigned int | ||
1112 | xfs_merge_ioc_xflags( | ||
1113 | unsigned int flags, | ||
1114 | unsigned int start) | ||
1115 | { | ||
1116 | unsigned int xflags = start; | ||
1117 | |||
1118 | if (flags & LINUX_XFLAG_IMMUTABLE) | ||
1119 | xflags |= XFS_XFLAG_IMMUTABLE; | ||
1120 | else | ||
1121 | xflags &= ~XFS_XFLAG_IMMUTABLE; | ||
1122 | if (flags & LINUX_XFLAG_APPEND) | ||
1123 | xflags |= XFS_XFLAG_APPEND; | ||
1124 | else | ||
1125 | xflags &= ~XFS_XFLAG_APPEND; | ||
1126 | if (flags & LINUX_XFLAG_SYNC) | ||
1127 | xflags |= XFS_XFLAG_SYNC; | ||
1128 | else | ||
1129 | xflags &= ~XFS_XFLAG_SYNC; | ||
1130 | if (flags & LINUX_XFLAG_NOATIME) | ||
1131 | xflags |= XFS_XFLAG_NOATIME; | ||
1132 | else | ||
1133 | xflags &= ~XFS_XFLAG_NOATIME; | ||
1134 | if (flags & LINUX_XFLAG_NODUMP) | ||
1135 | xflags |= XFS_XFLAG_NODUMP; | ||
1136 | else | ||
1137 | xflags &= ~XFS_XFLAG_NODUMP; | ||
1138 | |||
1139 | return xflags; | ||
1140 | } | ||
1141 | |||
1142 | STATIC unsigned int | ||
1143 | xfs_di2lxflags( | ||
1144 | __uint16_t di_flags) | ||
1145 | { | ||
1146 | unsigned int flags = 0; | ||
1147 | |||
1148 | if (di_flags & XFS_DIFLAG_IMMUTABLE) | ||
1149 | flags |= LINUX_XFLAG_IMMUTABLE; | ||
1150 | if (di_flags & XFS_DIFLAG_APPEND) | ||
1151 | flags |= LINUX_XFLAG_APPEND; | ||
1152 | if (di_flags & XFS_DIFLAG_SYNC) | ||
1153 | flags |= LINUX_XFLAG_SYNC; | ||
1154 | if (di_flags & XFS_DIFLAG_NOATIME) | ||
1155 | flags |= LINUX_XFLAG_NOATIME; | ||
1156 | if (di_flags & XFS_DIFLAG_NODUMP) | ||
1157 | flags |= LINUX_XFLAG_NODUMP; | ||
1158 | return flags; | ||
1159 | } | ||
1160 | |||
1161 | STATIC int | ||
1162 | xfs_ioc_xattr( | ||
1163 | vnode_t *vp, | ||
1164 | xfs_inode_t *ip, | ||
1165 | struct file *filp, | ||
1166 | unsigned int cmd, | ||
1167 | void __user *arg) | ||
1168 | { | ||
1169 | struct fsxattr fa; | ||
1170 | vattr_t va; | ||
1171 | int error; | ||
1172 | int attr_flags; | ||
1173 | unsigned int flags; | ||
1174 | |||
1175 | switch (cmd) { | ||
1176 | case XFS_IOC_FSGETXATTR: { | ||
1177 | va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS; | ||
1178 | VOP_GETATTR(vp, &va, 0, NULL, error); | ||
1179 | if (error) | ||
1180 | return -error; | ||
1181 | |||
1182 | fa.fsx_xflags = va.va_xflags; | ||
1183 | fa.fsx_extsize = va.va_extsize; | ||
1184 | fa.fsx_nextents = va.va_nextents; | ||
1185 | |||
1186 | if (copy_to_user(arg, &fa, sizeof(fa))) | ||
1187 | return -XFS_ERROR(EFAULT); | ||
1188 | return 0; | ||
1189 | } | ||
1190 | |||
1191 | case XFS_IOC_FSSETXATTR: { | ||
1192 | if (copy_from_user(&fa, arg, sizeof(fa))) | ||
1193 | return -XFS_ERROR(EFAULT); | ||
1194 | |||
1195 | attr_flags = 0; | ||
1196 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | ||
1197 | attr_flags |= ATTR_NONBLOCK; | ||
1198 | |||
1199 | va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; | ||
1200 | va.va_xflags = fa.fsx_xflags; | ||
1201 | va.va_extsize = fa.fsx_extsize; | ||
1202 | |||
1203 | VOP_SETATTR(vp, &va, attr_flags, NULL, error); | ||
1204 | if (!error) | ||
1205 | vn_revalidate(vp); /* update Linux inode flags */ | ||
1206 | return -error; | ||
1207 | } | ||
1208 | |||
1209 | case XFS_IOC_FSGETXATTRA: { | ||
1210 | va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS; | ||
1211 | VOP_GETATTR(vp, &va, 0, NULL, error); | ||
1212 | if (error) | ||
1213 | return -error; | ||
1214 | |||
1215 | fa.fsx_xflags = va.va_xflags; | ||
1216 | fa.fsx_extsize = va.va_extsize; | ||
1217 | fa.fsx_nextents = va.va_anextents; | ||
1218 | |||
1219 | if (copy_to_user(arg, &fa, sizeof(fa))) | ||
1220 | return -XFS_ERROR(EFAULT); | ||
1221 | return 0; | ||
1222 | } | ||
1223 | |||
1224 | case XFS_IOC_GETXFLAGS: { | ||
1225 | flags = xfs_di2lxflags(ip->i_d.di_flags); | ||
1226 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1227 | return -XFS_ERROR(EFAULT); | ||
1228 | return 0; | ||
1229 | } | ||
1230 | |||
1231 | case XFS_IOC_SETXFLAGS: { | ||
1232 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1233 | return -XFS_ERROR(EFAULT); | ||
1234 | |||
1235 | if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \ | ||
1236 | LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \ | ||
1237 | LINUX_XFLAG_SYNC)) | ||
1238 | return -XFS_ERROR(EOPNOTSUPP); | ||
1239 | |||
1240 | attr_flags = 0; | ||
1241 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | ||
1242 | attr_flags |= ATTR_NONBLOCK; | ||
1243 | |||
1244 | va.va_mask = XFS_AT_XFLAGS; | ||
1245 | va.va_xflags = xfs_merge_ioc_xflags(flags, | ||
1246 | xfs_ip2xflags(ip)); | ||
1247 | |||
1248 | VOP_SETATTR(vp, &va, attr_flags, NULL, error); | ||
1249 | if (!error) | ||
1250 | vn_revalidate(vp); /* update Linux inode flags */ | ||
1251 | return -error; | ||
1252 | } | ||
1253 | |||
1254 | case XFS_IOC_GETVERSION: { | ||
1255 | flags = LINVFS_GET_IP(vp)->i_generation; | ||
1256 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1257 | return -XFS_ERROR(EFAULT); | ||
1258 | return 0; | ||
1259 | } | ||
1260 | |||
1261 | default: | ||
1262 | return -ENOTTY; | ||
1263 | } | ||
1264 | } | ||
1265 | |||
1266 | STATIC int | ||
1267 | xfs_ioc_getbmap( | ||
1268 | bhv_desc_t *bdp, | ||
1269 | struct file *filp, | ||
1270 | int ioflags, | ||
1271 | unsigned int cmd, | ||
1272 | void __user *arg) | ||
1273 | { | ||
1274 | struct getbmap bm; | ||
1275 | int iflags; | ||
1276 | int error; | ||
1277 | |||
1278 | if (copy_from_user(&bm, arg, sizeof(bm))) | ||
1279 | return -XFS_ERROR(EFAULT); | ||
1280 | |||
1281 | if (bm.bmv_count < 2) | ||
1282 | return -XFS_ERROR(EINVAL); | ||
1283 | |||
1284 | iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); | ||
1285 | if (ioflags & IO_INVIS) | ||
1286 | iflags |= BMV_IF_NO_DMAPI_READ; | ||
1287 | |||
1288 | error = xfs_getbmap(bdp, &bm, (struct getbmap __user *)arg+1, iflags); | ||
1289 | if (error) | ||
1290 | return -error; | ||
1291 | |||
1292 | if (copy_to_user(arg, &bm, sizeof(bm))) | ||
1293 | return -XFS_ERROR(EFAULT); | ||
1294 | return 0; | ||
1295 | } | ||
1296 | |||
1297 | STATIC int | ||
1298 | xfs_ioc_getbmapx( | ||
1299 | bhv_desc_t *bdp, | ||
1300 | void __user *arg) | ||
1301 | { | ||
1302 | struct getbmapx bmx; | ||
1303 | struct getbmap bm; | ||
1304 | int iflags; | ||
1305 | int error; | ||
1306 | |||
1307 | if (copy_from_user(&bmx, arg, sizeof(bmx))) | ||
1308 | return -XFS_ERROR(EFAULT); | ||
1309 | |||
1310 | if (bmx.bmv_count < 2) | ||
1311 | return -XFS_ERROR(EINVAL); | ||
1312 | |||
1313 | /* | ||
1314 | * Map input getbmapx structure to a getbmap | ||
1315 | * structure for xfs_getbmap. | ||
1316 | */ | ||
1317 | GETBMAP_CONVERT(bmx, bm); | ||
1318 | |||
1319 | iflags = bmx.bmv_iflags; | ||
1320 | |||
1321 | if (iflags & (~BMV_IF_VALID)) | ||
1322 | return -XFS_ERROR(EINVAL); | ||
1323 | |||
1324 | iflags |= BMV_IF_EXTENDED; | ||
1325 | |||
1326 | error = xfs_getbmap(bdp, &bm, (struct getbmapx __user *)arg+1, iflags); | ||
1327 | if (error) | ||
1328 | return -error; | ||
1329 | |||
1330 | GETBMAP_CONVERT(bm, bmx); | ||
1331 | |||
1332 | if (copy_to_user(arg, &bmx, sizeof(bmx))) | ||
1333 | return -XFS_ERROR(EFAULT); | ||
1334 | |||
1335 | return 0; | ||
1336 | } | ||
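
The dispatcher above routes XFS_IOC_FSGEOMETRY to xfs_ioc_fsgeometry(), which copies an xfs_fsop_geom back to the caller. For reference, a minimal userspace sketch of issuing that ioctl; the mount path, the header name, and the geometry fields printed (blocksize, agcount, datablocks are the conventional names in the userspace header) are assumptions, not taken from this diff.

/* Illustrative sketch only -- queries filesystem geometry through the
 * XFS_IOC_FSGEOMETRY ioctl handled by xfs_ioc_fsgeometry() above.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs_fs.h>		/* assumed userspace header providing the ioctl numbers */

int main(void)
{
	struct xfs_fsop_geom geo;
	int fd = open("/mnt/xfs", O_RDONLY);	/* any file or directory on the fs */

	if (fd < 0)
		return 1;
	if (ioctl(fd, XFS_IOC_FSGEOMETRY, &geo) == 0)
		printf("blocksize %u, agcount %u, data blocks %llu\n",
		       geo.blocksize, geo.agcount,
		       (unsigned long long)geo.datablocks);
	close(fd);
	return 0;
}
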
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c new file mode 100644 index 000000000000..7a12c83184f5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -0,0 +1,163 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include <linux/config.h> | ||
34 | #include <linux/compat.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/ioctl.h> | ||
37 | #include <linux/ioctl32.h> | ||
38 | #include <linux/syscalls.h> | ||
39 | #include <linux/types.h> | ||
40 | #include <linux/fs.h> | ||
41 | #include <asm/uaccess.h> | ||
42 | |||
43 | #include "xfs.h" | ||
44 | #include "xfs_types.h" | ||
45 | #include "xfs_fs.h" | ||
46 | #include "xfs_vfs.h" | ||
47 | #include "xfs_vnode.h" | ||
48 | #include "xfs_dfrag.h" | ||
49 | |||
50 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) | ||
51 | #define BROKEN_X86_ALIGNMENT | ||
52 | #else | ||
53 | |||
54 | typedef struct xfs_fsop_bulkreq32 { | ||
55 | compat_uptr_t lastip; /* last inode # pointer */ | ||
56 | __s32 icount; /* count of entries in buffer */ | ||
57 | compat_uptr_t ubuffer; /* user buffer for inode desc. */ | ||
58 | __s32 ocount; /* output count pointer */ | ||
59 | } xfs_fsop_bulkreq32_t; | ||
60 | |||
61 | static unsigned long | ||
62 | xfs_ioctl32_bulkstat(unsigned long arg) | ||
63 | { | ||
64 | xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg; | ||
65 | xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p)); | ||
66 | u32 addr; | ||
67 | |||
68 | if (get_user(addr, &p32->lastip) || | ||
69 | put_user(compat_ptr(addr), &p->lastip) || | ||
70 | copy_in_user(&p->icount, &p32->icount, sizeof(s32)) || | ||
71 | get_user(addr, &p32->ubuffer) || | ||
72 | put_user(compat_ptr(addr), &p->ubuffer) || | ||
73 | get_user(addr, &p32->ocount) || | ||
74 | put_user(compat_ptr(addr), &p->ocount)) | ||
75 | return -EFAULT; | ||
76 | |||
77 | return (unsigned long)p; | ||
78 | } | ||
79 | #endif | ||
80 | |||
81 | static long | ||
82 | __xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) | ||
83 | { | ||
84 | int error; | ||
85 | struct inode *inode = f->f_dentry->d_inode; | ||
86 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
87 | |||
88 | switch (cmd) { | ||
89 | case XFS_IOC_DIOINFO: | ||
90 | case XFS_IOC_FSGEOMETRY_V1: | ||
91 | case XFS_IOC_FSGEOMETRY: | ||
92 | case XFS_IOC_GETVERSION: | ||
93 | case XFS_IOC_GETXFLAGS: | ||
94 | case XFS_IOC_SETXFLAGS: | ||
95 | case XFS_IOC_FSGETXATTR: | ||
96 | case XFS_IOC_FSSETXATTR: | ||
97 | case XFS_IOC_FSGETXATTRA: | ||
98 | case XFS_IOC_FSSETDM: | ||
99 | case XFS_IOC_GETBMAP: | ||
100 | case XFS_IOC_GETBMAPA: | ||
101 | case XFS_IOC_GETBMAPX: | ||
102 | /* not handled | ||
103 | case XFS_IOC_FD_TO_HANDLE: | ||
104 | case XFS_IOC_PATH_TO_HANDLE: | ||
105 | case XFS_IOC_PATH_TO_HANDLE: | ||
106 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
107 | case XFS_IOC_OPEN_BY_HANDLE: | ||
108 | case XFS_IOC_FSSETDM_BY_HANDLE: | ||
109 | case XFS_IOC_READLINK_BY_HANDLE: | ||
110 | case XFS_IOC_ATTRLIST_BY_HANDLE: | ||
111 | case XFS_IOC_ATTRMULTI_BY_HANDLE: | ||
112 | */ | ||
113 | case XFS_IOC_FSCOUNTS: | ||
114 | case XFS_IOC_SET_RESBLKS: | ||
115 | case XFS_IOC_GET_RESBLKS: | ||
116 | case XFS_IOC_FSGROWFSDATA: | ||
117 | case XFS_IOC_FSGROWFSLOG: | ||
118 | case XFS_IOC_FSGROWFSRT: | ||
119 | case XFS_IOC_FREEZE: | ||
120 | case XFS_IOC_THAW: | ||
121 | case XFS_IOC_GOINGDOWN: | ||
122 | case XFS_IOC_ERROR_INJECTION: | ||
123 | case XFS_IOC_ERROR_CLEARALL: | ||
124 | break; | ||
125 | |||
126 | #ifndef BROKEN_X86_ALIGNMENT | ||
127 | /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ | ||
128 | case XFS_IOC_ALLOCSP: | ||
129 | case XFS_IOC_FREESP: | ||
130 | case XFS_IOC_RESVSP: | ||
131 | case XFS_IOC_UNRESVSP: | ||
132 | case XFS_IOC_ALLOCSP64: | ||
133 | case XFS_IOC_FREESP64: | ||
134 | case XFS_IOC_RESVSP64: | ||
135 | case XFS_IOC_UNRESVSP64: | ||
136 | case XFS_IOC_SWAPEXT: | ||
137 | break; | ||
138 | |||
139 | case XFS_IOC_FSBULKSTAT_SINGLE: | ||
140 | case XFS_IOC_FSBULKSTAT: | ||
141 | case XFS_IOC_FSINUMBERS: | ||
142 | arg = xfs_ioctl32_bulkstat(arg); | ||
143 | break; | ||
144 | #endif | ||
145 | default: | ||
146 | return -ENOIOCTLCMD; | ||
147 | } | ||
148 | |||
149 | VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error); | ||
150 | VMODIFY(vp); | ||
151 | |||
152 | return error; | ||
153 | } | ||
154 | |||
155 | long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg) | ||
156 | { | ||
157 | return __xfs_compat_ioctl(0, f, cmd, arg); | ||
158 | } | ||
159 | |||
160 | long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg) | ||
161 | { | ||
162 | return __xfs_compat_ioctl(IO_INVIS, f, cmd, arg); | ||
163 | } | ||
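
The BROKEN_X86_ALIGNMENT case above exists because the i386 ABI aligns 64-bit members on 4-byte boundaries while x86_64 and ia64 align them to 8 bytes, so commands whose arguments mix 32- and 64-bit fields (the "xfs_flock_t and xfs_bstat_t" noted in the comment) have different layouts in 32-bit and 64-bit processes and cannot simply be passed through. A stand-alone sketch of the effect, not part of this commit:

/* Illustrative sketch only -- shows why the layout of a struct mixing
 * 16/32-bit and 64-bit members differs between 32-bit and 64-bit x86,
 * which is what the BROKEN_X86_ALIGNMENT #define above guards against.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct demo {
	int16_t type;		/* a small member, as in the flock-style argument */
	int64_t start;		/* a 64-bit member immediately after it */
};

int main(void)
{
	/* Built with "gcc -m32": offsetof(start) is 4 and sizeof is 12.
	 * Built with "gcc -m64": offsetof(start) is 8 and sizeof is 16.
	 * The mismatch is why these ioctls need explicit translation. */
	printf("offsetof(start) = %zu, sizeof = %zu\n",
	       offsetof(struct demo, start), sizeof(struct demo));
	return 0;
}
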
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h new file mode 100644 index 000000000000..779f69a48116 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg); | ||
34 | long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg); | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c new file mode 100644 index 000000000000..407e99359391 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -0,0 +1,680 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include "xfs_fs.h" | ||
35 | #include "xfs_inum.h" | ||
36 | #include "xfs_log.h" | ||
37 | #include "xfs_trans.h" | ||
38 | #include "xfs_sb.h" | ||
39 | #include "xfs_ag.h" | ||
40 | #include "xfs_dir.h" | ||
41 | #include "xfs_dir2.h" | ||
42 | #include "xfs_alloc.h" | ||
43 | #include "xfs_dmapi.h" | ||
44 | #include "xfs_quota.h" | ||
45 | #include "xfs_mount.h" | ||
46 | #include "xfs_alloc_btree.h" | ||
47 | #include "xfs_bmap_btree.h" | ||
48 | #include "xfs_ialloc_btree.h" | ||
49 | #include "xfs_btree.h" | ||
50 | #include "xfs_ialloc.h" | ||
51 | #include "xfs_attr_sf.h" | ||
52 | #include "xfs_dir_sf.h" | ||
53 | #include "xfs_dir2_sf.h" | ||
54 | #include "xfs_dinode.h" | ||
55 | #include "xfs_inode.h" | ||
56 | #include "xfs_bmap.h" | ||
57 | #include "xfs_bit.h" | ||
58 | #include "xfs_rtalloc.h" | ||
59 | #include "xfs_error.h" | ||
60 | #include "xfs_itable.h" | ||
61 | #include "xfs_rw.h" | ||
62 | #include "xfs_acl.h" | ||
63 | #include "xfs_cap.h" | ||
64 | #include "xfs_mac.h" | ||
65 | #include "xfs_attr.h" | ||
66 | #include "xfs_buf_item.h" | ||
67 | #include "xfs_utils.h" | ||
68 | |||
69 | #include <linux/xattr.h> | ||
70 | #include <linux/namei.h> | ||
71 | |||
72 | |||
73 | /* | ||
74 | * Pull the link count and size up from the xfs inode to the linux inode | ||
75 | */ | ||
76 | STATIC void | ||
77 | validate_fields( | ||
78 | struct inode *ip) | ||
79 | { | ||
80 | vnode_t *vp = LINVFS_GET_VP(ip); | ||
81 | vattr_t va; | ||
82 | int error; | ||
83 | |||
84 | va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; | ||
85 | VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error); | ||
86 | if (likely(!error)) { | ||
87 | ip->i_nlink = va.va_nlink; | ||
88 | ip->i_blocks = va.va_nblocks; | ||
89 | |||
90 | /* we're under i_sem so i_size can't change under us */ | ||
91 | if (i_size_read(ip) != va.va_size) | ||
92 | i_size_write(ip, va.va_size); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Determine whether a process has a valid fs_struct (kernel daemons | ||
98 | * like knfsd don't have an fs_struct). | ||
99 | * | ||
100 | * XXX(hch): nfsd is broken, better fix it instead. | ||
101 | */ | ||
102 | STATIC inline int | ||
103 | has_fs_struct(struct task_struct *task) | ||
104 | { | ||
105 | return (task->fs != init_task.fs); | ||
106 | } | ||
107 | |||
108 | STATIC int | ||
109 | linvfs_mknod( | ||
110 | struct inode *dir, | ||
111 | struct dentry *dentry, | ||
112 | int mode, | ||
113 | dev_t rdev) | ||
114 | { | ||
115 | struct inode *ip; | ||
116 | vattr_t va; | ||
117 | vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir); | ||
118 | xfs_acl_t *default_acl = NULL; | ||
119 | attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; | ||
120 | int error; | ||
121 | |||
122 | /* | ||
123 | * Irix uses Missed'em'V split, but doesn't want to see | ||
124 | * the upper 5 bits of (14bit) major. | ||
125 | */ | ||
126 | if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff) | ||
127 | return -EINVAL; | ||
128 | |||
129 | if (test_default_acl && test_default_acl(dvp)) { | ||
130 | if (!_ACL_ALLOC(default_acl)) | ||
131 | return -ENOMEM; | ||
132 | if (!_ACL_GET_DEFAULT(dvp, default_acl)) { | ||
133 | _ACL_FREE(default_acl); | ||
134 | default_acl = NULL; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current)) | ||
139 | mode &= ~current->fs->umask; | ||
140 | |||
141 | memset(&va, 0, sizeof(va)); | ||
142 | va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; | ||
143 | va.va_type = IFTOVT(mode); | ||
144 | va.va_mode = mode; | ||
145 | |||
146 | switch (mode & S_IFMT) { | ||
147 | case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: | ||
148 | va.va_rdev = sysv_encode_dev(rdev); | ||
149 | va.va_mask |= XFS_AT_RDEV; | ||
150 | /*FALLTHROUGH*/ | ||
151 | case S_IFREG: | ||
152 | VOP_CREATE(dvp, dentry, &va, &vp, NULL, error); | ||
153 | break; | ||
154 | case S_IFDIR: | ||
155 | VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error); | ||
156 | break; | ||
157 | default: | ||
158 | error = EINVAL; | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | if (default_acl) { | ||
163 | if (!error) { | ||
164 | error = _ACL_INHERIT(vp, &va, default_acl); | ||
165 | if (!error) { | ||
166 | VMODIFY(vp); | ||
167 | } else { | ||
168 | struct dentry teardown = {}; | ||
169 | int err2; | ||
170 | |||
171 | /* Oh, the horror. | ||
172 | * If we can't add the ACL we must back out. | ||
173 | * ENOSPC can hit here, among other things. | ||
174 | */ | ||
175 | teardown.d_inode = ip = LINVFS_GET_IP(vp); | ||
176 | teardown.d_name = dentry->d_name; | ||
177 | |||
178 | vn_mark_bad(vp); | ||
179 | |||
180 | if (S_ISDIR(mode)) | ||
181 | VOP_RMDIR(dvp, &teardown, NULL, err2); | ||
182 | else | ||
183 | VOP_REMOVE(dvp, &teardown, NULL, err2); | ||
184 | VN_RELE(vp); | ||
185 | } | ||
186 | } | ||
187 | _ACL_FREE(default_acl); | ||
188 | } | ||
189 | |||
190 | if (!error) { | ||
191 | ASSERT(vp); | ||
192 | ip = LINVFS_GET_IP(vp); | ||
193 | |||
194 | if (S_ISCHR(mode) || S_ISBLK(mode)) | ||
195 | ip->i_rdev = rdev; | ||
196 | else if (S_ISDIR(mode)) | ||
197 | validate_fields(ip); | ||
198 | d_instantiate(dentry, ip); | ||
199 | validate_fields(dir); | ||
200 | } | ||
201 | return -error; | ||
202 | } | ||
203 | |||
204 | STATIC int | ||
205 | linvfs_create( | ||
206 | struct inode *dir, | ||
207 | struct dentry *dentry, | ||
208 | int mode, | ||
209 | struct nameidata *nd) | ||
210 | { | ||
211 | return linvfs_mknod(dir, dentry, mode, 0); | ||
212 | } | ||
213 | |||
214 | STATIC int | ||
215 | linvfs_mkdir( | ||
216 | struct inode *dir, | ||
217 | struct dentry *dentry, | ||
218 | int mode) | ||
219 | { | ||
220 | return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0); | ||
221 | } | ||
222 | |||
223 | STATIC struct dentry * | ||
224 | linvfs_lookup( | ||
225 | struct inode *dir, | ||
226 | struct dentry *dentry, | ||
227 | struct nameidata *nd) | ||
228 | { | ||
229 | struct vnode *vp = LINVFS_GET_VP(dir), *cvp; | ||
230 | int error; | ||
231 | |||
232 | if (dentry->d_name.len >= MAXNAMELEN) | ||
233 | return ERR_PTR(-ENAMETOOLONG); | ||
234 | |||
235 | VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error); | ||
236 | if (error) { | ||
237 | if (unlikely(error != ENOENT)) | ||
238 | return ERR_PTR(-error); | ||
239 | d_add(dentry, NULL); | ||
240 | return NULL; | ||
241 | } | ||
242 | |||
243 | return d_splice_alias(LINVFS_GET_IP(cvp), dentry); | ||
244 | } | ||
245 | |||
246 | STATIC int | ||
247 | linvfs_link( | ||
248 | struct dentry *old_dentry, | ||
249 | struct inode *dir, | ||
250 | struct dentry *dentry) | ||
251 | { | ||
252 | struct inode *ip; /* inode of guy being linked to */ | ||
253 | vnode_t *tdvp; /* target directory for new name/link */ | ||
254 | vnode_t *vp; /* vp of name being linked */ | ||
255 | int error; | ||
256 | |||
257 | ip = old_dentry->d_inode; /* inode being linked to */ | ||
258 | if (S_ISDIR(ip->i_mode)) | ||
259 | return -EPERM; | ||
260 | |||
261 | tdvp = LINVFS_GET_VP(dir); | ||
262 | vp = LINVFS_GET_VP(ip); | ||
263 | |||
264 | VOP_LINK(tdvp, vp, dentry, NULL, error); | ||
265 | if (!error) { | ||
266 | VMODIFY(tdvp); | ||
267 | VN_HOLD(vp); | ||
268 | validate_fields(ip); | ||
269 | d_instantiate(dentry, ip); | ||
270 | } | ||
271 | return -error; | ||
272 | } | ||
273 | |||
274 | STATIC int | ||
275 | linvfs_unlink( | ||
276 | struct inode *dir, | ||
277 | struct dentry *dentry) | ||
278 | { | ||
279 | struct inode *inode; | ||
280 | vnode_t *dvp; /* directory containing name to remove */ | ||
281 | int error; | ||
282 | |||
283 | inode = dentry->d_inode; | ||
284 | dvp = LINVFS_GET_VP(dir); | ||
285 | |||
286 | VOP_REMOVE(dvp, dentry, NULL, error); | ||
287 | if (!error) { | ||
288 | validate_fields(dir); /* For size only */ | ||
289 | validate_fields(inode); | ||
290 | } | ||
291 | |||
292 | return -error; | ||
293 | } | ||
294 | |||
295 | STATIC int | ||
296 | linvfs_symlink( | ||
297 | struct inode *dir, | ||
298 | struct dentry *dentry, | ||
299 | const char *symname) | ||
300 | { | ||
301 | struct inode *ip; | ||
302 | vattr_t va; | ||
303 | vnode_t *dvp; /* directory containing name of symlink */ | ||
304 | vnode_t *cvp; /* used to lookup symlink to put in dentry */ | ||
305 | int error; | ||
306 | |||
307 | dvp = LINVFS_GET_VP(dir); | ||
308 | cvp = NULL; | ||
309 | |||
310 | memset(&va, 0, sizeof(va)); | ||
311 | va.va_type = VLNK; | ||
312 | va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; | ||
313 | va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; | ||
314 | |||
315 | error = 0; | ||
316 | VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); | ||
317 | if (!error && cvp) { | ||
318 | ASSERT(cvp->v_type == VLNK); | ||
319 | ip = LINVFS_GET_IP(cvp); | ||
320 | d_instantiate(dentry, ip); | ||
321 | validate_fields(dir); | ||
322 | validate_fields(ip); /* size needs update */ | ||
323 | } | ||
324 | return -error; | ||
325 | } | ||
326 | |||
327 | STATIC int | ||
328 | linvfs_rmdir( | ||
329 | struct inode *dir, | ||
330 | struct dentry *dentry) | ||
331 | { | ||
332 | struct inode *inode = dentry->d_inode; | ||
333 | vnode_t *dvp = LINVFS_GET_VP(dir); | ||
334 | int error; | ||
335 | |||
336 | VOP_RMDIR(dvp, dentry, NULL, error); | ||
337 | if (!error) { | ||
338 | validate_fields(inode); | ||
339 | validate_fields(dir); | ||
340 | } | ||
341 | return -error; | ||
342 | } | ||
343 | |||
344 | STATIC int | ||
345 | linvfs_rename( | ||
346 | struct inode *odir, | ||
347 | struct dentry *odentry, | ||
348 | struct inode *ndir, | ||
349 | struct dentry *ndentry) | ||
350 | { | ||
351 | struct inode *new_inode = ndentry->d_inode; | ||
352 | vnode_t *fvp; /* from directory */ | ||
353 | vnode_t *tvp; /* target directory */ | ||
354 | int error; | ||
355 | |||
356 | fvp = LINVFS_GET_VP(odir); | ||
357 | tvp = LINVFS_GET_VP(ndir); | ||
358 | |||
359 | VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); | ||
360 | if (error) | ||
361 | return -error; | ||
362 | |||
363 | if (new_inode) | ||
364 | validate_fields(new_inode); | ||
365 | |||
366 | validate_fields(odir); | ||
367 | if (ndir != odir) | ||
368 | validate_fields(ndir); | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * careful here - this function can get called recursively, so | ||
374 | * we need to be very careful about how much stack we use. | ||
375 | * uio is kmalloced for this reason... | ||
376 | */ | ||
377 | STATIC int | ||
378 | linvfs_follow_link( | ||
379 | struct dentry *dentry, | ||
380 | struct nameidata *nd) | ||
381 | { | ||
382 | vnode_t *vp; | ||
383 | uio_t *uio; | ||
384 | iovec_t iov; | ||
385 | int error; | ||
386 | char *link; | ||
387 | |||
388 | ASSERT(dentry); | ||
389 | ASSERT(nd); | ||
390 | |||
391 | link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); | ||
392 | if (!link) { | ||
393 | nd_set_link(nd, ERR_PTR(-ENOMEM)); | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL); | ||
398 | if (!uio) { | ||
399 | kfree(link); | ||
400 | nd_set_link(nd, ERR_PTR(-ENOMEM)); | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | vp = LINVFS_GET_VP(dentry->d_inode); | ||
405 | |||
406 | iov.iov_base = link; | ||
407 | iov.iov_len = MAXNAMELEN; | ||
408 | |||
409 | uio->uio_iov = &iov; | ||
410 | uio->uio_offset = 0; | ||
411 | uio->uio_segflg = UIO_SYSSPACE; | ||
412 | uio->uio_resid = MAXNAMELEN; | ||
413 | uio->uio_iovcnt = 1; | ||
414 | |||
415 | VOP_READLINK(vp, uio, 0, NULL, error); | ||
416 | if (error) { | ||
417 | kfree(link); | ||
418 | link = ERR_PTR(-error); | ||
419 | } else { | ||
420 | link[MAXNAMELEN - uio->uio_resid] = '\0'; | ||
421 | } | ||
422 | kfree(uio); | ||
423 | |||
424 | nd_set_link(nd, link); | ||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd) | ||
429 | { | ||
430 | char *s = nd_get_link(nd); | ||
431 | if (!IS_ERR(s)) | ||
432 | kfree(s); | ||
433 | } | ||
434 | |||
435 | #ifdef CONFIG_XFS_POSIX_ACL | ||
436 | STATIC int | ||
437 | linvfs_permission( | ||
438 | struct inode *inode, | ||
439 | int mode, | ||
440 | struct nameidata *nd) | ||
441 | { | ||
442 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
443 | int error; | ||
444 | |||
445 | mode <<= 6; /* convert from linux to vnode access bits */ | ||
446 | VOP_ACCESS(vp, mode, NULL, error); | ||
447 | return -error; | ||
448 | } | ||
449 | #else | ||
450 | #define linvfs_permission NULL | ||
451 | #endif | ||
452 | |||
453 | STATIC int | ||
454 | linvfs_getattr( | ||
455 | struct vfsmount *mnt, | ||
456 | struct dentry *dentry, | ||
457 | struct kstat *stat) | ||
458 | { | ||
459 | struct inode *inode = dentry->d_inode; | ||
460 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
461 | int error = 0; | ||
462 | |||
463 | if (unlikely(vp->v_flag & VMODIFIED)) | ||
464 | error = vn_revalidate(vp); | ||
465 | if (!error) | ||
466 | generic_fillattr(inode, stat); | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | STATIC int | ||
471 | linvfs_setattr( | ||
472 | struct dentry *dentry, | ||
473 | struct iattr *attr) | ||
474 | { | ||
475 | struct inode *inode = dentry->d_inode; | ||
476 | unsigned int ia_valid = attr->ia_valid; | ||
477 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
478 | vattr_t vattr; | ||
479 | int flags = 0; | ||
480 | int error; | ||
481 | |||
482 | memset(&vattr, 0, sizeof(vattr_t)); | ||
483 | if (ia_valid & ATTR_UID) { | ||
484 | vattr.va_mask |= XFS_AT_UID; | ||
485 | vattr.va_uid = attr->ia_uid; | ||
486 | } | ||
487 | if (ia_valid & ATTR_GID) { | ||
488 | vattr.va_mask |= XFS_AT_GID; | ||
489 | vattr.va_gid = attr->ia_gid; | ||
490 | } | ||
491 | if (ia_valid & ATTR_SIZE) { | ||
492 | vattr.va_mask |= XFS_AT_SIZE; | ||
493 | vattr.va_size = attr->ia_size; | ||
494 | } | ||
495 | if (ia_valid & ATTR_ATIME) { | ||
496 | vattr.va_mask |= XFS_AT_ATIME; | ||
497 | vattr.va_atime = attr->ia_atime; | ||
498 | } | ||
499 | if (ia_valid & ATTR_MTIME) { | ||
500 | vattr.va_mask |= XFS_AT_MTIME; | ||
501 | vattr.va_mtime = attr->ia_mtime; | ||
502 | } | ||
503 | if (ia_valid & ATTR_CTIME) { | ||
504 | vattr.va_mask |= XFS_AT_CTIME; | ||
505 | vattr.va_ctime = attr->ia_ctime; | ||
506 | } | ||
507 | if (ia_valid & ATTR_MODE) { | ||
508 | vattr.va_mask |= XFS_AT_MODE; | ||
509 | vattr.va_mode = attr->ia_mode; | ||
510 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | ||
511 | inode->i_mode &= ~S_ISGID; | ||
512 | } | ||
513 | |||
514 | if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) | ||
515 | flags |= ATTR_UTIME; | ||
516 | #ifdef ATTR_NO_BLOCK | ||
517 | if ((ia_valid & ATTR_NO_BLOCK)) | ||
518 | flags |= ATTR_NONBLOCK; | ||
519 | #endif | ||
520 | |||
521 | VOP_SETATTR(vp, &vattr, flags, NULL, error); | ||
522 | if (error) | ||
523 | return -error; | ||
524 | vn_revalidate(vp); | ||
525 | return error; | ||
526 | } | ||
527 | |||
528 | STATIC void | ||
529 | linvfs_truncate( | ||
530 | struct inode *inode) | ||
531 | { | ||
532 | block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block); | ||
533 | } | ||
534 | |||
535 | STATIC int | ||
536 | linvfs_setxattr( | ||
537 | struct dentry *dentry, | ||
538 | const char *name, | ||
539 | const void *data, | ||
540 | size_t size, | ||
541 | int flags) | ||
542 | { | ||
543 | vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); | ||
544 | char *attr = (char *)name; | ||
545 | attrnames_t *namesp; | ||
546 | int xflags = 0; | ||
547 | int error; | ||
548 | |||
549 | namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); | ||
550 | if (!namesp) | ||
551 | return -EOPNOTSUPP; | ||
552 | attr += namesp->attr_namelen; | ||
553 | error = namesp->attr_capable(vp, NULL); | ||
554 | if (error) | ||
555 | return error; | ||
556 | |||
557 | /* Convert Linux syscall to XFS internal ATTR flags */ | ||
558 | if (flags & XATTR_CREATE) | ||
559 | xflags |= ATTR_CREATE; | ||
560 | if (flags & XATTR_REPLACE) | ||
561 | xflags |= ATTR_REPLACE; | ||
562 | xflags |= namesp->attr_flag; | ||
563 | return namesp->attr_set(vp, attr, (void *)data, size, xflags); | ||
564 | } | ||
565 | |||
566 | STATIC ssize_t | ||
567 | linvfs_getxattr( | ||
568 | struct dentry *dentry, | ||
569 | const char *name, | ||
570 | void *data, | ||
571 | size_t size) | ||
572 | { | ||
573 | vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); | ||
574 | char *attr = (char *)name; | ||
575 | attrnames_t *namesp; | ||
576 | int xflags = 0; | ||
577 | ssize_t error; | ||
578 | |||
579 | namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); | ||
580 | if (!namesp) | ||
581 | return -EOPNOTSUPP; | ||
582 | attr += namesp->attr_namelen; | ||
583 | error = namesp->attr_capable(vp, NULL); | ||
584 | if (error) | ||
585 | return error; | ||
586 | |||
587 | /* Convert Linux syscall to XFS internal ATTR flags */ | ||
588 | if (!size) { | ||
589 | xflags |= ATTR_KERNOVAL; | ||
590 | data = NULL; | ||
591 | } | ||
592 | xflags |= namesp->attr_flag; | ||
593 | return namesp->attr_get(vp, attr, (void *)data, size, xflags); | ||
594 | } | ||
595 | |||
596 | STATIC ssize_t | ||
597 | linvfs_listxattr( | ||
598 | struct dentry *dentry, | ||
599 | char *data, | ||
600 | size_t size) | ||
601 | { | ||
602 | vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); | ||
603 | int error, xflags = ATTR_KERNAMELS; | ||
604 | ssize_t result; | ||
605 | |||
606 | if (!size) | ||
607 | xflags |= ATTR_KERNOVAL; | ||
608 | xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS; | ||
609 | |||
610 | error = attr_generic_list(vp, data, size, xflags, &result); | ||
611 | if (error < 0) | ||
612 | return error; | ||
613 | return result; | ||
614 | } | ||
615 | |||
616 | STATIC int | ||
617 | linvfs_removexattr( | ||
618 | struct dentry *dentry, | ||
619 | const char *name) | ||
620 | { | ||
621 | vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); | ||
622 | char *attr = (char *)name; | ||
623 | attrnames_t *namesp; | ||
624 | int xflags = 0; | ||
625 | int error; | ||
626 | |||
627 | namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); | ||
628 | if (!namesp) | ||
629 | return -EOPNOTSUPP; | ||
630 | attr += namesp->attr_namelen; | ||
631 | error = namesp->attr_capable(vp, NULL); | ||
632 | if (error) | ||
633 | return error; | ||
634 | xflags |= namesp->attr_flag; | ||
635 | return namesp->attr_remove(vp, attr, xflags); | ||
636 | } | ||
637 | |||
638 | |||
639 | struct inode_operations linvfs_file_inode_operations = { | ||
640 | .permission = linvfs_permission, | ||
641 | .truncate = linvfs_truncate, | ||
642 | .getattr = linvfs_getattr, | ||
643 | .setattr = linvfs_setattr, | ||
644 | .setxattr = linvfs_setxattr, | ||
645 | .getxattr = linvfs_getxattr, | ||
646 | .listxattr = linvfs_listxattr, | ||
647 | .removexattr = linvfs_removexattr, | ||
648 | }; | ||
649 | |||
650 | struct inode_operations linvfs_dir_inode_operations = { | ||
651 | .create = linvfs_create, | ||
652 | .lookup = linvfs_lookup, | ||
653 | .link = linvfs_link, | ||
654 | .unlink = linvfs_unlink, | ||
655 | .symlink = linvfs_symlink, | ||
656 | .mkdir = linvfs_mkdir, | ||
657 | .rmdir = linvfs_rmdir, | ||
658 | .mknod = linvfs_mknod, | ||
659 | .rename = linvfs_rename, | ||
660 | .permission = linvfs_permission, | ||
661 | .getattr = linvfs_getattr, | ||
662 | .setattr = linvfs_setattr, | ||
663 | .setxattr = linvfs_setxattr, | ||
664 | .getxattr = linvfs_getxattr, | ||
665 | .listxattr = linvfs_listxattr, | ||
666 | .removexattr = linvfs_removexattr, | ||
667 | }; | ||
668 | |||
669 | struct inode_operations linvfs_symlink_inode_operations = { | ||
670 | .readlink = generic_readlink, | ||
671 | .follow_link = linvfs_follow_link, | ||
672 | .put_link = linvfs_put_link, | ||
673 | .permission = linvfs_permission, | ||
674 | .getattr = linvfs_getattr, | ||
675 | .setattr = linvfs_setattr, | ||
676 | .setxattr = linvfs_setxattr, | ||
677 | .getxattr = linvfs_getxattr, | ||
678 | .listxattr = linvfs_listxattr, | ||
679 | .removexattr = linvfs_removexattr, | ||
680 | }; | ||
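
The setxattr/getxattr/listxattr/removexattr methods registered in the three operation tables above are reached through the generic Linux extended-attribute system calls. A minimal userspace sketch exercising that path follows; the file path is a placeholder, and on older systems the header is <attr/xattr.h> rather than <sys/xattr.h>.

/* Illustrative sketch only -- exercises linvfs_setxattr()/linvfs_getxattr()
 * above through the standard extended-attribute system calls.
 */
#include <stdio.h>
#include <sys/xattr.h>		/* setxattr(2), getxattr(2) */

int main(void)
{
	const char *path = "/mnt/xfs/file";	/* any file on an XFS mount */
	char value[64];
	ssize_t len;

	/* The "user." prefix selects the user attribute namespace that
	 * attr_lookup_namespace() resolves on the XFS side. */
	if (setxattr(path, "user.comment", "hello", 5, 0) != 0) {
		perror("setxattr");
		return 1;
	}
	len = getxattr(path, "user.comment", value, sizeof(value) - 1);
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	value[len] = '\0';
	printf("user.comment = %s\n", value);
	return 0;
}
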
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h new file mode 100644 index 000000000000..6a69a62c36b0 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_iops.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_IOPS_H__ | ||
33 | #define __XFS_IOPS_H__ | ||
34 | |||
35 | extern struct inode_operations linvfs_file_inode_operations; | ||
36 | extern struct inode_operations linvfs_dir_inode_operations; | ||
37 | extern struct inode_operations linvfs_symlink_inode_operations; | ||
38 | |||
39 | extern struct file_operations linvfs_file_operations; | ||
40 | extern struct file_operations linvfs_invis_file_operations; | ||
41 | extern struct file_operations linvfs_dir_operations; | ||
42 | |||
43 | extern struct address_space_operations linvfs_aops; | ||
44 | |||
45 | extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
46 | extern void linvfs_unwritten_done(struct buffer_head *, int); | ||
47 | |||
48 | extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, | ||
49 | int, unsigned int, void __user *); | ||
50 | |||
51 | #endif /* __XFS_IOPS_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h new file mode 100644 index 000000000000..71bb41019a12 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -0,0 +1,374 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_LINUX__ | ||
33 | #define __XFS_LINUX__ | ||
34 | |||
35 | #include <linux/types.h> | ||
36 | #include <linux/config.h> | ||
37 | |||
38 | /* | ||
39 | * Some types are conditional depending on the target system. | ||
40 | * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. | ||
41 | * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well | ||
42 | * as requiring XFS_BIG_BLKNOS to be set. | ||
43 | */ | ||
44 | #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) | ||
45 | # define XFS_BIG_BLKNOS 1 | ||
46 | # if BITS_PER_LONG == 64 | ||
47 | # define XFS_BIG_INUMS 1 | ||
48 | # else | ||
49 | # define XFS_BIG_INUMS 0 | ||
50 | # endif | ||
51 | #else | ||
52 | # define XFS_BIG_BLKNOS 0 | ||
53 | # define XFS_BIG_INUMS 0 | ||
54 | #endif | ||
55 | |||
56 | #include <xfs_types.h> | ||
57 | #include <xfs_arch.h> | ||
58 | |||
59 | #include <kmem.h> | ||
60 | #include <mrlock.h> | ||
61 | #include <spin.h> | ||
62 | #include <sv.h> | ||
63 | #include <mutex.h> | ||
64 | #include <sema.h> | ||
65 | #include <time.h> | ||
66 | |||
67 | #include <support/qsort.h> | ||
68 | #include <support/ktrace.h> | ||
69 | #include <support/debug.h> | ||
70 | #include <support/move.h> | ||
71 | #include <support/uuid.h> | ||
72 | |||
73 | #include <linux/mm.h> | ||
74 | #include <linux/kernel.h> | ||
75 | #include <linux/blkdev.h> | ||
76 | #include <linux/slab.h> | ||
77 | #include <linux/module.h> | ||
78 | #include <linux/file.h> | ||
79 | #include <linux/swap.h> | ||
80 | #include <linux/errno.h> | ||
81 | #include <linux/sched.h> | ||
82 | #include <linux/bitops.h> | ||
83 | #include <linux/major.h> | ||
84 | #include <linux/pagemap.h> | ||
85 | #include <linux/vfs.h> | ||
86 | #include <linux/seq_file.h> | ||
87 | #include <linux/init.h> | ||
88 | #include <linux/list.h> | ||
89 | #include <linux/proc_fs.h> | ||
90 | #include <linux/version.h> | ||
91 | #include <linux/sort.h> | ||
92 | |||
93 | #include <asm/page.h> | ||
94 | #include <asm/div64.h> | ||
95 | #include <asm/param.h> | ||
96 | #include <asm/uaccess.h> | ||
97 | #include <asm/byteorder.h> | ||
98 | #include <asm/unaligned.h> | ||
99 | |||
100 | #include <xfs_behavior.h> | ||
101 | #include <xfs_vfs.h> | ||
102 | #include <xfs_cred.h> | ||
103 | #include <xfs_vnode.h> | ||
104 | #include <xfs_stats.h> | ||
105 | #include <xfs_sysctl.h> | ||
106 | #include <xfs_iops.h> | ||
107 | #include <xfs_super.h> | ||
108 | #include <xfs_globals.h> | ||
109 | #include <xfs_fs_subr.h> | ||
110 | #include <xfs_lrw.h> | ||
111 | #include <xfs_buf.h> | ||
112 | |||
113 | /* | ||
114 | * Feature macros (disable/enable) | ||
115 | */ | ||
116 | #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ | ||
117 | #define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ | ||
118 | |||
119 | /* | ||
120 | * State flag for unwritten extent buffers. | ||
121 | * | ||
122 | * We need to be able to distinguish between these and delayed | ||
123 | * allocate buffers within XFS. The generic IO path code does | ||
124 | * not need to distinguish - we use the BH_Delay flag for both | ||
125 | * delalloc and these ondisk-uninitialised buffers. | ||
126 | */ | ||
127 | BUFFER_FNS(PrivateStart, unwritten); | ||
128 | static inline void set_buffer_unwritten_io(struct buffer_head *bh) | ||
129 | { | ||
130 | bh->b_end_io = linvfs_unwritten_done; | ||
131 | } | ||
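The BUFFER_FNS() line above uses the standard <linux/buffer_head.h> accessor generator: BUFFER_FNS(PrivateStart, unwritten) token-pastes a set/clear/test trio named *_unwritten that drives the BH_PrivateStart bit in b_state, which is how XFS claims a private buffer state flag for unwritten extents. A minimal stand-alone sketch of that generator pattern (user-space stand-ins, non-atomic bit ops, and an assumed bit number rather than the real kernel definitions):

#include <stdio.h>

/* User-space stand-ins for struct buffer_head and the BH_* bits;
 * the bit value is illustrative, not the kernel's. */
struct buffer_head { unsigned long b_state; };
enum { BH_PrivateStart = 16 };

/* Same token-pasting pattern as the kernel's BUFFER_FNS(bit, name);
 * the kernel versions use atomic set_bit()/clear_bit()/test_bit(). */
#define BUFFER_FNS(bit, name)						\
static void set_buffer_##name(struct buffer_head *bh)			\
{ bh->b_state |= 1UL << BH_##bit; }					\
static void clear_buffer_##name(struct buffer_head *bh)		\
{ bh->b_state &= ~(1UL << BH_##bit); }					\
static int buffer_##name(struct buffer_head *bh)			\
{ return (bh->b_state >> BH_##bit) & 1; }

BUFFER_FNS(PrivateStart, unwritten)	/* generates the *_unwritten() helpers */

int main(void)
{
	struct buffer_head bh = { 0 };

	set_buffer_unwritten(&bh);
	printf("unwritten = %d\n", buffer_unwritten(&bh));	/* 1 */
	clear_buffer_unwritten(&bh);
	printf("unwritten = %d\n", buffer_unwritten(&bh));	/* 0 */
	return 0;
}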
132 | |||
133 | #define restricted_chown xfs_params.restrict_chown.val | ||
134 | #define irix_sgid_inherit xfs_params.sgid_inherit.val | ||
135 | #define irix_symlink_mode xfs_params.symlink_mode.val | ||
136 | #define xfs_panic_mask xfs_params.panic_mask.val | ||
137 | #define xfs_error_level xfs_params.error_level.val | ||
138 | #define xfs_syncd_centisecs xfs_params.syncd_timer.val | ||
139 | #define xfs_stats_clear xfs_params.stats_clear.val | ||
140 | #define xfs_inherit_sync xfs_params.inherit_sync.val | ||
141 | #define xfs_inherit_nodump xfs_params.inherit_nodump.val | ||
142 | #define xfs_inherit_noatime xfs_params.inherit_noatim.val | ||
143 | #define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val | ||
144 | #define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val | ||
145 | #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val | ||
146 | #define xfs_rotorstep xfs_params.rotorstep.val | ||
147 | |||
148 | #ifndef __smp_processor_id | ||
149 | #define __smp_processor_id() smp_processor_id() | ||
150 | #endif | ||
151 | #define current_cpu() __smp_processor_id() | ||
152 | #define current_pid() (current->pid) | ||
153 | #define current_fsuid(cred) (current->fsuid) | ||
154 | #define current_fsgid(cred) (current->fsgid) | ||
155 | |||
156 | #define NBPP PAGE_SIZE | ||
157 | #define DPPSHFT (PAGE_SHIFT - 9) | ||
158 | #define NDPP (1 << (PAGE_SHIFT - 9)) | ||
159 | #define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) | ||
160 | #define dtopt(DD) ((DD) >> DPPSHFT) | ||
161 | #define dpoff(DD) ((DD) & (NDPP-1)) | ||
162 | |||
163 | #define NBBY 8 /* number of bits per byte */ | ||
164 | #define NBPC PAGE_SIZE /* Number of bytes per click */ | ||
165 | #define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ | ||
166 | |||
167 | /* | ||
168 | * Size of block device i/o is parameterized here. | ||
169 | * Currently the system supports page-sized i/o. | ||
170 | */ | ||
171 | #define BLKDEV_IOSHIFT BPCSHIFT | ||
172 | #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) | ||
173 | /* number of BB's per block device block */ | ||
174 | #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) | ||
175 | |||
176 | /* bytes to clicks */ | ||
177 | #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) | ||
178 | #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) | ||
179 | #define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) | ||
180 | #define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) | ||
181 | #define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) | ||
182 | #define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) | ||
183 | |||
184 | /* off_t bytes to clicks */ | ||
185 | #define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) | ||
186 | #define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) | ||
187 | |||
188 | /* clicks to off_t bytes */ | ||
189 | #define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) | ||
190 | |||
191 | /* clicks to bytes */ | ||
192 | #define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) | ||
193 | #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) | ||
194 | #define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) | ||
195 | #define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT) | ||
196 | |||
197 | /* bytes to clicks */ | ||
198 | #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) | ||
199 | |||
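These click macros are page-granularity rounding helpers: btoc()/btoc64() round a byte count up to whole pages, btoct()/btoct64() truncate, and ctob()/ctob64() convert a page count back to bytes. A quick user-space check of the rounding behaviour, assuming a 4096-byte page (BPCSHIFT = 12) and plain unsigned long in place of the kernel typedefs:

#include <stdio.h>

#define NBPC		4096UL		/* bytes per click (page), assumed */
#define BPCSHIFT	12		/* log2(NBPC) */

#define btoc(x)		(((unsigned long)(x) + (NBPC - 1)) >> BPCSHIFT)	/* round up */
#define btoct(x)	((unsigned long)(x) >> BPCSHIFT)		/* truncate */
#define ctob(x)		((unsigned long)(x) << BPCSHIFT)		/* clicks to bytes */

int main(void)
{
	printf("btoc(5000)  = %lu\n", btoc(5000));	/* 2: 5000 bytes span two pages */
	printf("btoct(5000) = %lu\n", btoct(5000));	/* 1: only one page fully covered */
	printf("ctob(2)     = %lu\n", ctob(2));		/* 8192 */
	return 0;
}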
200 | #ifndef CELL_CAPABLE | ||
201 | #define FSC_NOTIFY_NAME_CHANGED(vp) | ||
202 | #endif | ||
203 | |||
204 | #ifndef ENOATTR | ||
205 | #define ENOATTR ENODATA /* Attribute not found */ | ||
206 | #endif | ||
207 | |||
208 | /* Note: EWRONGFS never visible outside the kernel */ | ||
209 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ | ||
210 | |||
211 | /* | ||
212 | * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't | ||
213 | * return codes out of its known range in errno. | ||
214 | * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't | ||
215 | * conflict with any code we use already or any code a driver may use) | ||
216 | * XXX Some options (currently we do #2): | ||
217 | * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] | ||
218 | * 2/ 990 ["Unknown error 990"] | ||
219 | * 3/ EUCLEAN ["Structure needs cleaning"] | ||
220 | * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] | ||
221 | */ | ||
222 | #define EFSCORRUPTED 990 /* Filesystem is corrupted */ | ||
223 | |||
224 | #define SYNCHRONIZE() barrier() | ||
225 | #define __return_address __builtin_return_address(0) | ||
226 | |||
227 | /* | ||
228 | * IRIX (BSD) quotactl makes use of separate commands for user/group, | ||
229 | * whereas on Linux the syscall encodes this information into the cmd | ||
230 | * field (see the QCMD macro in quota.h). These macros help keep the | ||
231 | * code portable - they are not visible from the syscall interface. | ||
232 | */ | ||
233 | #define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ | ||
234 | #define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ | ||
235 | |||
236 | /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ | ||
237 | /* we may well need to fine-tune this if it ever becomes an issue. */ | ||
238 | #define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */ | ||
239 | #define ndquot DQUOT_MAX_HEURISTIC | ||
240 | |||
241 | /* IRIX uses the current size of the name cache to guess a good value */ | ||
242 | /* - this isn't the same but is a good enough starting point for now. */ | ||
243 | #define DQUOT_HASH_HEURISTIC files_stat.nr_files | ||
244 | |||
245 | /* IRIX inodes maintain the project ID also, zero this field on Linux */ | ||
246 | #define DEFAULT_PROJID 0 | ||
247 | #define dfltprid DEFAULT_PROJID | ||
248 | |||
249 | #define MAXPATHLEN 1024 | ||
250 | |||
251 | #define MIN(a,b) (min(a,b)) | ||
252 | #define MAX(a,b) (max(a,b)) | ||
253 | #define howmany(x, y) (((x)+((y)-1))/(y)) | ||
254 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | ||
255 | |||
256 | #define xfs_stack_trace() dump_stack() | ||
257 | |||
258 | #define xfs_itruncate_data(ip, off) \ | ||
259 | (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) | ||
260 | |||
261 | |||
262 | /* Move the kernel do_div definition off to one side */ | ||
263 | |||
264 | #if defined __i386__ | ||
265 | /* For ia32 we need to pull some tricks to get past various versions | ||
266 | * of the compiler which do not like us using do_div in the middle | ||
267 | * of large functions. | ||
268 | */ | ||
269 | static inline __u32 xfs_do_div(void *a, __u32 b, int n) | ||
270 | { | ||
271 | __u32 mod; | ||
272 | |||
273 | switch (n) { | ||
274 | case 4: | ||
275 | mod = *(__u32 *)a % b; | ||
276 | *(__u32 *)a = *(__u32 *)a / b; | ||
277 | return mod; | ||
278 | case 8: | ||
279 | { | ||
280 | unsigned long __upper, __low, __high, __mod; | ||
281 | __u64 c = *(__u64 *)a; | ||
282 | __upper = __high = c >> 32; | ||
283 | __low = c; | ||
284 | if (__high) { | ||
285 | __upper = __high % (b); | ||
286 | __high = __high / (b); | ||
287 | } | ||
288 | asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); | ||
289 | asm("":"=A" (c):"a" (__low),"d" (__high)); | ||
290 | *(__u64 *)a = c; | ||
291 | return __mod; | ||
292 | } | ||
293 | } | ||
294 | |||
295 | /* NOTREACHED */ | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | /* Side effect free 64 bit mod operation */ | ||
300 | static inline __u32 xfs_do_mod(void *a, __u32 b, int n) | ||
301 | { | ||
302 | switch (n) { | ||
303 | case 4: | ||
304 | return *(__u32 *)a % b; | ||
305 | case 8: | ||
306 | { | ||
307 | unsigned long __upper, __low, __high, __mod; | ||
308 | __u64 c = *(__u64 *)a; | ||
309 | __upper = __high = c >> 32; | ||
310 | __low = c; | ||
311 | if (__high) { | ||
312 | __upper = __high % (b); | ||
313 | __high = __high / (b); | ||
314 | } | ||
315 | asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); | ||
316 | asm("":"=A" (c):"a" (__low),"d" (__high)); | ||
317 | return __mod; | ||
318 | } | ||
319 | } | ||
320 | |||
321 | /* NOTREACHED */ | ||
322 | return 0; | ||
323 | } | ||
324 | #else | ||
325 | static inline __u32 xfs_do_div(void *a, __u32 b, int n) | ||
326 | { | ||
327 | __u32 mod; | ||
328 | |||
329 | switch (n) { | ||
330 | case 4: | ||
331 | mod = *(__u32 *)a % b; | ||
332 | *(__u32 *)a = *(__u32 *)a / b; | ||
333 | return mod; | ||
334 | case 8: | ||
335 | mod = do_div(*(__u64 *)a, b); | ||
336 | return mod; | ||
337 | } | ||
338 | |||
339 | /* NOTREACHED */ | ||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /* Side effect free 64 bit mod operation */ | ||
344 | static inline __u32 xfs_do_mod(void *a, __u32 b, int n) | ||
345 | { | ||
346 | switch (n) { | ||
347 | case 4: | ||
348 | return *(__u32 *)a % b; | ||
349 | case 8: | ||
350 | { | ||
351 | __u64 c = *(__u64 *)a; | ||
352 | return do_div(c, b); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | /* NOTREACHED */ | ||
357 | return 0; | ||
358 | } | ||
359 | #endif | ||
360 | |||
361 | #undef do_div | ||
362 | #define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) | ||
363 | #define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) | ||
364 | |||
365 | static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) | ||
366 | { | ||
367 | x += y - 1; | ||
368 | do_div(x, y); | ||
369 | return(x * y); | ||
370 | } | ||
371 | |||
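The redefined do_div()/do_mod() keep the usual kernel contract: the dividend is updated in place through the pointer and the 32-bit remainder is returned, with the operand width picked by sizeof(a). roundup_64() below leans on that in-place behaviour. A user-space sketch of the same contract, using plain C division rather than the asm or generic do_div paths above:

#include <stdio.h>
#include <stdint.h>

/* Same contract as the helpers above: divide *a in place, return remainder. */
static uint32_t sketch_do_div(uint64_t *a, uint32_t b)
{
	uint32_t mod = (uint32_t)(*a % b);

	*a /= b;
	return mod;
}

#define do_div(a, b)	sketch_do_div(&(a), (b))

static uint64_t roundup_64(uint64_t x, uint32_t y)
{
	x += y - 1;		/* bias so truncating division rounds up */
	do_div(x, y);
	return x * y;
}

int main(void)
{
	uint64_t v = 10000;
	uint32_t rem = do_div(v, 4096);		/* v -> 2, rem -> 1808 */

	printf("quotient = %llu, remainder = %u\n", (unsigned long long)v, rem);
	printf("roundup_64(10000, 4096) = %llu\n",	/* 12288 */
	       (unsigned long long)roundup_64(10000, 4096));
	return 0;
}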
372 | #define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) | ||
373 | |||
374 | #endif /* __XFS_LINUX__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c new file mode 100644 index 000000000000..ff145fd0d1a4 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -0,0 +1,1082 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | /* | ||
33 | * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff) | ||
34 | * | ||
35 | */ | ||
36 | |||
37 | #include "xfs.h" | ||
38 | |||
39 | #include "xfs_fs.h" | ||
40 | #include "xfs_inum.h" | ||
41 | #include "xfs_log.h" | ||
42 | #include "xfs_trans.h" | ||
43 | #include "xfs_sb.h" | ||
44 | #include "xfs_ag.h" | ||
45 | #include "xfs_dir.h" | ||
46 | #include "xfs_dir2.h" | ||
47 | #include "xfs_alloc.h" | ||
48 | #include "xfs_dmapi.h" | ||
49 | #include "xfs_quota.h" | ||
50 | #include "xfs_mount.h" | ||
51 | #include "xfs_alloc_btree.h" | ||
52 | #include "xfs_bmap_btree.h" | ||
53 | #include "xfs_ialloc_btree.h" | ||
54 | #include "xfs_btree.h" | ||
55 | #include "xfs_ialloc.h" | ||
56 | #include "xfs_attr_sf.h" | ||
57 | #include "xfs_dir_sf.h" | ||
58 | #include "xfs_dir2_sf.h" | ||
59 | #include "xfs_dinode.h" | ||
60 | #include "xfs_inode.h" | ||
61 | #include "xfs_bmap.h" | ||
62 | #include "xfs_bit.h" | ||
63 | #include "xfs_rtalloc.h" | ||
64 | #include "xfs_error.h" | ||
65 | #include "xfs_itable.h" | ||
66 | #include "xfs_rw.h" | ||
67 | #include "xfs_acl.h" | ||
68 | #include "xfs_cap.h" | ||
69 | #include "xfs_mac.h" | ||
70 | #include "xfs_attr.h" | ||
71 | #include "xfs_inode_item.h" | ||
72 | #include "xfs_buf_item.h" | ||
73 | #include "xfs_utils.h" | ||
74 | #include "xfs_iomap.h" | ||
75 | |||
76 | #include <linux/capability.h> | ||
77 | #include <linux/writeback.h> | ||
78 | |||
79 | |||
80 | #if defined(XFS_RW_TRACE) | ||
81 | void | ||
82 | xfs_rw_enter_trace( | ||
83 | int tag, | ||
84 | xfs_iocore_t *io, | ||
85 | void *data, | ||
86 | size_t segs, | ||
87 | loff_t offset, | ||
88 | int ioflags) | ||
89 | { | ||
90 | xfs_inode_t *ip = XFS_IO_INODE(io); | ||
91 | |||
92 | if (ip->i_rwtrace == NULL) | ||
93 | return; | ||
94 | ktrace_enter(ip->i_rwtrace, | ||
95 | (void *)(unsigned long)tag, | ||
96 | (void *)ip, | ||
97 | (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), | ||
98 | (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), | ||
99 | (void *)data, | ||
100 | (void *)((unsigned long)segs), | ||
101 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), | ||
102 | (void *)((unsigned long)(offset & 0xffffffff)), | ||
103 | (void *)((unsigned long)ioflags), | ||
104 | (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), | ||
105 | (void *)((unsigned long)(io->io_new_size & 0xffffffff)), | ||
106 | (void *)NULL, | ||
107 | (void *)NULL, | ||
108 | (void *)NULL, | ||
109 | (void *)NULL, | ||
110 | (void *)NULL); | ||
111 | } | ||
112 | |||
113 | void | ||
114 | xfs_inval_cached_trace( | ||
115 | xfs_iocore_t *io, | ||
116 | xfs_off_t offset, | ||
117 | xfs_off_t len, | ||
118 | xfs_off_t first, | ||
119 | xfs_off_t last) | ||
120 | { | ||
121 | xfs_inode_t *ip = XFS_IO_INODE(io); | ||
122 | |||
123 | if (ip->i_rwtrace == NULL) | ||
124 | return; | ||
125 | ktrace_enter(ip->i_rwtrace, | ||
126 | (void *)(__psint_t)XFS_INVAL_CACHED, | ||
127 | (void *)ip, | ||
128 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), | ||
129 | (void *)((unsigned long)(offset & 0xffffffff)), | ||
130 | (void *)((unsigned long)((len >> 32) & 0xffffffff)), | ||
131 | (void *)((unsigned long)(len & 0xffffffff)), | ||
132 | (void *)((unsigned long)((first >> 32) & 0xffffffff)), | ||
133 | (void *)((unsigned long)(first & 0xffffffff)), | ||
134 | (void *)((unsigned long)((last >> 32) & 0xffffffff)), | ||
135 | (void *)((unsigned long)(last & 0xffffffff)), | ||
136 | (void *)NULL, | ||
137 | (void *)NULL, | ||
138 | (void *)NULL, | ||
139 | (void *)NULL, | ||
140 | (void *)NULL, | ||
141 | (void *)NULL); | ||
142 | } | ||
143 | #endif | ||
144 | |||
145 | /* | ||
146 | * xfs_iozero | ||
147 | * | ||
148 | * xfs_iozero clears the specified range of buffer supplied, | ||
149 | * and marks all the affected blocks as valid and modified. If | ||
150 | * an affected block is not allocated, it will be allocated. If | ||
151 | * an affected block is not completely overwritten, and is not | ||
152 | * valid before the operation, it will be read from disk before | ||
153 | * being partially zeroed. | ||
154 | */ | ||
155 | STATIC int | ||
156 | xfs_iozero( | ||
157 | struct inode *ip, /* inode */ | ||
158 | loff_t pos, /* offset in file */ | ||
159 | size_t count, /* size of data to zero */ | ||
160 | loff_t end_size) /* max file size to set */ | ||
161 | { | ||
162 | unsigned bytes; | ||
163 | struct page *page; | ||
164 | struct address_space *mapping; | ||
165 | char *kaddr; | ||
166 | int status; | ||
167 | |||
168 | mapping = ip->i_mapping; | ||
169 | do { | ||
170 | unsigned long index, offset; | ||
171 | |||
172 | offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ | ||
173 | index = pos >> PAGE_CACHE_SHIFT; | ||
174 | bytes = PAGE_CACHE_SIZE - offset; | ||
175 | if (bytes > count) | ||
176 | bytes = count; | ||
177 | |||
178 | status = -ENOMEM; | ||
179 | page = grab_cache_page(mapping, index); | ||
180 | if (!page) | ||
181 | break; | ||
182 | |||
183 | kaddr = kmap(page); | ||
184 | status = mapping->a_ops->prepare_write(NULL, page, offset, | ||
185 | offset + bytes); | ||
186 | if (status) { | ||
187 | goto unlock; | ||
188 | } | ||
189 | |||
190 | memset((void *) (kaddr + offset), 0, bytes); | ||
191 | flush_dcache_page(page); | ||
192 | status = mapping->a_ops->commit_write(NULL, page, offset, | ||
193 | offset + bytes); | ||
194 | if (!status) { | ||
195 | pos += bytes; | ||
196 | count -= bytes; | ||
197 | if (pos > i_size_read(ip)) | ||
198 | i_size_write(ip, pos < end_size ? pos : end_size); | ||
199 | } | ||
200 | |||
201 | unlock: | ||
202 | kunmap(page); | ||
203 | unlock_page(page); | ||
204 | page_cache_release(page); | ||
205 | if (status) | ||
206 | break; | ||
207 | } while (count); | ||
208 | |||
209 | return (-status); | ||
210 | } | ||
211 | |||
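The loop in xfs_iozero() walks the range one page-cache page at a time: offset is the starting byte within the current page, bytes is clipped to whatever remains, and prepare_write/commit_write bracket the memset so partially covered pages are read in first. A user-space sketch of just the per-page arithmetic, assuming 4096-byte pages:

#include <stdio.h>

#define PAGE_CACHE_SIZE		4096UL
#define PAGE_CACHE_SHIFT	12

int main(void)
{
	unsigned long pos = 10000, count = 9000;	/* arbitrary unaligned range */

	while (count) {
		unsigned long offset = pos & (PAGE_CACHE_SIZE - 1);	/* within page */
		unsigned long index  = pos >> PAGE_CACHE_SHIFT;		/* page number */
		unsigned long bytes  = PAGE_CACHE_SIZE - offset;

		if (bytes > count)
			bytes = count;

		/* xfs_iozero would grab page 'index' here and zero 'bytes' at 'offset' */
		printf("page %lu: zero %lu bytes at offset %lu\n", index, bytes, offset);

		pos   += bytes;
		count -= bytes;
	}
	return 0;
}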
212 | /* | ||
213 | * xfs_inval_cached_pages | ||
214 | * | ||
215 | * This routine is responsible for keeping direct I/O and buffered I/O | ||
216 | * somewhat coherent. From here we make sure that we're at least | ||
217 | * temporarily holding the inode I/O lock exclusively and then call | ||
218 | * the page cache to flush and invalidate any cached pages. If there | ||
219 | * are no cached pages this routine will be very quick. | ||
220 | */ | ||
221 | void | ||
222 | xfs_inval_cached_pages( | ||
223 | vnode_t *vp, | ||
224 | xfs_iocore_t *io, | ||
225 | xfs_off_t offset, | ||
226 | int write, | ||
227 | int relock) | ||
228 | { | ||
229 | if (VN_CACHED(vp)) { | ||
230 | xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1); | ||
231 | VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED); | ||
232 | } | ||
233 | |||
234 | } | ||
235 | |||
236 | ssize_t /* bytes read, or (-) error */ | ||
237 | xfs_read( | ||
238 | bhv_desc_t *bdp, | ||
239 | struct kiocb *iocb, | ||
240 | const struct iovec *iovp, | ||
241 | unsigned int segs, | ||
242 | loff_t *offset, | ||
243 | int ioflags, | ||
244 | cred_t *credp) | ||
245 | { | ||
246 | struct file *file = iocb->ki_filp; | ||
247 | struct inode *inode = file->f_mapping->host; | ||
248 | size_t size = 0; | ||
249 | ssize_t ret; | ||
250 | xfs_fsize_t n; | ||
251 | xfs_inode_t *ip; | ||
252 | xfs_mount_t *mp; | ||
253 | vnode_t *vp; | ||
254 | unsigned long seg; | ||
255 | |||
256 | ip = XFS_BHVTOI(bdp); | ||
257 | vp = BHV_TO_VNODE(bdp); | ||
258 | mp = ip->i_mount; | ||
259 | |||
260 | XFS_STATS_INC(xs_read_calls); | ||
261 | |||
262 | /* START copy & waste from filemap.c */ | ||
263 | for (seg = 0; seg < segs; seg++) { | ||
264 | const struct iovec *iv = &iovp[seg]; | ||
265 | |||
266 | /* | ||
267 | * If any segment has a negative length, or the cumulative | ||
268 | * length ever wraps negative then return -EINVAL. | ||
269 | */ | ||
270 | size += iv->iov_len; | ||
271 | if (unlikely((ssize_t)(size|iv->iov_len) < 0)) | ||
272 | return XFS_ERROR(-EINVAL); | ||
273 | } | ||
274 | /* END copy & waste from filemap.c */ | ||
275 | |||
276 | if (unlikely(ioflags & IO_ISDIRECT)) { | ||
277 | xfs_buftarg_t *target = | ||
278 | (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? | ||
279 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
280 | if ((*offset & target->pbr_smask) || | ||
281 | (size & target->pbr_smask)) { | ||
282 | if (*offset == ip->i_d.di_size) { | ||
283 | return (0); | ||
284 | } | ||
285 | return -XFS_ERROR(EINVAL); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | n = XFS_MAXIOFFSET(mp) - *offset; | ||
290 | if ((n <= 0) || (size == 0)) | ||
291 | return 0; | ||
292 | |||
293 | if (n < size) | ||
294 | size = n; | ||
295 | |||
296 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
297 | return -EIO; | ||
298 | } | ||
299 | |||
300 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
301 | down(&inode->i_sem); | ||
302 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
303 | |||
304 | if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && | ||
305 | !(ioflags & IO_INVIS)) { | ||
306 | vrwlock_t locktype = VRWLOCK_READ; | ||
307 | |||
308 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, | ||
309 | BHV_TO_VNODE(bdp), *offset, size, | ||
310 | FILP_DELAY_FLAG(file), &locktype); | ||
311 | if (ret) { | ||
312 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
313 | goto unlock_isem; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, | ||
318 | (void *)iovp, segs, *offset, ioflags); | ||
319 | ret = __generic_file_aio_read(iocb, iovp, segs, offset); | ||
320 | if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) | ||
321 | ret = wait_on_sync_kiocb(iocb); | ||
322 | if (ret > 0) | ||
323 | XFS_STATS_ADD(xs_read_bytes, ret); | ||
324 | |||
325 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
326 | |||
327 | if (likely(!(ioflags & IO_INVIS))) | ||
328 | xfs_ichgtime(ip, XFS_ICHGTIME_ACC); | ||
329 | |||
330 | unlock_isem: | ||
331 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
332 | up(&inode->i_sem); | ||
333 | return ret; | ||
334 | } | ||
335 | |||
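The segment-validation loop near the top of xfs_read() (the block borrowed from filemap.c) relies on a sign-bit trick: OR-ing the running total with the current iov_len and casting to ssize_t goes negative as soon as either value has its top bit set, i.e. as soon as any single length or the cumulative length exceeds SSIZE_MAX. A small user-space demonstration of that check in isolation:

#include <stdio.h>
#include <stddef.h>
#include <sys/types.h>

/* Returns 0 if the lengths are acceptable, -1 if any length or the
 * running total would exceed SSIZE_MAX (the "wraps negative" case). */
static int check_segments(const size_t *lens, unsigned int segs)
{
	size_t size = 0;
	unsigned int seg;

	for (seg = 0; seg < segs; seg++) {
		size += lens[seg];
		if ((ssize_t)(size | lens[seg]) < 0)	/* top bit set somewhere */
			return -1;
	}
	return 0;
}

int main(void)
{
	size_t ok[]  = { 4096, 8192 };
	size_t bad[] = { (size_t)-1 / 2, 2 };	/* total crosses SSIZE_MAX */

	printf("ok  -> %d\n", check_segments(ok, 2));	/* 0  */
	printf("bad -> %d\n", check_segments(bad, 2));	/* -1 */
	return 0;
}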
336 | ssize_t | ||
337 | xfs_sendfile( | ||
338 | bhv_desc_t *bdp, | ||
339 | struct file *filp, | ||
340 | loff_t *offset, | ||
341 | int ioflags, | ||
342 | size_t count, | ||
343 | read_actor_t actor, | ||
344 | void *target, | ||
345 | cred_t *credp) | ||
346 | { | ||
347 | ssize_t ret; | ||
348 | xfs_fsize_t n; | ||
349 | xfs_inode_t *ip; | ||
350 | xfs_mount_t *mp; | ||
351 | vnode_t *vp; | ||
352 | |||
353 | ip = XFS_BHVTOI(bdp); | ||
354 | vp = BHV_TO_VNODE(bdp); | ||
355 | mp = ip->i_mount; | ||
356 | |||
357 | XFS_STATS_INC(xs_read_calls); | ||
358 | |||
359 | n = XFS_MAXIOFFSET(mp) - *offset; | ||
360 | if ((n <= 0) || (count == 0)) | ||
361 | return 0; | ||
362 | |||
363 | if (n < count) | ||
364 | count = n; | ||
365 | |||
366 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
367 | return -EIO; | ||
368 | |||
369 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
370 | |||
371 | if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && | ||
372 | (!(ioflags & IO_INVIS))) { | ||
373 | vrwlock_t locktype = VRWLOCK_READ; | ||
374 | int error; | ||
375 | |||
376 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count, | ||
377 | FILP_DELAY_FLAG(filp), &locktype); | ||
378 | if (error) { | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
380 | return -error; | ||
381 | } | ||
382 | } | ||
383 | xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, | ||
384 | (void *)(unsigned long)target, count, *offset, ioflags); | ||
385 | ret = generic_file_sendfile(filp, offset, count, actor, target); | ||
386 | |||
387 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
388 | |||
389 | if (ret > 0) | ||
390 | XFS_STATS_ADD(xs_read_bytes, ret); | ||
391 | |||
392 | if (likely(!(ioflags & IO_INVIS))) | ||
393 | xfs_ichgtime(ip, XFS_ICHGTIME_ACC); | ||
394 | |||
395 | return ret; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * This routine is called to handle zeroing any space in the last | ||
400 | * block of the file that is beyond the EOF. We do this since the | ||
401 | * size is being increased without writing anything to that block | ||
402 | * and we don't want anyone to read the garbage on the disk. | ||
403 | */ | ||
404 | STATIC int /* error (positive) */ | ||
405 | xfs_zero_last_block( | ||
406 | struct inode *ip, | ||
407 | xfs_iocore_t *io, | ||
408 | xfs_off_t offset, | ||
409 | xfs_fsize_t isize, | ||
410 | xfs_fsize_t end_size) | ||
411 | { | ||
412 | xfs_fileoff_t last_fsb; | ||
413 | xfs_mount_t *mp; | ||
414 | int nimaps; | ||
415 | int zero_offset; | ||
416 | int zero_len; | ||
417 | int isize_fsb_offset; | ||
418 | int error = 0; | ||
419 | xfs_bmbt_irec_t imap; | ||
420 | loff_t loff; | ||
421 | size_t lsize; | ||
422 | |||
423 | ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); | ||
424 | ASSERT(offset > isize); | ||
425 | |||
426 | mp = io->io_mount; | ||
427 | |||
428 | isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); | ||
429 | if (isize_fsb_offset == 0) { | ||
430 | /* | ||
431 | * There are no extra bytes in the last block on disk to | ||
432 | * zero, so return. | ||
433 | */ | ||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | last_fsb = XFS_B_TO_FSBT(mp, isize); | ||
438 | nimaps = 1; | ||
439 | error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, | ||
440 | &nimaps, NULL); | ||
441 | if (error) { | ||
442 | return error; | ||
443 | } | ||
444 | ASSERT(nimaps > 0); | ||
445 | /* | ||
446 | * If the block underlying isize is just a hole, then there | ||
447 | * is nothing to zero. | ||
448 | */ | ||
449 | if (imap.br_startblock == HOLESTARTBLOCK) { | ||
450 | return 0; | ||
451 | } | ||
452 | /* | ||
453 | * Zero the part of the last block beyond the EOF, and write it | ||
454 | * out sync. We need to drop the ilock while we do this so we | ||
455 | * don't deadlock when the buffer cache calls back to us. | ||
456 | */ | ||
457 | XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); | ||
458 | loff = XFS_FSB_TO_B(mp, last_fsb); | ||
459 | lsize = XFS_FSB_TO_B(mp, 1); | ||
460 | |||
461 | zero_offset = isize_fsb_offset; | ||
462 | zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset; | ||
463 | |||
464 | error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); | ||
465 | |||
466 | XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); | ||
467 | ASSERT(error >= 0); | ||
468 | return error; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Zero any on disk space between the current EOF and the new, | ||
473 | * larger EOF. This handles the normal case of zeroing the remainder | ||
474 | * of the last block in the file and the unusual case of zeroing blocks | ||
475 | * out beyond the size of the file. This second case only happens | ||
476 | * with fixed size extents and when the system crashes before the inode | ||
477 | * size was updated but after blocks were allocated. If fill is set, | ||
478 | * then any holes in the range are filled and zeroed. If not, the holes | ||
479 | * are left alone as holes. | ||
480 | */ | ||
481 | |||
482 | int /* error (positive) */ | ||
483 | xfs_zero_eof( | ||
484 | vnode_t *vp, | ||
485 | xfs_iocore_t *io, | ||
486 | xfs_off_t offset, /* starting I/O offset */ | ||
487 | xfs_fsize_t isize, /* current inode size */ | ||
488 | xfs_fsize_t end_size) /* terminal inode size */ | ||
489 | { | ||
490 | struct inode *ip = LINVFS_GET_IP(vp); | ||
491 | xfs_fileoff_t start_zero_fsb; | ||
492 | xfs_fileoff_t end_zero_fsb; | ||
493 | xfs_fileoff_t prev_zero_fsb; | ||
494 | xfs_fileoff_t zero_count_fsb; | ||
495 | xfs_fileoff_t last_fsb; | ||
496 | xfs_extlen_t buf_len_fsb; | ||
497 | xfs_extlen_t prev_zero_count; | ||
498 | xfs_mount_t *mp; | ||
499 | int nimaps; | ||
500 | int error = 0; | ||
501 | xfs_bmbt_irec_t imap; | ||
502 | loff_t loff; | ||
503 | size_t lsize; | ||
504 | |||
505 | ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); | ||
506 | ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); | ||
507 | |||
508 | mp = io->io_mount; | ||
509 | |||
510 | /* | ||
511 | * First handle zeroing the block on which isize resides. | ||
512 | * We only zero a part of that block so it is handled specially. | ||
513 | */ | ||
514 | error = xfs_zero_last_block(ip, io, offset, isize, end_size); | ||
515 | if (error) { | ||
516 | ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); | ||
517 | ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); | ||
518 | return error; | ||
519 | } | ||
520 | |||
521 | /* | ||
522 | * Calculate the range between the new size and the old | ||
523 | * where blocks needing to be zeroed may exist. To get the | ||
524 | * block where the last byte in the file currently resides, | ||
525 | * we need to subtract one from the size and truncate back | ||
526 | * to a block boundary. We subtract 1 in case the size is | ||
527 | * exactly on a block boundary. | ||
528 | */ | ||
529 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; | ||
530 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); | ||
531 | end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); | ||
532 | ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); | ||
533 | if (last_fsb == end_zero_fsb) { | ||
534 | /* | ||
535 | * The size was only incremented on its last block. | ||
536 | * We took care of that above, so just return. | ||
537 | */ | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | ASSERT(start_zero_fsb <= end_zero_fsb); | ||
542 | prev_zero_fsb = NULLFILEOFF; | ||
543 | prev_zero_count = 0; | ||
544 | while (start_zero_fsb <= end_zero_fsb) { | ||
545 | nimaps = 1; | ||
546 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; | ||
547 | error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, | ||
548 | 0, NULL, 0, &imap, &nimaps, NULL); | ||
549 | if (error) { | ||
550 | ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); | ||
551 | ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); | ||
552 | return error; | ||
553 | } | ||
554 | ASSERT(nimaps > 0); | ||
555 | |||
556 | if (imap.br_state == XFS_EXT_UNWRITTEN || | ||
557 | imap.br_startblock == HOLESTARTBLOCK) { | ||
558 | /* | ||
559 | * This loop handles initializing pages that were | ||
560 | * partially initialized by the code below this | ||
561 | * loop. It basically zeroes the part of the page | ||
562 | * that sits on a hole and sets the page as P_HOLE | ||
563 | * and calls remapf if it is a mapped file. | ||
564 | */ | ||
565 | prev_zero_fsb = NULLFILEOFF; | ||
566 | prev_zero_count = 0; | ||
567 | start_zero_fsb = imap.br_startoff + | ||
568 | imap.br_blockcount; | ||
569 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | ||
570 | continue; | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * There are blocks in the range requested. | ||
575 | * Zero them a single write at a time. We actually | ||
576 | * don't zero the entire range returned if it is | ||
577 | * too big and simply loop around to get the rest. | ||
578 | * That is not the most efficient thing to do, but it | ||
579 | * is simple and this path should not be exercised often. | ||
580 | */ | ||
581 | buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, | ||
582 | mp->m_writeio_blocks << 8); | ||
583 | /* | ||
584 | * Drop the inode lock while we're doing the I/O. | ||
585 | * We'll still have the iolock to protect us. | ||
586 | */ | ||
587 | XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); | ||
588 | |||
589 | loff = XFS_FSB_TO_B(mp, start_zero_fsb); | ||
590 | lsize = XFS_FSB_TO_B(mp, buf_len_fsb); | ||
591 | |||
592 | error = xfs_iozero(ip, loff, lsize, end_size); | ||
593 | |||
594 | if (error) { | ||
595 | goto out_lock; | ||
596 | } | ||
597 | |||
598 | prev_zero_fsb = start_zero_fsb; | ||
599 | prev_zero_count = buf_len_fsb; | ||
600 | start_zero_fsb = imap.br_startoff + buf_len_fsb; | ||
601 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | ||
602 | |||
603 | XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); | ||
604 | } | ||
605 | |||
606 | return 0; | ||
607 | |||
608 | out_lock: | ||
609 | |||
610 | XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); | ||
611 | ASSERT(error >= 0); | ||
612 | return error; | ||
613 | } | ||
614 | |||
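The range computed at the top of xfs_zero_eof() is plain block arithmetic: last_fsb is the block holding the current last byte (size minus one, truncated), start_zero_fsb is the old size rounded up to a block boundary, and end_zero_fsb is the block holding the byte just before the new offset; when last_fsb equals end_zero_fsb, only the tail of the last block needed zeroing and xfs_zero_last_block() has already handled it. A user-space sketch of that arithmetic, assuming 4096-byte filesystem blocks in place of the mount-parameterized XFS_B_TO_FSB*() macros:

#include <stdio.h>
#include <stdint.h>

#define BLOCKSHIFT	12			/* assume 4096-byte fs blocks */
#define BLOCKSIZE	(1UL << BLOCKSHIFT)

static uint64_t b_to_fsbt(uint64_t b) { return b >> BLOCKSHIFT; }		     /* truncate */
static uint64_t b_to_fsb(uint64_t b)  { return (b + BLOCKSIZE - 1) >> BLOCKSHIFT; } /* round up */

static void show(uint64_t isize, uint64_t offset)
{
	uint64_t last_fsb       = isize ? b_to_fsbt(isize - 1) : (uint64_t)-1;
	uint64_t start_zero_fsb = b_to_fsb(isize);
	uint64_t end_zero_fsb   = b_to_fsbt(offset - 1);

	printf("isize=%llu offset=%llu: last=%llu start=%llu end=%llu%s\n",
	       (unsigned long long)isize, (unsigned long long)offset,
	       (unsigned long long)last_fsb, (unsigned long long)start_zero_fsb,
	       (unsigned long long)end_zero_fsb,
	       last_fsb == end_zero_fsb ? "  (only the last block; nothing more to do)" : "");
}

int main(void)
{
	show(6000, 20000);	/* last=1 start=2 end=4: whole blocks 2..4 get zeroed */
	show(6000, 8000);	/* last=1 start=2 end=1: handled by xfs_zero_last_block */
	return 0;
}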
615 | ssize_t /* bytes written, or (-) error */ | ||
616 | xfs_write( | ||
617 | bhv_desc_t *bdp, | ||
618 | struct kiocb *iocb, | ||
619 | const struct iovec *iovp, | ||
620 | unsigned int nsegs, | ||
621 | loff_t *offset, | ||
622 | int ioflags, | ||
623 | cred_t *credp) | ||
624 | { | ||
625 | struct file *file = iocb->ki_filp; | ||
626 | struct address_space *mapping = file->f_mapping; | ||
627 | struct inode *inode = mapping->host; | ||
628 | unsigned long segs = nsegs; | ||
629 | xfs_inode_t *xip; | ||
630 | xfs_mount_t *mp; | ||
631 | ssize_t ret = 0, error = 0; | ||
632 | xfs_fsize_t isize, new_size; | ||
633 | xfs_iocore_t *io; | ||
634 | vnode_t *vp; | ||
635 | unsigned long seg; | ||
636 | int iolock; | ||
637 | int eventsent = 0; | ||
638 | vrwlock_t locktype; | ||
639 | size_t ocount = 0, count; | ||
640 | loff_t pos; | ||
641 | int need_isem = 1, need_flush = 0; | ||
642 | |||
643 | XFS_STATS_INC(xs_write_calls); | ||
644 | |||
645 | vp = BHV_TO_VNODE(bdp); | ||
646 | xip = XFS_BHVTOI(bdp); | ||
647 | |||
648 | for (seg = 0; seg < segs; seg++) { | ||
649 | const struct iovec *iv = &iovp[seg]; | ||
650 | |||
651 | /* | ||
652 | * If any segment has a negative length, or the cumulative | ||
653 | * length ever wraps negative then return -EINVAL. | ||
654 | */ | ||
655 | ocount += iv->iov_len; | ||
656 | if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) | ||
657 | return -EINVAL; | ||
658 | if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) | ||
659 | continue; | ||
660 | if (seg == 0) | ||
661 | return -EFAULT; | ||
662 | segs = seg; | ||
663 | ocount -= iv->iov_len; /* This segment is no good */ | ||
664 | break; | ||
665 | } | ||
666 | |||
667 | count = ocount; | ||
668 | pos = *offset; | ||
669 | |||
670 | if (count == 0) | ||
671 | return 0; | ||
672 | |||
673 | io = &xip->i_iocore; | ||
674 | mp = io->io_mount; | ||
675 | |||
676 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
677 | return -EIO; | ||
678 | |||
679 | fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE); | ||
680 | |||
681 | if (ioflags & IO_ISDIRECT) { | ||
682 | xfs_buftarg_t *target = | ||
683 | (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? | ||
684 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
685 | |||
686 | if ((pos & target->pbr_smask) || (count & target->pbr_smask)) | ||
687 | return XFS_ERROR(-EINVAL); | ||
688 | |||
689 | if (!VN_CACHED(vp) && pos < i_size_read(inode)) | ||
690 | need_isem = 0; | ||
691 | |||
692 | if (VN_CACHED(vp)) | ||
693 | need_flush = 1; | ||
694 | } | ||
695 | |||
696 | relock: | ||
697 | if (need_isem) { | ||
698 | iolock = XFS_IOLOCK_EXCL; | ||
699 | locktype = VRWLOCK_WRITE; | ||
700 | |||
701 | down(&inode->i_sem); | ||
702 | } else { | ||
703 | iolock = XFS_IOLOCK_SHARED; | ||
704 | locktype = VRWLOCK_WRITE_DIRECT; | ||
705 | } | ||
706 | |||
707 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); | ||
708 | |||
709 | isize = i_size_read(inode); | ||
710 | |||
711 | if (file->f_flags & O_APPEND) | ||
712 | *offset = isize; | ||
713 | |||
714 | start: | ||
715 | error = -generic_write_checks(file, &pos, &count, | ||
716 | S_ISBLK(inode->i_mode)); | ||
717 | if (error) { | ||
718 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | ||
719 | goto out_unlock_isem; | ||
720 | } | ||
721 | |||
722 | new_size = pos + count; | ||
723 | if (new_size > isize) | ||
724 | io->io_new_size = new_size; | ||
725 | |||
726 | if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && | ||
727 | !(ioflags & IO_INVIS) && !eventsent)) { | ||
728 | loff_t savedsize = pos; | ||
729 | int dmflags = FILP_DELAY_FLAG(file); | ||
730 | |||
731 | if (need_isem) | ||
732 | dmflags |= DM_FLAGS_ISEM; | ||
733 | |||
734 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | ||
735 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, | ||
736 | pos, count, | ||
737 | dmflags, &locktype); | ||
738 | if (error) { | ||
739 | xfs_iunlock(xip, iolock); | ||
740 | goto out_unlock_isem; | ||
741 | } | ||
742 | xfs_ilock(xip, XFS_ILOCK_EXCL); | ||
743 | eventsent = 1; | ||
744 | |||
745 | /* | ||
746 | * The iolock was dropped and reacquired in XFS_SEND_DATA | ||
747 | * so we have to recheck the size when appending. | ||
748 | * We will only "goto start;" once, since having sent the | ||
749 | * event prevents another call to XFS_SEND_DATA, which is | ||
750 | * what allows the size to change in the first place. | ||
751 | */ | ||
752 | if ((file->f_flags & O_APPEND) && savedsize != isize) { | ||
753 | pos = isize = xip->i_d.di_size; | ||
754 | goto start; | ||
755 | } | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * On Linux, generic_file_write updates the times even if | ||
760 | * no data is copied in so long as the write had a size. | ||
761 | * | ||
762 | * We must update xfs' times since revalidate will overcopy xfs. | ||
763 | */ | ||
764 | if (!(ioflags & IO_INVIS)) { | ||
765 | xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
766 | inode_update_time(inode, 1); | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * If the offset is beyond the size of the file, we have a couple | ||
771 | * of things to do. First, if there is already space allocated | ||
772 | * we need to either create holes or zero the disk or ... | ||
773 | * | ||
774 | * If there is a page where the previous size lands, we need | ||
775 | * to zero it out up to the new size. | ||
776 | */ | ||
777 | |||
778 | if (pos > isize) { | ||
779 | error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, | ||
780 | isize, pos + count); | ||
781 | if (error) { | ||
782 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | ||
783 | goto out_unlock_isem; | ||
784 | } | ||
785 | } | ||
786 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | ||
787 | |||
788 | /* | ||
789 | * If we're writing the file then make sure to clear the | ||
790 | * setuid and setgid bits if the process is not being run | ||
791 | * by root. This keeps people from modifying setuid and | ||
792 | * setgid binaries. | ||
793 | */ | ||
794 | |||
795 | if (((xip->i_d.di_mode & S_ISUID) || | ||
796 | ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == | ||
797 | (S_ISGID | S_IXGRP))) && | ||
798 | !capable(CAP_FSETID)) { | ||
799 | error = xfs_write_clear_setuid(xip); | ||
800 | if (likely(!error)) | ||
801 | error = -remove_suid(file->f_dentry); | ||
802 | if (unlikely(error)) { | ||
803 | xfs_iunlock(xip, iolock); | ||
804 | goto out_unlock_isem; | ||
805 | } | ||
806 | } | ||
807 | |||
808 | retry: | ||
809 | /* We can write back this queue in page reclaim */ | ||
810 | current->backing_dev_info = mapping->backing_dev_info; | ||
811 | |||
812 | if ((ioflags & IO_ISDIRECT)) { | ||
813 | if (need_flush) { | ||
814 | xfs_inval_cached_trace(io, pos, -1, | ||
815 | ctooff(offtoct(pos)), -1); | ||
816 | VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), | ||
817 | -1, FI_REMAPF_LOCKED); | ||
818 | } | ||
819 | |||
820 | if (need_isem) { | ||
821 | /* demote the lock now the cached pages are gone */ | ||
822 | XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); | ||
823 | up(&inode->i_sem); | ||
824 | |||
825 | iolock = XFS_IOLOCK_SHARED; | ||
826 | locktype = VRWLOCK_WRITE_DIRECT; | ||
827 | need_isem = 0; | ||
828 | } | ||
829 | |||
830 | xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, | ||
831 | *offset, ioflags); | ||
832 | ret = generic_file_direct_write(iocb, iovp, | ||
833 | &segs, pos, offset, count, ocount); | ||
834 | |||
835 | /* | ||
836 | * direct-io write to a hole: fall through to buffered I/O | ||
837 | * for completing the rest of the request. | ||
838 | */ | ||
839 | if (ret >= 0 && ret != count) { | ||
840 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
841 | |||
842 | pos += ret; | ||
843 | count -= ret; | ||
844 | |||
845 | need_isem = 1; | ||
846 | ioflags &= ~IO_ISDIRECT; | ||
847 | xfs_iunlock(xip, iolock); | ||
848 | goto relock; | ||
849 | } | ||
850 | } else { | ||
851 | xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, | ||
852 | *offset, ioflags); | ||
853 | ret = generic_file_buffered_write(iocb, iovp, segs, | ||
854 | pos, offset, count, ret); | ||
855 | } | ||
856 | |||
857 | current->backing_dev_info = NULL; | ||
858 | |||
859 | if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) | ||
860 | ret = wait_on_sync_kiocb(iocb); | ||
861 | |||
862 | if ((ret == -ENOSPC) && | ||
863 | DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && | ||
864 | !(ioflags & IO_INVIS)) { | ||
865 | |||
866 | xfs_rwunlock(bdp, locktype); | ||
867 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, | ||
868 | DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, | ||
869 | 0, 0, 0); /* Delay flag intentionally unused */ | ||
870 | if (error) | ||
871 | goto out_unlock_isem; | ||
872 | xfs_rwlock(bdp, locktype); | ||
873 | pos = xip->i_d.di_size; | ||
874 | ret = 0; | ||
875 | goto retry; | ||
876 | } | ||
877 | |||
878 | if (*offset > xip->i_d.di_size) { | ||
879 | xfs_ilock(xip, XFS_ILOCK_EXCL); | ||
880 | if (*offset > xip->i_d.di_size) { | ||
881 | xip->i_d.di_size = *offset; | ||
882 | i_size_write(inode, *offset); | ||
883 | xip->i_update_core = 1; | ||
884 | xip->i_update_size = 1; | ||
885 | } | ||
886 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | ||
887 | } | ||
888 | |||
889 | error = -ret; | ||
890 | if (ret <= 0) | ||
891 | goto out_unlock_internal; | ||
892 | |||
893 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
894 | |||
895 | /* Handle various SYNC-type writes */ | ||
896 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | ||
897 | /* | ||
898 | * If we're treating this as O_DSYNC and we have not updated the | ||
899 | * size, force the log. | ||
900 | */ | ||
901 | if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && | ||
902 | !(xip->i_update_size)) { | ||
903 | xfs_inode_log_item_t *iip = xip->i_itemp; | ||
904 | |||
905 | /* | ||
906 | * If an allocation transaction occurred | ||
907 | * without extending the size, then we have to force | ||
908 | * the log up to the proper point to ensure that the | ||
909 | * allocation is permanent. We can't count on | ||
910 | * the fact that buffered writes lock out direct I/O | ||
911 | * writes - the direct I/O write could have extended | ||
912 | * the size nontransactionally, then finished before | ||
913 | * we started. xfs_write_file will think that the file | ||
914 | * didn't grow but the update isn't safe unless the | ||
915 | * size change is logged. | ||
916 | * | ||
917 | * Force the log if we've committed a transaction | ||
918 | * against the inode or if someone else has and | ||
919 | * the commit record hasn't gone to disk (e.g. | ||
920 | * the inode is pinned). This guarantees that | ||
921 | * all changes affecting the inode are permanent | ||
922 | * when we return. | ||
923 | */ | ||
924 | if (iip && iip->ili_last_lsn) { | ||
925 | xfs_log_force(mp, iip->ili_last_lsn, | ||
926 | XFS_LOG_FORCE | XFS_LOG_SYNC); | ||
927 | } else if (xfs_ipincount(xip) > 0) { | ||
928 | xfs_log_force(mp, (xfs_lsn_t)0, | ||
929 | XFS_LOG_FORCE | XFS_LOG_SYNC); | ||
930 | } | ||
931 | |||
932 | } else { | ||
933 | xfs_trans_t *tp; | ||
934 | |||
935 | /* | ||
936 | * O_SYNC or O_DSYNC _with_ a size update are handled | ||
937 | * the same way. | ||
938 | * | ||
939 | * If the write was synchronous then we need to make | ||
940 | * sure that the inode modification time is permanent. | ||
941 | * We'll have updated the timestamp above, so here | ||
942 | * we use a synchronous transaction to log the inode. | ||
943 | * It's not fast, but it's necessary. | ||
944 | * | ||
945 | * If this is a dsync write and the size got changed | ||
946 | * non-transactionally, then we need to ensure that | ||
947 | * the size change gets logged in a synchronous | ||
948 | * transaction. | ||
949 | */ | ||
950 | |||
951 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); | ||
952 | if ((error = xfs_trans_reserve(tp, 0, | ||
953 | XFS_SWRITE_LOG_RES(mp), | ||
954 | 0, 0, 0))) { | ||
955 | /* Transaction reserve failed */ | ||
956 | xfs_trans_cancel(tp, 0); | ||
957 | } else { | ||
958 | /* Transaction reserve successful */ | ||
959 | xfs_ilock(xip, XFS_ILOCK_EXCL); | ||
960 | xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); | ||
961 | xfs_trans_ihold(tp, xip); | ||
962 | xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); | ||
963 | xfs_trans_set_sync(tp); | ||
964 | error = xfs_trans_commit(tp, 0, NULL); | ||
965 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | ||
966 | } | ||
967 | if (error) | ||
968 | goto out_unlock_internal; | ||
969 | } | ||
970 | |||
971 | xfs_rwunlock(bdp, locktype); | ||
972 | if (need_isem) | ||
973 | up(&inode->i_sem); | ||
974 | |||
975 | error = sync_page_range(inode, mapping, pos, ret); | ||
976 | if (!error) | ||
977 | error = ret; | ||
978 | return error; | ||
979 | } | ||
980 | |||
981 | out_unlock_internal: | ||
982 | xfs_rwunlock(bdp, locktype); | ||
983 | out_unlock_isem: | ||
984 | if (need_isem) | ||
985 | up(&inode->i_sem); | ||
986 | return -error; | ||
987 | } | ||
988 | |||
989 | /* | ||
990 | * All xfs metadata buffers except log state machine buffers | ||
991 | * get this attached as their b_bdstrat callback function. | ||
992 | * This is so that we can catch a buffer | ||
993 | * after prematurely unpinning it to forcibly shutdown the filesystem. | ||
994 | */ | ||
995 | int | ||
996 | xfs_bdstrat_cb(struct xfs_buf *bp) | ||
997 | { | ||
998 | xfs_mount_t *mp; | ||
999 | |||
1000 | mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); | ||
1001 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
1002 | pagebuf_iorequest(bp); | ||
1003 | return 0; | ||
1004 | } else { | ||
1005 | xfs_buftrace("XFS__BDSTRAT IOERROR", bp); | ||
1006 | /* | ||
1007 | * Metadata write that didn't get logged but | ||
1008 | * written delayed anyway. These aren't associated | ||
1009 | * with a transaction, and can be ignored. | ||
1010 | */ | ||
1011 | if (XFS_BUF_IODONE_FUNC(bp) == NULL && | ||
1012 | (XFS_BUF_ISREAD(bp)) == 0) | ||
1013 | return (xfs_bioerror_relse(bp)); | ||
1014 | else | ||
1015 | return (xfs_bioerror(bp)); | ||
1016 | } | ||
1017 | } | ||
1018 | |||
1019 | |||
1020 | int | ||
1021 | xfs_bmap(bhv_desc_t *bdp, | ||
1022 | xfs_off_t offset, | ||
1023 | ssize_t count, | ||
1024 | int flags, | ||
1025 | xfs_iomap_t *iomapp, | ||
1026 | int *niomaps) | ||
1027 | { | ||
1028 | xfs_inode_t *ip = XFS_BHVTOI(bdp); | ||
1029 | xfs_iocore_t *io = &ip->i_iocore; | ||
1030 | |||
1031 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
1032 | ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == | ||
1033 | ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); | ||
1034 | |||
1035 | return xfs_iomap(io, offset, count, flags, iomapp, niomaps); | ||
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * Wrapper around bdstrat so that we can stop data | ||
1040 | * from going to disk in case we are shutting down the filesystem. | ||
1041 | * Typically user data goes thru this path; one of the exceptions | ||
1042 | * is the superblock. | ||
1043 | */ | ||
1044 | int | ||
1045 | xfsbdstrat( | ||
1046 | struct xfs_mount *mp, | ||
1047 | struct xfs_buf *bp) | ||
1048 | { | ||
1049 | ASSERT(mp); | ||
1050 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
1051 | /* Grio redirection would go here | ||
1052 | * if (XFS_BUF_IS_GRIO(bp)) { | ||
1053 | */ | ||
1054 | |||
1055 | pagebuf_iorequest(bp); | ||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
1059 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); | ||
1060 | return (xfs_bioerror_relse(bp)); | ||
1061 | } | ||
1062 | |||
1063 | /* | ||
1064 | * If the underlying (data/log/rt) device is readonly, there are some | ||
1065 | * operations that cannot proceed. | ||
1066 | */ | ||
1067 | int | ||
1068 | xfs_dev_is_read_only( | ||
1069 | xfs_mount_t *mp, | ||
1070 | char *message) | ||
1071 | { | ||
1072 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || | ||
1073 | xfs_readonly_buftarg(mp->m_logdev_targp) || | ||
1074 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { | ||
1075 | cmn_err(CE_NOTE, | ||
1076 | "XFS: %s required on read-only device.", message); | ||
1077 | cmn_err(CE_NOTE, | ||
1078 | "XFS: write access unavailable, cannot proceed."); | ||
1079 | return EROFS; | ||
1080 | } | ||
1081 | return 0; | ||
1082 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h new file mode 100644 index 000000000000..d723e35254a0 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_lrw.h | |||
@@ -0,0 +1,116 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_LRW_H__ | ||
33 | #define __XFS_LRW_H__ | ||
34 | |||
35 | struct vnode; | ||
36 | struct bhv_desc; | ||
37 | struct xfs_mount; | ||
38 | struct xfs_iocore; | ||
39 | struct xfs_inode; | ||
40 | struct xfs_bmbt_irec; | ||
41 | struct xfs_buf; | ||
42 | struct xfs_iomap; | ||
43 | |||
44 | #if defined(XFS_RW_TRACE) | ||
45 | /* | ||
46 | * Defines for the trace mechanisms in xfs_lrw.c. | ||
47 | */ | ||
48 | #define XFS_RW_KTRACE_SIZE 128 | ||
49 | |||
50 | #define XFS_READ_ENTER 1 | ||
51 | #define XFS_WRITE_ENTER 2 | ||
52 | #define XFS_IOMAP_READ_ENTER 3 | ||
53 | #define XFS_IOMAP_WRITE_ENTER 4 | ||
54 | #define XFS_IOMAP_READ_MAP 5 | ||
55 | #define XFS_IOMAP_WRITE_MAP 6 | ||
56 | #define XFS_IOMAP_WRITE_NOSPACE 7 | ||
57 | #define XFS_ITRUNC_START 8 | ||
58 | #define XFS_ITRUNC_FINISH1 9 | ||
59 | #define XFS_ITRUNC_FINISH2 10 | ||
60 | #define XFS_CTRUNC1 11 | ||
61 | #define XFS_CTRUNC2 12 | ||
62 | #define XFS_CTRUNC3 13 | ||
63 | #define XFS_CTRUNC4 14 | ||
64 | #define XFS_CTRUNC5 15 | ||
65 | #define XFS_CTRUNC6 16 | ||
66 | #define XFS_BUNMAPI 17 | ||
67 | #define XFS_INVAL_CACHED 18 | ||
68 | #define XFS_DIORD_ENTER 19 | ||
69 | #define XFS_DIOWR_ENTER 20 | ||
70 | #define XFS_SENDFILE_ENTER 21 | ||
71 | #define XFS_WRITEPAGE_ENTER 22 | ||
72 | #define XFS_RELEASEPAGE_ENTER 23 | ||
73 | #define XFS_IOMAP_ALLOC_ENTER 24 | ||
74 | #define XFS_IOMAP_ALLOC_MAP 25 | ||
75 | #define XFS_IOMAP_UNWRITTEN 26 | ||
76 | extern void xfs_rw_enter_trace(int, struct xfs_iocore *, | ||
77 | void *, size_t, loff_t, int); | ||
78 | extern void xfs_inval_cached_trace(struct xfs_iocore *, | ||
79 | xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); | ||
80 | #else | ||
81 | #define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags) | ||
82 | #define xfs_inval_cached_trace(io, offset, len, first, last) | ||
83 | #endif | ||
84 | |||
85 | /* | ||
86 | * Maximum count of bmaps used by read and write paths. | ||
87 | */ | ||
88 | #define XFS_MAX_RW_NBMAPS 4 | ||
89 | |||
90 | extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int, | ||
91 | struct xfs_iomap *, int *); | ||
92 | extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | ||
93 | extern int xfs_bdstrat_cb(struct xfs_buf *); | ||
94 | |||
95 | extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, | ||
96 | xfs_fsize_t, xfs_fsize_t); | ||
97 | extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *, | ||
98 | xfs_off_t, int, int); | ||
99 | extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, | ||
100 | const struct iovec *, unsigned int, | ||
101 | loff_t *, int, struct cred *); | ||
102 | extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, | ||
103 | const struct iovec *, unsigned int, | ||
104 | loff_t *, int, struct cred *); | ||
105 | extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, | ||
106 | loff_t *, int, size_t, read_actor_t, | ||
107 | void *, struct cred *); | ||
108 | |||
109 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | ||
110 | |||
111 | #define XFS_FSB_TO_DB_IO(io,fsb) \ | ||
112 | (((io)->io_flags & XFS_IOCORE_RT) ? \ | ||
113 | XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \ | ||
114 | XFS_FSB_TO_DADDR((io)->io_mount, (fsb))) | ||
115 | |||
116 | #endif /* __XFS_LRW_H__ */ | ||
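The XFS_RW_TRACE block above compiles the trace hooks down to empty macros when tracing is disabled, so every call site stays unconditional and costs nothing in a non-tracing build. A minimal userspace sketch of that compile-out pattern follows; the MYFS_TRACE switch and trace_event() helper are invented purely for illustration.

#include <stdio.h>

/* #define MYFS_TRACE 1 */	/* uncomment to compile the real hook in */

#ifdef MYFS_TRACE
static void trace_event(int tag, const char *what, long value)
{
	fprintf(stderr, "trace: tag=%d %s=%ld\n", tag, what, value);
}
#else
/* Expands to nothing: callers keep the same syntax, zero overhead. */
#define trace_event(tag, what, value)
#endif

int main(void)
{
	trace_event(1, "read_offset", 4096L);	/* no-op unless MYFS_TRACE */
	puts("done");
	return 0;
}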
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c new file mode 100644 index 000000000000..aaf5ddba47f3 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_stats.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include <linux/proc_fs.h> | ||
35 | |||
36 | DEFINE_PER_CPU(struct xfsstats, xfsstats); | ||
37 | |||
38 | STATIC int | ||
39 | xfs_read_xfsstats( | ||
40 | char *buffer, | ||
41 | char **start, | ||
42 | off_t offset, | ||
43 | int count, | ||
44 | int *eof, | ||
45 | void *data) | ||
46 | { | ||
47 | int c, i, j, len, val; | ||
48 | __uint64_t xs_xstrat_bytes = 0; | ||
49 | __uint64_t xs_write_bytes = 0; | ||
50 | __uint64_t xs_read_bytes = 0; | ||
51 | |||
52 | static struct xstats_entry { | ||
53 | char *desc; | ||
54 | int endpoint; | ||
55 | } xstats[] = { | ||
56 | { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, | ||
57 | { "abt", XFSSTAT_END_ALLOC_BTREE }, | ||
58 | { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, | ||
59 | { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, | ||
60 | { "dir", XFSSTAT_END_DIRECTORY_OPS }, | ||
61 | { "trans", XFSSTAT_END_TRANSACTIONS }, | ||
62 | { "ig", XFSSTAT_END_INODE_OPS }, | ||
63 | { "log", XFSSTAT_END_LOG_OPS }, | ||
64 | { "push_ail", XFSSTAT_END_TAIL_PUSHING }, | ||
65 | { "xstrat", XFSSTAT_END_WRITE_CONVERT }, | ||
66 | { "rw", XFSSTAT_END_READ_WRITE_OPS }, | ||
67 | { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, | ||
68 | { "icluster", XFSSTAT_END_INODE_CLUSTER }, | ||
69 | { "vnodes", XFSSTAT_END_VNODE_OPS }, | ||
70 | { "buf", XFSSTAT_END_BUF }, | ||
71 | }; | ||
72 | |||
73 | /* Loop over all stats groups */ | ||
74 | for (i = j = len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) { | ||
75 | len += sprintf(buffer + len, "%s", xstats[i].desc); | ||
76 | /* inner loop does each group */ | ||
77 | while (j < xstats[i].endpoint) { | ||
78 | val = 0; | ||
79 | /* sum over all cpus */ | ||
80 | for (c = 0; c < NR_CPUS; c++) { | ||
81 | if (!cpu_possible(c)) continue; | ||
82 | val += *(((__u32*)&per_cpu(xfsstats, c) + j)); | ||
83 | } | ||
84 | len += sprintf(buffer + len, " %u", val); | ||
85 | j++; | ||
86 | } | ||
87 | buffer[len++] = '\n'; | ||
88 | } | ||
89 | /* extra precision counters */ | ||
90 | for (i = 0; i < NR_CPUS; i++) { | ||
91 | if (!cpu_possible(i)) continue; | ||
92 | xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; | ||
93 | xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; | ||
94 | xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; | ||
95 | } | ||
96 | |||
97 | len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", | ||
98 | xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); | ||
99 | len += sprintf(buffer + len, "debug %u\n", | ||
100 | #if defined(DEBUG) | ||
101 | 1); | ||
102 | #else | ||
103 | 0); | ||
104 | #endif | ||
105 | |||
106 | if (offset >= len) { | ||
107 | *start = buffer; | ||
108 | *eof = 1; | ||
109 | return 0; | ||
110 | } | ||
111 | *start = buffer + offset; | ||
112 | if ((len -= offset) > count) | ||
113 | return count; | ||
114 | *eof = 1; | ||
115 | |||
116 | return len; | ||
117 | } | ||
118 | |||
119 | void | ||
120 | xfs_init_procfs(void) | ||
121 | { | ||
122 | if (!proc_mkdir("fs/xfs", NULL)) | ||
123 | return; | ||
124 | create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL); | ||
125 | } | ||
126 | |||
127 | void | ||
128 | xfs_cleanup_procfs(void) | ||
129 | { | ||
130 | remove_proc_entry("fs/xfs/stat", NULL); | ||
131 | remove_proc_entry("fs/xfs", NULL); | ||
132 | } | ||
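xfs_read_xfsstats() sums each 32-bit counter across every possible CPU before emitting one line per statistics group. A small userspace sketch of just that aggregation step is shown below; the NCPUS value and the counter layout are stand-ins for the kernel's per-CPU data, not the real structures.

#include <stdio.h>

#define NCPUS		4	/* stand-in for the possible-CPU set */
#define NCOUNTERS	3	/* stand-in for one stats group */

/* One counter block per CPU, as if it were per-CPU data. */
static unsigned int stats[NCPUS][NCOUNTERS] = {
	{ 1, 10, 100 },
	{ 2, 20, 200 },
	{ 3, 30, 300 },
	{ 4, 40, 400 },
};

int main(void)
{
	int c, j;

	printf("extent_alloc");
	for (j = 0; j < NCOUNTERS; j++) {
		unsigned int val = 0;

		for (c = 0; c < NCPUS; c++)	/* sum over all CPUs */
			val += stats[c][j];
		printf(" %u", val);
	}
	putchar('\n');
	return 0;
}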
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h new file mode 100644 index 000000000000..3f756a6c3eb0 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_stats.h | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_STATS_H__ | ||
33 | #define __XFS_STATS_H__ | ||
34 | |||
35 | |||
36 | #if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) | ||
37 | |||
38 | #include <linux/percpu.h> | ||
39 | |||
40 | /* | ||
41 | * XFS global statistics | ||
42 | */ | ||
43 | struct xfsstats { | ||
44 | # define XFSSTAT_END_EXTENT_ALLOC 4 | ||
45 | __uint32_t xs_allocx; | ||
46 | __uint32_t xs_allocb; | ||
47 | __uint32_t xs_freex; | ||
48 | __uint32_t xs_freeb; | ||
49 | # define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) | ||
50 | __uint32_t xs_abt_lookup; | ||
51 | __uint32_t xs_abt_compare; | ||
52 | __uint32_t xs_abt_insrec; | ||
53 | __uint32_t xs_abt_delrec; | ||
54 | # define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) | ||
55 | __uint32_t xs_blk_mapr; | ||
56 | __uint32_t xs_blk_mapw; | ||
57 | __uint32_t xs_blk_unmap; | ||
58 | __uint32_t xs_add_exlist; | ||
59 | __uint32_t xs_del_exlist; | ||
60 | __uint32_t xs_look_exlist; | ||
61 | __uint32_t xs_cmp_exlist; | ||
62 | # define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) | ||
63 | __uint32_t xs_bmbt_lookup; | ||
64 | __uint32_t xs_bmbt_compare; | ||
65 | __uint32_t xs_bmbt_insrec; | ||
66 | __uint32_t xs_bmbt_delrec; | ||
67 | # define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) | ||
68 | __uint32_t xs_dir_lookup; | ||
69 | __uint32_t xs_dir_create; | ||
70 | __uint32_t xs_dir_remove; | ||
71 | __uint32_t xs_dir_getdents; | ||
72 | # define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) | ||
73 | __uint32_t xs_trans_sync; | ||
74 | __uint32_t xs_trans_async; | ||
75 | __uint32_t xs_trans_empty; | ||
76 | # define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) | ||
77 | __uint32_t xs_ig_attempts; | ||
78 | __uint32_t xs_ig_found; | ||
79 | __uint32_t xs_ig_frecycle; | ||
80 | __uint32_t xs_ig_missed; | ||
81 | __uint32_t xs_ig_dup; | ||
82 | __uint32_t xs_ig_reclaims; | ||
83 | __uint32_t xs_ig_attrchg; | ||
84 | # define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) | ||
85 | __uint32_t xs_log_writes; | ||
86 | __uint32_t xs_log_blocks; | ||
87 | __uint32_t xs_log_noiclogs; | ||
88 | __uint32_t xs_log_force; | ||
89 | __uint32_t xs_log_force_sleep; | ||
90 | # define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) | ||
91 | __uint32_t xs_try_logspace; | ||
92 | __uint32_t xs_sleep_logspace; | ||
93 | __uint32_t xs_push_ail; | ||
94 | __uint32_t xs_push_ail_success; | ||
95 | __uint32_t xs_push_ail_pushbuf; | ||
96 | __uint32_t xs_push_ail_pinned; | ||
97 | __uint32_t xs_push_ail_locked; | ||
98 | __uint32_t xs_push_ail_flushing; | ||
99 | __uint32_t xs_push_ail_restarts; | ||
100 | __uint32_t xs_push_ail_flush; | ||
101 | # define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) | ||
102 | __uint32_t xs_xstrat_quick; | ||
103 | __uint32_t xs_xstrat_split; | ||
104 | # define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) | ||
105 | __uint32_t xs_write_calls; | ||
106 | __uint32_t xs_read_calls; | ||
107 | # define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) | ||
108 | __uint32_t xs_attr_get; | ||
109 | __uint32_t xs_attr_set; | ||
110 | __uint32_t xs_attr_remove; | ||
111 | __uint32_t xs_attr_list; | ||
112 | # define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) | ||
113 | __uint32_t xs_iflush_count; | ||
114 | __uint32_t xs_icluster_flushcnt; | ||
115 | __uint32_t xs_icluster_flushinode; | ||
116 | # define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) | ||
117 | __uint32_t vn_active; /* # vnodes not on free lists */ | ||
118 | __uint32_t vn_alloc; /* # times vn_alloc called */ | ||
119 | __uint32_t vn_get; /* # times vn_get called */ | ||
120 | __uint32_t vn_hold; /* # times vn_hold called */ | ||
121 | __uint32_t vn_rele; /* # times vn_rele called */ | ||
122 | __uint32_t vn_reclaim; /* # times vn_reclaim called */ | ||
123 | __uint32_t vn_remove; /* # times vn_remove called */ | ||
124 | __uint32_t vn_free; /* # times vn_free called */ | ||
125 | #define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) | ||
126 | __uint32_t pb_get; | ||
127 | __uint32_t pb_create; | ||
128 | __uint32_t pb_get_locked; | ||
129 | __uint32_t pb_get_locked_waited; | ||
130 | __uint32_t pb_busy_locked; | ||
131 | __uint32_t pb_miss_locked; | ||
132 | __uint32_t pb_page_retries; | ||
133 | __uint32_t pb_page_found; | ||
134 | __uint32_t pb_get_read; | ||
135 | /* Extra precision counters */ | ||
136 | __uint64_t xs_xstrat_bytes; | ||
137 | __uint64_t xs_write_bytes; | ||
138 | __uint64_t xs_read_bytes; | ||
139 | }; | ||
140 | |||
141 | DECLARE_PER_CPU(struct xfsstats, xfsstats); | ||
142 | |||
143 | /* | ||
144 | * We don't disable preemption; we're not too worried about poking the | ||
145 | * wrong CPU's stat for now (counts are aggregated before reporting). | ||
146 | */ | ||
147 | #define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) | ||
148 | #define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) | ||
149 | #define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) | ||
150 | |||
151 | extern void xfs_init_procfs(void); | ||
152 | extern void xfs_cleanup_procfs(void); | ||
153 | |||
154 | |||
155 | #else /* !CONFIG_PROC_FS */ | ||
156 | |||
157 | # define XFS_STATS_INC(count) | ||
158 | # define XFS_STATS_DEC(count) | ||
159 | # define XFS_STATS_ADD(count, inc) | ||
160 | |||
161 | static __inline void xfs_init_procfs(void) { }; | ||
162 | static __inline void xfs_cleanup_procfs(void) { }; | ||
163 | |||
164 | #endif /* !CONFIG_PROC_FS */ | ||
165 | |||
166 | #endif /* __XFS_STATS_H__ */ | ||
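The XFSSTAT_END_* constants are cumulative field indices: each one marks where its group of __uint32_t counters ends when the struct is walked as a flat array of 32-bit words, which is exactly how xfs_read_xfsstats() iterates it. A sketch of that indexing idea, with an invented two-group struct standing in for struct xfsstats:

#include <stdio.h>

/* Cumulative end index of each group of 32-bit counters. */
#define END_GROUP_A	2			/* 2 counters */
#define END_GROUP_B	(END_GROUP_A + 3)	/* 3 more counters */

struct stats {
	unsigned int a1, a2;		/* group A */
	unsigned int b1, b2, b3;	/* group B */
};

int main(void)
{
	struct stats s = { 1, 2, 10, 20, 30 };
	unsigned int *base = (unsigned int *)&s;	/* walk as flat words */
	static const struct { const char *name; int end; } groups[] = {
		{ "group_a", END_GROUP_A },
		{ "group_b", END_GROUP_B },
	};
	int i, j = 0;

	for (i = 0; i < 2; i++) {
		printf("%s:", groups[i].name);
		while (j < groups[i].end)
			printf(" %u", base[j++]);
		putchar('\n');
	}
	return 0;
}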
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c new file mode 100644 index 000000000000..53dc658cafa6 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -0,0 +1,912 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | |||
35 | #include "xfs_inum.h" | ||
36 | #include "xfs_log.h" | ||
37 | #include "xfs_clnt.h" | ||
38 | #include "xfs_trans.h" | ||
39 | #include "xfs_sb.h" | ||
40 | #include "xfs_dir.h" | ||
41 | #include "xfs_dir2.h" | ||
42 | #include "xfs_alloc.h" | ||
43 | #include "xfs_dmapi.h" | ||
44 | #include "xfs_quota.h" | ||
45 | #include "xfs_mount.h" | ||
46 | #include "xfs_alloc_btree.h" | ||
47 | #include "xfs_bmap_btree.h" | ||
48 | #include "xfs_ialloc_btree.h" | ||
49 | #include "xfs_btree.h" | ||
50 | #include "xfs_ialloc.h" | ||
51 | #include "xfs_attr_sf.h" | ||
52 | #include "xfs_dir_sf.h" | ||
53 | #include "xfs_dir2_sf.h" | ||
54 | #include "xfs_dinode.h" | ||
55 | #include "xfs_inode.h" | ||
56 | #include "xfs_bmap.h" | ||
57 | #include "xfs_bit.h" | ||
58 | #include "xfs_rtalloc.h" | ||
59 | #include "xfs_error.h" | ||
60 | #include "xfs_itable.h" | ||
61 | #include "xfs_rw.h" | ||
62 | #include "xfs_acl.h" | ||
63 | #include "xfs_cap.h" | ||
64 | #include "xfs_mac.h" | ||
65 | #include "xfs_attr.h" | ||
66 | #include "xfs_buf_item.h" | ||
67 | #include "xfs_utils.h" | ||
68 | #include "xfs_version.h" | ||
69 | #include "xfs_ioctl32.h" | ||
70 | |||
71 | #include <linux/namei.h> | ||
72 | #include <linux/init.h> | ||
73 | #include <linux/mount.h> | ||
74 | #include <linux/writeback.h> | ||
75 | |||
76 | STATIC struct quotactl_ops linvfs_qops; | ||
77 | STATIC struct super_operations linvfs_sops; | ||
78 | STATIC kmem_zone_t *linvfs_inode_zone; | ||
79 | |||
80 | STATIC struct xfs_mount_args * | ||
81 | xfs_args_allocate( | ||
82 | struct super_block *sb) | ||
83 | { | ||
84 | struct xfs_mount_args *args; | ||
85 | |||
86 | args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); | ||
87 | args->logbufs = args->logbufsize = -1; | ||
88 | strncpy(args->fsname, sb->s_id, MAXNAMELEN); | ||
89 | |||
90 | /* Copy the already-parsed mount(2) flags we're interested in */ | ||
91 | if (sb->s_flags & MS_NOATIME) | ||
92 | args->flags |= XFSMNT_NOATIME; | ||
93 | if (sb->s_flags & MS_DIRSYNC) | ||
94 | args->flags |= XFSMNT_DIRSYNC; | ||
95 | if (sb->s_flags & MS_SYNCHRONOUS) | ||
96 | args->flags |= XFSMNT_WSYNC; | ||
97 | |||
98 | /* Default to 32 bit inodes on Linux all the time */ | ||
99 | args->flags |= XFSMNT_32BITINODES; | ||
100 | |||
101 | return args; | ||
102 | } | ||
103 | |||
104 | __uint64_t | ||
105 | xfs_max_file_offset( | ||
106 | unsigned int blockshift) | ||
107 | { | ||
108 | unsigned int pagefactor = 1; | ||
109 | unsigned int bitshift = BITS_PER_LONG - 1; | ||
110 | |||
111 | /* Figure out the maximum filesize; on Linux this can depend on | ||
112 | * the filesystem blocksize (on 32 bit platforms). | ||
113 | * __block_prepare_write does this in an [unsigned] long... | ||
114 | * page->index << (PAGE_CACHE_SHIFT - bbits) | ||
115 | * So, for page sized blocks (4K on 32 bit platforms), | ||
116 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is | ||
117 | * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | ||
118 | * but for smaller blocksizes it is less (bbits = log2 bsize). | ||
119 | * Note1: get_block_t takes a long (implicit cast from above) | ||
120 | * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch | ||
121 | * can optionally convert the [unsigned] long from above into | ||
122 | * an [unsigned] long long. | ||
123 | */ | ||
124 | |||
125 | #if BITS_PER_LONG == 32 | ||
126 | # if defined(CONFIG_LBD) | ||
127 | ASSERT(sizeof(sector_t) == 8); | ||
128 | pagefactor = PAGE_CACHE_SIZE; | ||
129 | bitshift = BITS_PER_LONG; | ||
130 | # else | ||
131 | pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); | ||
132 | # endif | ||
133 | #endif | ||
134 | |||
135 | return (((__uint64_t)pagefactor) << bitshift) - 1; | ||
136 | } | ||
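Working the comment's arithmetic through for the common cases (a hedged check, not part of the driver): on 64-bit the limit is 2^63 - 1; on 32-bit with 4KB blocks it is (4096 << 31) - 1 = 2^43 - 1, roughly 8TB; with 1KB blocks the page factor drops to 1024 and the limit to 2^41 - 1, roughly 2TB. A tiny standalone calculation, assuming 4KB pages and the non-LBD 32-bit branch:

#include <stdio.h>

/* Assumed constants for the 32-bit, 4KB-page case. */
#define PAGE_SIZE_	4096ULL
#define PAGE_SHIFT_	12

static unsigned long long max_off_32bit(unsigned int blockshift)
{
	unsigned long long pagefactor = PAGE_SIZE_ >> (PAGE_SHIFT_ - blockshift);

	return (pagefactor << 31) - 1;	/* bitshift = BITS_PER_LONG - 1 = 31 */
}

int main(void)
{
	printf("4KB blocks: %llu\n", max_off_32bit(12));	/* 2^43 - 1 */
	printf("1KB blocks: %llu\n", max_off_32bit(10));	/* 2^41 - 1 */
	return 0;
}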
137 | |||
138 | STATIC __inline__ void | ||
139 | xfs_set_inodeops( | ||
140 | struct inode *inode) | ||
141 | { | ||
142 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
143 | |||
144 | if (vp->v_type == VNON) { | ||
145 | vn_mark_bad(vp); | ||
146 | } else if (S_ISREG(inode->i_mode)) { | ||
147 | inode->i_op = &linvfs_file_inode_operations; | ||
148 | inode->i_fop = &linvfs_file_operations; | ||
149 | inode->i_mapping->a_ops = &linvfs_aops; | ||
150 | } else if (S_ISDIR(inode->i_mode)) { | ||
151 | inode->i_op = &linvfs_dir_inode_operations; | ||
152 | inode->i_fop = &linvfs_dir_operations; | ||
153 | } else if (S_ISLNK(inode->i_mode)) { | ||
154 | inode->i_op = &linvfs_symlink_inode_operations; | ||
155 | if (inode->i_blocks) | ||
156 | inode->i_mapping->a_ops = &linvfs_aops; | ||
157 | } else { | ||
158 | inode->i_op = &linvfs_file_inode_operations; | ||
159 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | STATIC __inline__ void | ||
164 | xfs_revalidate_inode( | ||
165 | xfs_mount_t *mp, | ||
166 | vnode_t *vp, | ||
167 | xfs_inode_t *ip) | ||
168 | { | ||
169 | struct inode *inode = LINVFS_GET_IP(vp); | ||
170 | |||
171 | inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); | ||
172 | inode->i_nlink = ip->i_d.di_nlink; | ||
173 | inode->i_uid = ip->i_d.di_uid; | ||
174 | inode->i_gid = ip->i_d.di_gid; | ||
175 | if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { | ||
176 | inode->i_rdev = 0; | ||
177 | } else { | ||
178 | xfs_dev_t dev = ip->i_df.if_u2.if_rdev; | ||
179 | inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); | ||
180 | } | ||
181 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
182 | inode->i_generation = ip->i_d.di_gen; | ||
183 | i_size_write(inode, ip->i_d.di_size); | ||
184 | inode->i_blocks = | ||
185 | XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); | ||
186 | inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; | ||
187 | inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; | ||
188 | inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; | ||
189 | inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; | ||
190 | inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; | ||
191 | inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; | ||
192 | if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) | ||
193 | inode->i_flags |= S_IMMUTABLE; | ||
194 | else | ||
195 | inode->i_flags &= ~S_IMMUTABLE; | ||
196 | if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) | ||
197 | inode->i_flags |= S_APPEND; | ||
198 | else | ||
199 | inode->i_flags &= ~S_APPEND; | ||
200 | if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) | ||
201 | inode->i_flags |= S_SYNC; | ||
202 | else | ||
203 | inode->i_flags &= ~S_SYNC; | ||
204 | if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) | ||
205 | inode->i_flags |= S_NOATIME; | ||
206 | else | ||
207 | inode->i_flags &= ~S_NOATIME; | ||
208 | vp->v_flag &= ~VMODIFIED; | ||
209 | } | ||
210 | |||
211 | void | ||
212 | xfs_initialize_vnode( | ||
213 | bhv_desc_t *bdp, | ||
214 | vnode_t *vp, | ||
215 | bhv_desc_t *inode_bhv, | ||
216 | int unlock) | ||
217 | { | ||
218 | xfs_inode_t *ip = XFS_BHVTOI(inode_bhv); | ||
219 | struct inode *inode = LINVFS_GET_IP(vp); | ||
220 | |||
221 | if (!inode_bhv->bd_vobj) { | ||
222 | vp->v_vfsp = bhvtovfs(bdp); | ||
223 | bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops); | ||
224 | bhv_insert(VN_BHV_HEAD(vp), inode_bhv); | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * We need to set the ops vectors, and unlock the inode, but if | ||
229 | * we have been called during the new inode create process, it is | ||
230 | * too early to fill in the Linux inode. We will get called a | ||
231 | * second time once the inode is properly set up, and then we can | ||
232 | * finish our work. | ||
233 | */ | ||
234 | if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { | ||
235 | vp->v_type = IFTOVT(ip->i_d.di_mode); | ||
236 | xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); | ||
237 | xfs_set_inodeops(inode); | ||
238 | |||
239 | ip->i_flags &= ~XFS_INEW; | ||
240 | barrier(); | ||
241 | |||
242 | unlock_new_inode(inode); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | int | ||
247 | xfs_blkdev_get( | ||
248 | xfs_mount_t *mp, | ||
249 | const char *name, | ||
250 | struct block_device **bdevp) | ||
251 | { | ||
252 | int error = 0; | ||
253 | |||
254 | *bdevp = open_bdev_excl(name, 0, mp); | ||
255 | if (IS_ERR(*bdevp)) { | ||
256 | error = PTR_ERR(*bdevp); | ||
257 | printk("XFS: Invalid device [%s], error=%d\n", name, error); | ||
258 | } | ||
259 | |||
260 | return -error; | ||
261 | } | ||
262 | |||
263 | void | ||
264 | xfs_blkdev_put( | ||
265 | struct block_device *bdev) | ||
266 | { | ||
267 | if (bdev) | ||
268 | close_bdev_excl(bdev); | ||
269 | } | ||
270 | |||
271 | |||
272 | STATIC struct inode * | ||
273 | linvfs_alloc_inode( | ||
274 | struct super_block *sb) | ||
275 | { | ||
276 | vnode_t *vp; | ||
277 | |||
278 | vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, | ||
279 | kmem_flags_convert(KM_SLEEP)); | ||
280 | if (!vp) | ||
281 | return NULL; | ||
282 | return LINVFS_GET_IP(vp); | ||
283 | } | ||
284 | |||
285 | STATIC void | ||
286 | linvfs_destroy_inode( | ||
287 | struct inode *inode) | ||
288 | { | ||
289 | kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); | ||
290 | } | ||
291 | |||
292 | STATIC void | ||
293 | init_once( | ||
294 | void *data, | ||
295 | kmem_cache_t *cachep, | ||
296 | unsigned long flags) | ||
297 | { | ||
298 | vnode_t *vp = (vnode_t *)data; | ||
299 | |||
300 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
301 | SLAB_CTOR_CONSTRUCTOR) | ||
302 | inode_init_once(LINVFS_GET_IP(vp)); | ||
303 | } | ||
304 | |||
305 | STATIC int | ||
306 | init_inodecache( void ) | ||
307 | { | ||
308 | linvfs_inode_zone = kmem_cache_create("linvfs_icache", | ||
309 | sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, | ||
310 | init_once, NULL); | ||
311 | if (linvfs_inode_zone == NULL) | ||
312 | return -ENOMEM; | ||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | STATIC void | ||
317 | destroy_inodecache( void ) | ||
318 | { | ||
319 | if (kmem_cache_destroy(linvfs_inode_zone)) | ||
320 | printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Attempt to flush the inode; this will actually fail | ||
325 | * if the inode is pinned, but we dirty the inode again | ||
326 | * at the point when it is unpinned after a log write, | ||
327 | * since this is when the inode itself becomes flushable. | ||
328 | */ | ||
329 | STATIC int | ||
330 | linvfs_write_inode( | ||
331 | struct inode *inode, | ||
332 | int sync) | ||
333 | { | ||
334 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
335 | int error = 0, flags = FLUSH_INODE; | ||
336 | |||
337 | if (vp) { | ||
338 | vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); | ||
339 | if (sync) | ||
340 | flags |= FLUSH_SYNC; | ||
341 | VOP_IFLUSH(vp, flags, error); | ||
342 | if (error == EAGAIN) { | ||
343 | if (sync) | ||
344 | VOP_IFLUSH(vp, flags | FLUSH_LOG, error); | ||
345 | else | ||
346 | error = 0; | ||
347 | } | ||
348 | } | ||
349 | |||
350 | return -error; | ||
351 | } | ||
352 | |||
353 | STATIC void | ||
354 | linvfs_clear_inode( | ||
355 | struct inode *inode) | ||
356 | { | ||
357 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
358 | |||
359 | if (vp) { | ||
360 | vn_rele(vp); | ||
361 | vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); | ||
362 | /* | ||
363 | * Do all our cleanup, and remove this vnode. | ||
364 | */ | ||
365 | vn_remove(vp); | ||
366 | } | ||
367 | } | ||
368 | |||
369 | |||
370 | /* | ||
371 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | ||
372 | * Doing this has two advantages: | ||
373 | * - It saves on stack space, which is tight in certain situations | ||
374 | * - It can be used (with care) as a mechanism to avoid deadlocks. | ||
375 | * Flushing while allocating in a full filesystem requires both. | ||
376 | */ | ||
377 | STATIC void | ||
378 | xfs_syncd_queue_work( | ||
379 | struct vfs *vfs, | ||
380 | void *data, | ||
381 | void (*syncer)(vfs_t *, void *)) | ||
382 | { | ||
383 | vfs_sync_work_t *work; | ||
384 | |||
385 | work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP); | ||
386 | INIT_LIST_HEAD(&work->w_list); | ||
387 | work->w_syncer = syncer; | ||
388 | work->w_data = data; | ||
389 | work->w_vfs = vfs; | ||
390 | spin_lock(&vfs->vfs_sync_lock); | ||
391 | list_add_tail(&work->w_list, &vfs->vfs_sync_list); | ||
392 | spin_unlock(&vfs->vfs_sync_lock); | ||
393 | wake_up_process(vfs->vfs_sync_task); | ||
394 | } | ||
395 | |||
396 | /* | ||
397 | * Flush delayed allocate data, attempting to free up reserved space | ||
398 | * from existing allocations. At this point a new allocation attempt | ||
399 | * has failed with ENOSPC and we are in the process of scratching our | ||
400 | * heads, looking about for more room... | ||
401 | */ | ||
402 | STATIC void | ||
403 | xfs_flush_inode_work( | ||
404 | vfs_t *vfs, | ||
405 | void *inode) | ||
406 | { | ||
407 | filemap_flush(((struct inode *)inode)->i_mapping); | ||
408 | iput((struct inode *)inode); | ||
409 | } | ||
410 | |||
411 | void | ||
412 | xfs_flush_inode( | ||
413 | xfs_inode_t *ip) | ||
414 | { | ||
415 | struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); | ||
416 | struct vfs *vfs = XFS_MTOVFS(ip->i_mount); | ||
417 | |||
418 | igrab(inode); | ||
419 | xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); | ||
420 | delay(HZ/2); | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * This is the "bigger hammer" version of xfs_flush_inode_work... | ||
425 | * (IOW, "If at first you don't succeed, use a Bigger Hammer"). | ||
426 | */ | ||
427 | STATIC void | ||
428 | xfs_flush_device_work( | ||
429 | vfs_t *vfs, | ||
430 | void *inode) | ||
431 | { | ||
432 | sync_blockdev(vfs->vfs_super->s_bdev); | ||
433 | iput((struct inode *)inode); | ||
434 | } | ||
435 | |||
436 | void | ||
437 | xfs_flush_device( | ||
438 | xfs_inode_t *ip) | ||
439 | { | ||
440 | struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); | ||
441 | struct vfs *vfs = XFS_MTOVFS(ip->i_mount); | ||
442 | |||
443 | igrab(inode); | ||
444 | xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); | ||
445 | delay(HZ/2); | ||
446 | xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); | ||
447 | } | ||
448 | |||
449 | #define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR) | ||
450 | STATIC void | ||
451 | vfs_sync_worker( | ||
452 | vfs_t *vfsp, | ||
453 | void *unused) | ||
454 | { | ||
455 | int error; | ||
456 | |||
457 | if (!(vfsp->vfs_flag & VFS_RDONLY)) | ||
458 | VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); | ||
459 | vfsp->vfs_sync_seq++; | ||
460 | wmb(); | ||
461 | wake_up(&vfsp->vfs_wait_single_sync_task); | ||
462 | } | ||
463 | |||
464 | STATIC int | ||
465 | xfssyncd( | ||
466 | void *arg) | ||
467 | { | ||
468 | long timeleft; | ||
469 | vfs_t *vfsp = (vfs_t *) arg; | ||
470 | struct list_head tmp; | ||
471 | struct vfs_sync_work *work, *n; | ||
472 | |||
473 | daemonize("xfssyncd"); | ||
474 | |||
475 | vfsp->vfs_sync_work.w_vfs = vfsp; | ||
476 | vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; | ||
477 | vfsp->vfs_sync_task = current; | ||
478 | wmb(); | ||
479 | wake_up(&vfsp->vfs_wait_sync_task); | ||
480 | |||
481 | INIT_LIST_HEAD(&tmp); | ||
482 | timeleft = (xfs_syncd_centisecs * HZ) / 100; | ||
483 | for (;;) { | ||
484 | set_current_state(TASK_INTERRUPTIBLE); | ||
485 | timeleft = schedule_timeout(timeleft); | ||
486 | /* swsusp */ | ||
487 | try_to_freeze(PF_FREEZE); | ||
488 | if (vfsp->vfs_flag & VFS_UMOUNT) | ||
489 | break; | ||
490 | |||
491 | spin_lock(&vfsp->vfs_sync_lock); | ||
492 | /* | ||
493 | * We can get woken by laptop mode, to do a sync - | ||
494 | * that's the (only!) case where the list would be | ||
495 | * empty with time remaining. | ||
496 | */ | ||
497 | if (!timeleft || list_empty(&vfsp->vfs_sync_list)) { | ||
498 | if (!timeleft) | ||
499 | timeleft = (xfs_syncd_centisecs * HZ) / 100; | ||
500 | INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list); | ||
501 | list_add_tail(&vfsp->vfs_sync_work.w_list, | ||
502 | &vfsp->vfs_sync_list); | ||
503 | } | ||
504 | list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list) | ||
505 | list_move(&work->w_list, &tmp); | ||
506 | spin_unlock(&vfsp->vfs_sync_lock); | ||
507 | |||
508 | list_for_each_entry_safe(work, n, &tmp, w_list) { | ||
509 | (*work->w_syncer)(vfsp, work->w_data); | ||
510 | list_del(&work->w_list); | ||
511 | if (work == &vfsp->vfs_sync_work) | ||
512 | continue; | ||
513 | kmem_free(work, sizeof(struct vfs_sync_work)); | ||
514 | } | ||
515 | } | ||
516 | |||
517 | vfsp->vfs_sync_task = NULL; | ||
518 | wmb(); | ||
519 | wake_up(&vfsp->vfs_wait_sync_task); | ||
520 | |||
521 | return 0; | ||
522 | } | ||
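xfssyncd drains its queue by splicing the pending items onto a private list while holding the spinlock and only then running each callback, so the syncers themselves never execute under the lock. A compact userspace sketch of that splice-then-process shape, using a pthread mutex and an invented work type in place of vfs_sync_work_t:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work {
	void (*fn)(int);
	int arg;
	struct work *next;
};

static struct work *queue;		/* pending items */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void enqueue(void (*fn)(int), int arg)
{
	struct work *w = malloc(sizeof(*w));	/* sketch: no OOM handling */

	w->fn = fn;
	w->arg = arg;
	pthread_mutex_lock(&lock);
	w->next = queue;
	queue = w;
	pthread_mutex_unlock(&lock);
}

static void drain(void)
{
	struct work *w, *n;

	pthread_mutex_lock(&lock);	/* splice the whole list out... */
	w = queue;
	queue = NULL;
	pthread_mutex_unlock(&lock);

	for (; w; w = n) {		/* ...then run callbacks unlocked */
		n = w->next;
		w->fn(w->arg);
		free(w);
	}
}

static void say(int x) { printf("work %d\n", x); }

int main(void)
{
	enqueue(say, 1);
	enqueue(say, 2);
	drain();
	return 0;
}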
523 | |||
524 | STATIC int | ||
525 | linvfs_start_syncd( | ||
526 | vfs_t *vfsp) | ||
527 | { | ||
528 | int pid; | ||
529 | |||
530 | pid = kernel_thread(xfssyncd, (void *) vfsp, | ||
531 | CLONE_VM | CLONE_FS | CLONE_FILES); | ||
532 | if (pid < 0) | ||
533 | return -pid; | ||
534 | wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); | ||
535 | return 0; | ||
536 | } | ||
537 | |||
538 | STATIC void | ||
539 | linvfs_stop_syncd( | ||
540 | vfs_t *vfsp) | ||
541 | { | ||
542 | vfsp->vfs_flag |= VFS_UMOUNT; | ||
543 | wmb(); | ||
544 | |||
545 | wake_up_process(vfsp->vfs_sync_task); | ||
546 | wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); | ||
547 | } | ||
548 | |||
549 | STATIC void | ||
550 | linvfs_put_super( | ||
551 | struct super_block *sb) | ||
552 | { | ||
553 | vfs_t *vfsp = LINVFS_GET_VFS(sb); | ||
554 | int error; | ||
555 | |||
556 | linvfs_stop_syncd(vfsp); | ||
557 | VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); | ||
558 | if (!error) | ||
559 | VFS_UNMOUNT(vfsp, 0, NULL, error); | ||
560 | if (error) { | ||
561 | printk("XFS unmount got error %d\n", error); | ||
562 | printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp); | ||
563 | return; | ||
564 | } | ||
565 | |||
566 | vfs_deallocate(vfsp); | ||
567 | } | ||
568 | |||
569 | STATIC void | ||
570 | linvfs_write_super( | ||
571 | struct super_block *sb) | ||
572 | { | ||
573 | vfs_t *vfsp = LINVFS_GET_VFS(sb); | ||
574 | int error; | ||
575 | |||
576 | if (sb->s_flags & MS_RDONLY) { | ||
577 | sb->s_dirt = 0; /* paranoia */ | ||
578 | return; | ||
579 | } | ||
580 | /* Push the log and superblock a little */ | ||
581 | VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error); | ||
582 | sb->s_dirt = 0; | ||
583 | } | ||
584 | |||
585 | STATIC int | ||
586 | linvfs_sync_super( | ||
587 | struct super_block *sb, | ||
588 | int wait) | ||
589 | { | ||
590 | vfs_t *vfsp = LINVFS_GET_VFS(sb); | ||
591 | int error; | ||
592 | int flags = SYNC_FSDATA; | ||
593 | |||
594 | if (wait) | ||
595 | flags |= SYNC_WAIT; | ||
596 | |||
597 | VFS_SYNC(vfsp, flags, NULL, error); | ||
598 | sb->s_dirt = 0; | ||
599 | |||
600 | if (unlikely(laptop_mode)) { | ||
601 | int prev_sync_seq = vfsp->vfs_sync_seq; | ||
602 | |||
603 | /* | ||
604 | * The disk must be active because we're syncing. | ||
605 | * We schedule xfssyncd now (now that the disk is | ||
606 | * active) instead of later (when it might not be). | ||
607 | */ | ||
608 | wake_up_process(vfsp->vfs_sync_task); | ||
609 | /* | ||
610 | * We have to wait for the sync iteration to complete. | ||
611 | * If we don't, the disk activity caused by the sync | ||
612 | * will come after the sync is completed, and that | ||
613 | * triggers another sync from laptop mode. | ||
614 | */ | ||
615 | wait_event(vfsp->vfs_wait_single_sync_task, | ||
616 | vfsp->vfs_sync_seq != prev_sync_seq); | ||
617 | } | ||
618 | |||
619 | return -error; | ||
620 | } | ||
621 | |||
622 | STATIC int | ||
623 | linvfs_statfs( | ||
624 | struct super_block *sb, | ||
625 | struct kstatfs *statp) | ||
626 | { | ||
627 | vfs_t *vfsp = LINVFS_GET_VFS(sb); | ||
628 | int error; | ||
629 | |||
630 | VFS_STATVFS(vfsp, statp, NULL, error); | ||
631 | return -error; | ||
632 | } | ||
633 | |||
634 | STATIC int | ||
635 | linvfs_remount( | ||
636 | struct super_block *sb, | ||
637 | int *flags, | ||
638 | char *options) | ||
639 | { | ||
640 | vfs_t *vfsp = LINVFS_GET_VFS(sb); | ||
641 | struct xfs_mount_args *args = xfs_args_allocate(sb); | ||
642 | int error; | ||
643 | |||
644 | VFS_PARSEARGS(vfsp, options, args, 1, error); | ||
645 | if (!error) | ||
646 | VFS_MNTUPDATE(vfsp, flags, args, error); | ||
647 | kmem_free(args, sizeof(*args)); | ||
648 | return -error; | ||
649 | } | ||
650 | |||
651 | STATIC void | ||
652 | linvfs_freeze_fs( | ||
653 | struct super_block *sb) | ||
654 | { | ||
655 | VFS_FREEZE(LINVFS_GET_VFS(sb)); | ||
656 | } | ||
657 | |||
658 | STATIC int | ||
659 | linvfs_show_options( | ||
660 | struct seq_file *m, | ||
661 | struct vfsmount *mnt) | ||
662 | { | ||
663 | struct vfs *vfsp = LINVFS_GET_VFS(mnt->mnt_sb); | ||
664 | int error; | ||
665 | |||
666 | VFS_SHOWARGS(vfsp, m, error); | ||
667 | return error; | ||
668 | } | ||
669 | |||
670 | STATIC int | ||
671 | linvfs_getxstate( | ||
672 | struct super_block *sb, | ||
673 | struct fs_quota_stat *fqs) | ||
674 | { | ||
675 | struct vfs *vfsp = LINVFS_GET_VFS(sb); | ||
676 | int error; | ||
677 | |||
678 | VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error); | ||
679 | return -error; | ||
680 | } | ||
681 | |||
682 | STATIC int | ||
683 | linvfs_setxstate( | ||
684 | struct super_block *sb, | ||
685 | unsigned int flags, | ||
686 | int op) | ||
687 | { | ||
688 | struct vfs *vfsp = LINVFS_GET_VFS(sb); | ||
689 | int error; | ||
690 | |||
691 | VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error); | ||
692 | return -error; | ||
693 | } | ||
694 | |||
695 | STATIC int | ||
696 | linvfs_getxquota( | ||
697 | struct super_block *sb, | ||
698 | int type, | ||
699 | qid_t id, | ||
700 | struct fs_disk_quota *fdq) | ||
701 | { | ||
702 | struct vfs *vfsp = LINVFS_GET_VFS(sb); | ||
703 | int error, getmode; | ||
704 | |||
705 | getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA; | ||
706 | VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error); | ||
707 | return -error; | ||
708 | } | ||
709 | |||
710 | STATIC int | ||
711 | linvfs_setxquota( | ||
712 | struct super_block *sb, | ||
713 | int type, | ||
714 | qid_t id, | ||
715 | struct fs_disk_quota *fdq) | ||
716 | { | ||
717 | struct vfs *vfsp = LINVFS_GET_VFS(sb); | ||
718 | int error, setmode; | ||
719 | |||
720 | setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM; | ||
721 | VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error); | ||
722 | return -error; | ||
723 | } | ||
724 | |||
725 | STATIC int | ||
726 | linvfs_fill_super( | ||
727 | struct super_block *sb, | ||
728 | void *data, | ||
729 | int silent) | ||
730 | { | ||
731 | vnode_t *rootvp; | ||
732 | struct vfs *vfsp = vfs_allocate(); | ||
733 | struct xfs_mount_args *args = xfs_args_allocate(sb); | ||
734 | struct kstatfs statvfs; | ||
735 | int error, error2; | ||
736 | |||
737 | vfsp->vfs_super = sb; | ||
738 | LINVFS_SET_VFS(sb, vfsp); | ||
739 | if (sb->s_flags & MS_RDONLY) | ||
740 | vfsp->vfs_flag |= VFS_RDONLY; | ||
741 | bhv_insert_all_vfsops(vfsp); | ||
742 | |||
743 | VFS_PARSEARGS(vfsp, (char *)data, args, 0, error); | ||
744 | if (error) { | ||
745 | bhv_remove_all_vfsops(vfsp, 1); | ||
746 | goto fail_vfsop; | ||
747 | } | ||
748 | |||
749 | sb_min_blocksize(sb, BBSIZE); | ||
750 | #ifdef CONFIG_XFS_EXPORT | ||
751 | sb->s_export_op = &linvfs_export_ops; | ||
752 | #endif | ||
753 | sb->s_qcop = &linvfs_qops; | ||
754 | sb->s_op = &linvfs_sops; | ||
755 | |||
756 | VFS_MOUNT(vfsp, args, NULL, error); | ||
757 | if (error) { | ||
758 | bhv_remove_all_vfsops(vfsp, 1); | ||
759 | goto fail_vfsop; | ||
760 | } | ||
761 | |||
762 | VFS_STATVFS(vfsp, &statvfs, NULL, error); | ||
763 | if (error) | ||
764 | goto fail_unmount; | ||
765 | |||
766 | sb->s_dirt = 1; | ||
767 | sb->s_magic = statvfs.f_type; | ||
768 | sb->s_blocksize = statvfs.f_bsize; | ||
769 | sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1; | ||
770 | sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); | ||
771 | sb->s_time_gran = 1; | ||
772 | set_posix_acl_flag(sb); | ||
773 | |||
774 | VFS_ROOT(vfsp, &rootvp, error); | ||
775 | if (error) | ||
776 | goto fail_unmount; | ||
777 | |||
778 | sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp)); | ||
779 | if (!sb->s_root) { | ||
780 | error = ENOMEM; | ||
781 | goto fail_vnrele; | ||
782 | } | ||
783 | if (is_bad_inode(sb->s_root->d_inode)) { | ||
784 | error = EINVAL; | ||
785 | goto fail_vnrele; | ||
786 | } | ||
787 | if ((error = linvfs_start_syncd(vfsp))) | ||
788 | goto fail_vnrele; | ||
789 | vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address); | ||
790 | |||
791 | kmem_free(args, sizeof(*args)); | ||
792 | return 0; | ||
793 | |||
794 | fail_vnrele: | ||
795 | if (sb->s_root) { | ||
796 | dput(sb->s_root); | ||
797 | sb->s_root = NULL; | ||
798 | } else { | ||
799 | VN_RELE(rootvp); | ||
800 | } | ||
801 | |||
802 | fail_unmount: | ||
803 | VFS_UNMOUNT(vfsp, 0, NULL, error2); | ||
804 | |||
805 | fail_vfsop: | ||
806 | vfs_deallocate(vfsp); | ||
807 | kmem_free(args, sizeof(*args)); | ||
808 | return -error; | ||
809 | } | ||
810 | |||
811 | STATIC struct super_block * | ||
812 | linvfs_get_sb( | ||
813 | struct file_system_type *fs_type, | ||
814 | int flags, | ||
815 | const char *dev_name, | ||
816 | void *data) | ||
817 | { | ||
818 | return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super); | ||
819 | } | ||
820 | |||
821 | STATIC struct super_operations linvfs_sops = { | ||
822 | .alloc_inode = linvfs_alloc_inode, | ||
823 | .destroy_inode = linvfs_destroy_inode, | ||
824 | .write_inode = linvfs_write_inode, | ||
825 | .clear_inode = linvfs_clear_inode, | ||
826 | .put_super = linvfs_put_super, | ||
827 | .write_super = linvfs_write_super, | ||
828 | .sync_fs = linvfs_sync_super, | ||
829 | .write_super_lockfs = linvfs_freeze_fs, | ||
830 | .statfs = linvfs_statfs, | ||
831 | .remount_fs = linvfs_remount, | ||
832 | .show_options = linvfs_show_options, | ||
833 | }; | ||
834 | |||
835 | STATIC struct quotactl_ops linvfs_qops = { | ||
836 | .get_xstate = linvfs_getxstate, | ||
837 | .set_xstate = linvfs_setxstate, | ||
838 | .get_xquota = linvfs_getxquota, | ||
839 | .set_xquota = linvfs_setxquota, | ||
840 | }; | ||
841 | |||
842 | STATIC struct file_system_type xfs_fs_type = { | ||
843 | .owner = THIS_MODULE, | ||
844 | .name = "xfs", | ||
845 | .get_sb = linvfs_get_sb, | ||
846 | .kill_sb = kill_block_super, | ||
847 | .fs_flags = FS_REQUIRES_DEV, | ||
848 | }; | ||
849 | |||
850 | |||
851 | STATIC int __init | ||
852 | init_xfs_fs( void ) | ||
853 | { | ||
854 | int error; | ||
855 | struct sysinfo si; | ||
856 | static char message[] __initdata = KERN_INFO \ | ||
857 | XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"; | ||
858 | |||
859 | printk(message); | ||
860 | |||
861 | si_meminfo(&si); | ||
862 | xfs_physmem = si.totalram; | ||
863 | |||
864 | ktrace_init(64); | ||
865 | |||
866 | error = init_inodecache(); | ||
867 | if (error < 0) | ||
868 | goto undo_inodecache; | ||
869 | |||
870 | error = pagebuf_init(); | ||
871 | if (error < 0) | ||
872 | goto undo_pagebuf; | ||
873 | |||
874 | vn_init(); | ||
875 | xfs_init(); | ||
876 | uuid_init(); | ||
877 | vfs_initquota(); | ||
878 | |||
879 | error = register_filesystem(&xfs_fs_type); | ||
880 | if (error) | ||
881 | goto undo_register; | ||
882 | XFS_DM_INIT(&xfs_fs_type); | ||
883 | return 0; | ||
884 | |||
885 | undo_register: | ||
886 | pagebuf_terminate(); | ||
887 | |||
888 | undo_pagebuf: | ||
889 | destroy_inodecache(); | ||
890 | |||
891 | undo_inodecache: | ||
892 | return error; | ||
893 | } | ||
894 | |||
895 | STATIC void __exit | ||
896 | exit_xfs_fs( void ) | ||
897 | { | ||
898 | vfs_exitquota(); | ||
899 | XFS_DM_EXIT(&xfs_fs_type); | ||
900 | unregister_filesystem(&xfs_fs_type); | ||
901 | xfs_cleanup(); | ||
902 | pagebuf_terminate(); | ||
903 | destroy_inodecache(); | ||
904 | ktrace_uninit(); | ||
905 | } | ||
906 | |||
907 | module_init(init_xfs_fs); | ||
908 | module_exit(exit_xfs_fs); | ||
909 | |||
910 | MODULE_AUTHOR("Silicon Graphics, Inc."); | ||
911 | MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); | ||
912 | MODULE_LICENSE("GPL"); | ||
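init_xfs_fs() follows the usual kernel goto-unwind idiom: each setup step that fails jumps to a label that tears down only what was already initialized, in reverse order. A minimal standalone sketch of the same idiom; the setup and teardown functions are invented names, and the second step is forced to fail to show the unwind path.

#include <stdio.h>

static int init_cache(void)     { puts("cache up");       return 0; }
static void destroy_cache(void) { puts("cache down"); }
static int register_fs(void)    { puts("register failed"); return -1; }

static int init_sketch(void)
{
	int error;

	error = init_cache();
	if (error)
		goto out;

	error = register_fs();
	if (error)
		goto undo_cache;	/* unwind in reverse order of setup */

	return 0;

undo_cache:
	destroy_cache();
out:
	return error;
}

int main(void)
{
	return init_sketch() ? 1 : 0;
}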
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h new file mode 100644 index 000000000000..ec7e0035c731 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_SUPER_H__ | ||
33 | #define __XFS_SUPER_H__ | ||
34 | |||
35 | #ifdef CONFIG_XFS_DMAPI | ||
36 | # define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) | ||
37 | # define vfs_initdmapi() dmapi_init() | ||
38 | # define vfs_exitdmapi() dmapi_uninit() | ||
39 | #else | ||
40 | # define vfs_insertdmapi(vfs) do { } while (0) | ||
41 | # define vfs_initdmapi() do { } while (0) | ||
42 | # define vfs_exitdmapi() do { } while (0) | ||
43 | #endif | ||
44 | |||
45 | #ifdef CONFIG_XFS_QUOTA | ||
46 | # define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops) | ||
47 | extern void xfs_qm_init(void); | ||
48 | extern void xfs_qm_exit(void); | ||
49 | # define vfs_initquota() xfs_qm_init() | ||
50 | # define vfs_exitquota() xfs_qm_exit() | ||
51 | #else | ||
52 | # define vfs_insertquota(vfs) do { } while (0) | ||
53 | # define vfs_initquota() do { } while (0) | ||
54 | # define vfs_exitquota() do { } while (0) | ||
55 | #endif | ||
56 | |||
57 | #ifdef CONFIG_XFS_POSIX_ACL | ||
58 | # define XFS_ACL_STRING "ACLs, " | ||
59 | # define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) | ||
60 | #else | ||
61 | # define XFS_ACL_STRING | ||
62 | # define set_posix_acl_flag(sb) do { } while (0) | ||
63 | #endif | ||
64 | |||
65 | #ifdef CONFIG_XFS_SECURITY | ||
66 | # define XFS_SECURITY_STRING "security attributes, " | ||
67 | # define ENOSECURITY 0 | ||
68 | #else | ||
69 | # define XFS_SECURITY_STRING | ||
70 | # define ENOSECURITY EOPNOTSUPP | ||
71 | #endif | ||
72 | |||
73 | #ifdef CONFIG_XFS_RT | ||
74 | # define XFS_REALTIME_STRING "realtime, " | ||
75 | #else | ||
76 | # define XFS_REALTIME_STRING | ||
77 | #endif | ||
78 | |||
79 | #if XFS_BIG_BLKNOS | ||
80 | # if XFS_BIG_INUMS | ||
81 | # define XFS_BIGFS_STRING "large block/inode numbers, " | ||
82 | # else | ||
83 | # define XFS_BIGFS_STRING "large block numbers, " | ||
84 | # endif | ||
85 | #else | ||
86 | # define XFS_BIGFS_STRING | ||
87 | #endif | ||
88 | |||
89 | #ifdef CONFIG_XFS_TRACE | ||
90 | # define XFS_TRACE_STRING "tracing, " | ||
91 | #else | ||
92 | # define XFS_TRACE_STRING | ||
93 | #endif | ||
94 | |||
95 | #ifdef CONFIG_XFS_DMAPI | ||
96 | # define XFS_DMAPI_STRING "dmapi support, " | ||
97 | #else | ||
98 | # define XFS_DMAPI_STRING | ||
99 | #endif | ||
100 | |||
101 | #ifdef DEBUG | ||
102 | # define XFS_DBG_STRING "debug" | ||
103 | #else | ||
104 | # define XFS_DBG_STRING "no debug" | ||
105 | #endif | ||
106 | |||
107 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ | ||
108 | XFS_SECURITY_STRING \ | ||
109 | XFS_REALTIME_STRING \ | ||
110 | XFS_BIGFS_STRING \ | ||
111 | XFS_TRACE_STRING \ | ||
112 | XFS_DMAPI_STRING \ | ||
113 | XFS_DBG_STRING /* DBG must be last */ | ||
114 | |||
115 | #define LINVFS_GET_VFS(s) \ | ||
116 | (vfs_t *)((s)->s_fs_info) | ||
117 | #define LINVFS_SET_VFS(s, vfsp) \ | ||
118 | ((s)->s_fs_info = vfsp) | ||
119 | |||
120 | struct xfs_inode; | ||
121 | struct xfs_mount; | ||
122 | struct xfs_buftarg; | ||
123 | struct block_device; | ||
124 | |||
125 | extern __uint64_t xfs_max_file_offset(unsigned int); | ||
126 | |||
127 | extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int); | ||
128 | |||
129 | extern void xfs_flush_inode(struct xfs_inode *); | ||
130 | extern void xfs_flush_device(struct xfs_inode *); | ||
131 | |||
132 | extern int xfs_blkdev_get(struct xfs_mount *, const char *, | ||
133 | struct block_device **); | ||
134 | extern void xfs_blkdev_put(struct block_device *); | ||
135 | |||
136 | extern struct export_operations linvfs_export_ops; | ||
137 | |||
138 | #endif /* __XFS_SUPER_H__ */ | ||
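XFS_BUILD_OPTIONS is assembled entirely by the preprocessor: each feature macro expands to either a string literal or nothing, and C's adjacent-string-literal concatenation glues the survivors into one banner. A tiny sketch of the same trick, with invented feature switches rather than the real config options:

#include <stdio.h>

#define FEATURE_ACL		/* comment out to drop it from the banner */

#ifdef FEATURE_ACL
# define ACL_STRING	"ACLs, "
#else
# define ACL_STRING
#endif

#ifdef FEATURE_DEBUG
# define DBG_STRING	"debug"
#else
# define DBG_STRING	"no debug"
#endif

/* Adjacent string literals are concatenated at compile time. */
#define BUILD_OPTIONS	ACL_STRING DBG_STRING

int main(void)
{
	printf("built with %s enabled\n", BUILD_OPTIONS);
	return 0;
}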
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c new file mode 100644 index 000000000000..0dc010356f4d --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include "xfs_rw.h" | ||
35 | #include <linux/sysctl.h> | ||
36 | #include <linux/proc_fs.h> | ||
37 | |||
38 | |||
39 | static struct ctl_table_header *xfs_table_header; | ||
40 | |||
41 | |||
42 | #ifdef CONFIG_PROC_FS | ||
43 | STATIC int | ||
44 | xfs_stats_clear_proc_handler( | ||
45 | ctl_table *ctl, | ||
46 | int write, | ||
47 | struct file *filp, | ||
48 | void __user *buffer, | ||
49 | size_t *lenp, | ||
50 | loff_t *ppos) | ||
51 | { | ||
52 | int c, ret, *valp = ctl->data; | ||
53 | __uint32_t vn_active; | ||
54 | |||
55 | ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); | ||
56 | |||
57 | if (!ret && write && *valp) { | ||
58 | printk("XFS Clearing xfsstats\n"); | ||
59 | for (c = 0; c < NR_CPUS; c++) { | ||
60 | if (!cpu_possible(c)) continue; | ||
61 | preempt_disable(); | ||
62 | /* save vn_active, it's a universal truth! */ | ||
63 | vn_active = per_cpu(xfsstats, c).vn_active; | ||
64 | memset(&per_cpu(xfsstats, c), 0, | ||
65 | sizeof(struct xfsstats)); | ||
66 | per_cpu(xfsstats, c).vn_active = vn_active; | ||
67 | preempt_enable(); | ||
68 | } | ||
69 | xfs_stats_clear = 0; | ||
70 | } | ||
71 | |||
72 | return ret; | ||
73 | } | ||
74 | #endif /* CONFIG_PROC_FS */ | ||
75 | |||
76 | STATIC ctl_table xfs_table[] = { | ||
77 | {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, | ||
78 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
79 | &sysctl_intvec, NULL, | ||
80 | &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, | ||
81 | |||
82 | {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, | ||
83 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
84 | &sysctl_intvec, NULL, | ||
85 | &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, | ||
86 | |||
87 | {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, | ||
88 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
89 | &sysctl_intvec, NULL, | ||
90 | &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, | ||
91 | |||
92 | {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, | ||
93 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
94 | &sysctl_intvec, NULL, | ||
95 | &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, | ||
96 | |||
97 | {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, | ||
98 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
99 | &sysctl_intvec, NULL, | ||
100 | &xfs_params.error_level.min, &xfs_params.error_level.max}, | ||
101 | |||
102 | {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, | ||
103 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
104 | &sysctl_intvec, NULL, | ||
105 | &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, | ||
106 | |||
107 | {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, | ||
108 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
109 | &sysctl_intvec, NULL, | ||
110 | &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, | ||
111 | |||
112 | {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, | ||
113 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
114 | &sysctl_intvec, NULL, | ||
115 | &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, | ||
116 | |||
117 | {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, | ||
118 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
119 | &sysctl_intvec, NULL, | ||
120 | &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, | ||
121 | |||
122 | {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val, | ||
123 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
124 | &sysctl_intvec, NULL, | ||
125 | &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max}, | ||
126 | |||
127 | {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val, | ||
128 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
129 | &sysctl_intvec, NULL, | ||
130 | &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max}, | ||
131 | |||
132 | {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val, | ||
133 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
134 | &sysctl_intvec, NULL, | ||
135 | &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max}, | ||
136 | |||
137 | {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val, | ||
138 | sizeof(int), 0644, NULL, &proc_dointvec_minmax, | ||
139 | &sysctl_intvec, NULL, | ||
140 | &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, | ||
141 | |||
142 | /* please keep this the last entry */ | ||
143 | #ifdef CONFIG_PROC_FS | ||
144 | {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, | ||
145 | sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, | ||
146 | &sysctl_intvec, NULL, | ||
147 | &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, | ||
148 | #endif /* CONFIG_PROC_FS */ | ||
149 | |||
150 | {0} | ||
151 | }; | ||
152 | |||
153 | STATIC ctl_table xfs_dir_table[] = { | ||
154 | {FS_XFS, "xfs", NULL, 0, 0555, xfs_table}, | ||
155 | {0} | ||
156 | }; | ||
157 | |||
158 | STATIC ctl_table xfs_root_table[] = { | ||
159 | {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table}, | ||
160 | {0} | ||
161 | }; | ||
162 | |||
163 | void | ||
164 | xfs_sysctl_register(void) | ||
165 | { | ||
166 | xfs_table_header = register_sysctl_table(xfs_root_table, 1); | ||
167 | } | ||
168 | |||
169 | void | ||
170 | xfs_sysctl_unregister(void) | ||
171 | { | ||
172 | if (xfs_table_header) | ||
173 | unregister_sysctl_table(xfs_table_header); | ||
174 | } | ||
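xfs_stats_clear_proc_handler() is a thin wrapper: it lets the generic integer handler do the parsing, and only when that succeeded on a write of a non-zero value does it clear the counters and reset the knob. A hedged userspace sketch of that wrap-then-act shape, with a toy parser standing in for proc_dointvec_minmax and a fixed counter array standing in for the per-CPU stats:

#include <stdio.h>
#include <stdlib.h>

static int counters[4] = { 5, 6, 7, 8 };
static int stats_clear;			/* the writable knob */

/* Stand-in for the generic integer handler: parse the written string. */
static int generic_int_handler(const char *buf, int *valp)
{
	*valp = atoi(buf);
	return 0;			/* 0 == success, as in the kernel */
}

static int stats_clear_handler(const char *buf, int write)
{
	int ret = generic_int_handler(buf, &stats_clear);
	int c;

	if (!ret && write && stats_clear) {
		for (c = 0; c < 4; c++)	/* clear every counter */
			counters[c] = 0;
		stats_clear = 0;	/* knob auto-resets */
	}
	return ret;
}

int main(void)
{
	stats_clear_handler("1", 1);
	printf("counters now: %d %d %d %d\n",
	       counters[0], counters[1], counters[2], counters[3]);
	return 0;
}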
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h new file mode 100644 index 000000000000..a39a95020a58 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sysctl.h | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #ifndef __XFS_SYSCTL_H__ | ||
34 | #define __XFS_SYSCTL_H__ | ||
35 | |||
36 | #include <linux/sysctl.h> | ||
37 | |||
38 | /* | ||
39 | * Tunable xfs parameters | ||
40 | */ | ||
41 | |||
42 | typedef struct xfs_sysctl_val { | ||
43 | int min; | ||
44 | int val; | ||
45 | int max; | ||
46 | } xfs_sysctl_val_t; | ||
47 | |||
48 | typedef struct xfs_param { | ||
49 | xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ | ||
50 | xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is | ||
51 | * not a member of parent dir GID. */ | ||
52 | xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ | ||
53 | xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ | ||
54 | xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ | ||
55 | xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */ | ||
56 | xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ | ||
57 | xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ | ||
58 | xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ | ||
59 | xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ | ||
60 | xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */ | ||
61 | xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ | ||
62 | xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ | ||
63 | xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ | ||
64 | } xfs_param_t; | ||
65 | |||
66 | /* | ||
67 | * xfs_error_level: | ||
68 | * | ||
69 | * How much error reporting will be done when internal problems are | ||
70 | * encountered. These problems normally return an EFSCORRUPTED to their | ||
71 | * caller, with no other information reported. | ||
72 | * | ||
73 | * 0 No error reports | ||
74 | * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown | ||
75 | * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any | ||
76 | * additional errors that are known to not cause shutdowns) | ||
77 | * | ||
78 | * xfs_panic_mask bit 0x8 turns the error reports into panics | ||
79 | */ | ||
80 | |||
81 | enum { | ||
82 | /* XFS_REFCACHE_SIZE = 1 */ | ||
83 | /* XFS_REFCACHE_PURGE = 2 */ | ||
84 | XFS_RESTRICT_CHOWN = 3, | ||
85 | XFS_SGID_INHERIT = 4, | ||
86 | XFS_SYMLINK_MODE = 5, | ||
87 | XFS_PANIC_MASK = 6, | ||
88 | XFS_ERRLEVEL = 7, | ||
89 | XFS_SYNCD_TIMER = 8, | ||
90 | /* XFS_PROBE_DMAPI = 9 */ | ||
91 | /* XFS_PROBE_IOOPS = 10 */ | ||
92 | /* XFS_PROBE_QUOTA = 11 */ | ||
93 | XFS_STATS_CLEAR = 12, | ||
94 | XFS_INHERIT_SYNC = 13, | ||
95 | XFS_INHERIT_NODUMP = 14, | ||
96 | XFS_INHERIT_NOATIME = 15, | ||
97 | XFS_BUF_TIMER = 16, | ||
98 | XFS_BUF_AGE = 17, | ||
99 | /* XFS_IO_BYPASS = 18 */ | ||
100 | XFS_INHERIT_NOSYM = 19, | ||
101 | XFS_ROTORSTEP = 20, | ||
102 | }; | ||
103 | |||
104 | extern xfs_param_t xfs_params; | ||
105 | |||
106 | #ifdef CONFIG_SYSCTL | ||
107 | extern void xfs_sysctl_register(void); | ||
108 | extern void xfs_sysctl_unregister(void); | ||
109 | #else | ||
110 | # define xfs_sysctl_register() do { } while (0) | ||
111 | # define xfs_sysctl_unregister() do { } while (0) | ||
112 | #endif /* CONFIG_SYSCTL */ | ||
113 | |||
114 | #endif /* __XFS_SYSCTL_H__ */ | ||
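
Each tunable above is an xfs_sysctl_val_t triplet: min and max bound what the proc handler will accept, while val is the field the rest of the kernel reads directly. A minimal, hedged sketch of kernel-side consumption, using the 0x8 panic bit documented in the comment above (the helper name is hypothetical, not part of this header):

/* Hypothetical helper: test the documented "turn error reports into
 * panics" bit of xfs_panic_mask. */
static inline int example_panic_on_error(void)
{
	return (xfs_params.panic_mask.val & 0x8) != 0;
}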
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h new file mode 100644 index 000000000000..96f96394417e --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_version.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * Dummy file that can contain a timestamp to put into the | ||
35 | * XFS init string, to help users keep track of what they're | ||
36 | * running. | ||
37 | */ | ||
38 | |||
39 | #ifndef __XFS_VERSION_H__ | ||
40 | #define __XFS_VERSION_H__ | ||
41 | |||
42 | #define XFS_VERSION_STRING "SGI XFS" | ||
43 | |||
44 | #endif /* __XFS_VERSION_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c new file mode 100644 index 000000000000..669c61644959 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vfs.c | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | #include "xfs_fs.h" | ||
35 | #include "xfs_macros.h" | ||
36 | #include "xfs_inum.h" | ||
37 | #include "xfs_log.h" | ||
38 | #include "xfs_clnt.h" | ||
39 | #include "xfs_trans.h" | ||
40 | #include "xfs_sb.h" | ||
41 | #include "xfs_ag.h" | ||
42 | #include "xfs_dir.h" | ||
43 | #include "xfs_dir2.h" | ||
44 | #include "xfs_imap.h" | ||
45 | #include "xfs_alloc.h" | ||
46 | #include "xfs_dmapi.h" | ||
47 | #include "xfs_mount.h" | ||
48 | #include "xfs_quota.h" | ||
49 | |||
50 | int | ||
51 | vfs_mount( | ||
52 | struct bhv_desc *bdp, | ||
53 | struct xfs_mount_args *args, | ||
54 | struct cred *cr) | ||
55 | { | ||
56 | struct bhv_desc *next = bdp; | ||
57 | |||
58 | ASSERT(next); | ||
59 | while (! (bhvtovfsops(next))->vfs_mount) | ||
60 | next = BHV_NEXT(next); | ||
61 | return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr)); | ||
62 | } | ||
63 | |||
64 | int | ||
65 | vfs_parseargs( | ||
66 | struct bhv_desc *bdp, | ||
67 | char *s, | ||
68 | struct xfs_mount_args *args, | ||
69 | int f) | ||
70 | { | ||
71 | struct bhv_desc *next = bdp; | ||
72 | |||
73 | ASSERT(next); | ||
74 | while (! (bhvtovfsops(next))->vfs_parseargs) | ||
75 | next = BHV_NEXT(next); | ||
76 | return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f)); | ||
77 | } | ||
78 | |||
79 | int | ||
80 | vfs_showargs( | ||
81 | struct bhv_desc *bdp, | ||
82 | struct seq_file *m) | ||
83 | { | ||
84 | struct bhv_desc *next = bdp; | ||
85 | |||
86 | ASSERT(next); | ||
87 | while (! (bhvtovfsops(next))->vfs_showargs) | ||
88 | next = BHV_NEXT(next); | ||
89 | return ((*bhvtovfsops(next)->vfs_showargs)(next, m)); | ||
90 | } | ||
91 | |||
92 | int | ||
93 | vfs_unmount( | ||
94 | struct bhv_desc *bdp, | ||
95 | int fl, | ||
96 | struct cred *cr) | ||
97 | { | ||
98 | struct bhv_desc *next = bdp; | ||
99 | |||
100 | ASSERT(next); | ||
101 | while (! (bhvtovfsops(next))->vfs_unmount) | ||
102 | next = BHV_NEXT(next); | ||
103 | return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr)); | ||
104 | } | ||
105 | |||
106 | int | ||
107 | vfs_mntupdate( | ||
108 | struct bhv_desc *bdp, | ||
109 | int *fl, | ||
110 | struct xfs_mount_args *args) | ||
111 | { | ||
112 | struct bhv_desc *next = bdp; | ||
113 | |||
114 | ASSERT(next); | ||
115 | while (! (bhvtovfsops(next))->vfs_mntupdate) | ||
116 | next = BHV_NEXT(next); | ||
117 | return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args)); | ||
118 | } | ||
119 | |||
120 | int | ||
121 | vfs_root( | ||
122 | struct bhv_desc *bdp, | ||
123 | struct vnode **vpp) | ||
124 | { | ||
125 | struct bhv_desc *next = bdp; | ||
126 | |||
127 | ASSERT(next); | ||
128 | while (! (bhvtovfsops(next))->vfs_root) | ||
129 | next = BHV_NEXT(next); | ||
130 | return ((*bhvtovfsops(next)->vfs_root)(next, vpp)); | ||
131 | } | ||
132 | |||
133 | int | ||
134 | vfs_statvfs( | ||
135 | struct bhv_desc *bdp, | ||
136 | xfs_statfs_t *sp, | ||
137 | struct vnode *vp) | ||
138 | { | ||
139 | struct bhv_desc *next = bdp; | ||
140 | |||
141 | ASSERT(next); | ||
142 | while (! (bhvtovfsops(next))->vfs_statvfs) | ||
143 | next = BHV_NEXT(next); | ||
144 | return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp)); | ||
145 | } | ||
146 | |||
147 | int | ||
148 | vfs_sync( | ||
149 | struct bhv_desc *bdp, | ||
150 | int fl, | ||
151 | struct cred *cr) | ||
152 | { | ||
153 | struct bhv_desc *next = bdp; | ||
154 | |||
155 | ASSERT(next); | ||
156 | while (! (bhvtovfsops(next))->vfs_sync) | ||
157 | next = BHV_NEXT(next); | ||
158 | return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr)); | ||
159 | } | ||
160 | |||
161 | int | ||
162 | vfs_vget( | ||
163 | struct bhv_desc *bdp, | ||
164 | struct vnode **vpp, | ||
165 | struct fid *fidp) | ||
166 | { | ||
167 | struct bhv_desc *next = bdp; | ||
168 | |||
169 | ASSERT(next); | ||
170 | while (! (bhvtovfsops(next))->vfs_vget) | ||
171 | next = BHV_NEXT(next); | ||
172 | return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp)); | ||
173 | } | ||
174 | |||
175 | int | ||
176 | vfs_dmapiops( | ||
177 | struct bhv_desc *bdp, | ||
178 | caddr_t addr) | ||
179 | { | ||
180 | struct bhv_desc *next = bdp; | ||
181 | |||
182 | ASSERT(next); | ||
183 | while (! (bhvtovfsops(next))->vfs_dmapiops) | ||
184 | next = BHV_NEXT(next); | ||
185 | return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr)); | ||
186 | } | ||
187 | |||
188 | int | ||
189 | vfs_quotactl( | ||
190 | struct bhv_desc *bdp, | ||
191 | int cmd, | ||
192 | int id, | ||
193 | caddr_t addr) | ||
194 | { | ||
195 | struct bhv_desc *next = bdp; | ||
196 | |||
197 | ASSERT(next); | ||
198 | while (! (bhvtovfsops(next))->vfs_quotactl) | ||
199 | next = BHV_NEXT(next); | ||
200 | return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr)); | ||
201 | } | ||
202 | |||
203 | void | ||
204 | vfs_init_vnode( | ||
205 | struct bhv_desc *bdp, | ||
206 | struct vnode *vp, | ||
207 | struct bhv_desc *bp, | ||
208 | int unlock) | ||
209 | { | ||
210 | struct bhv_desc *next = bdp; | ||
211 | |||
212 | ASSERT(next); | ||
213 | while (! (bhvtovfsops(next))->vfs_init_vnode) | ||
214 | next = BHV_NEXT(next); | ||
215 | ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock)); | ||
216 | } | ||
217 | |||
218 | void | ||
219 | vfs_force_shutdown( | ||
220 | struct bhv_desc *bdp, | ||
221 | int fl, | ||
222 | char *file, | ||
223 | int line) | ||
224 | { | ||
225 | struct bhv_desc *next = bdp; | ||
226 | |||
227 | ASSERT(next); | ||
228 | while (! (bhvtovfsops(next))->vfs_force_shutdown) | ||
229 | next = BHV_NEXT(next); | ||
230 | ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line)); | ||
231 | } | ||
232 | |||
233 | void | ||
234 | vfs_freeze( | ||
235 | struct bhv_desc *bdp) | ||
236 | { | ||
237 | struct bhv_desc *next = bdp; | ||
238 | |||
239 | ASSERT(next); | ||
240 | while (! (bhvtovfsops(next))->vfs_freeze) | ||
241 | next = BHV_NEXT(next); | ||
242 | ((*bhvtovfsops(next)->vfs_freeze)(next)); | ||
243 | } | ||
244 | |||
245 | vfs_t * | ||
246 | vfs_allocate( void ) | ||
247 | { | ||
248 | struct vfs *vfsp; | ||
249 | |||
250 | vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); | ||
251 | bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); | ||
252 | INIT_LIST_HEAD(&vfsp->vfs_sync_list); | ||
253 | spin_lock_init(&vfsp->vfs_sync_lock); | ||
254 | init_waitqueue_head(&vfsp->vfs_wait_sync_task); | ||
255 | init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); | ||
256 | return vfsp; | ||
257 | } | ||
258 | |||
259 | void | ||
260 | vfs_deallocate( | ||
261 | struct vfs *vfsp) | ||
262 | { | ||
263 | bhv_head_destroy(VFS_BHVHEAD(vfsp)); | ||
264 | kmem_free(vfsp, sizeof(vfs_t)); | ||
265 | } | ||
266 | |||
267 | void | ||
268 | vfs_insertops( | ||
269 | struct vfs *vfsp, | ||
270 | struct bhv_vfsops *vfsops) | ||
271 | { | ||
272 | struct bhv_desc *bdp; | ||
273 | |||
274 | bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP); | ||
275 | bhv_desc_init(bdp, NULL, vfsp, vfsops); | ||
276 | bhv_insert(&vfsp->vfs_bh, bdp); | ||
277 | } | ||
278 | |||
279 | void | ||
280 | vfs_insertbhv( | ||
281 | struct vfs *vfsp, | ||
282 | struct bhv_desc *bdp, | ||
283 | struct vfsops *vfsops, | ||
284 | void *mount) | ||
285 | { | ||
286 | bhv_desc_init(bdp, mount, vfsp, vfsops); | ||
287 | bhv_insert_initial(&vfsp->vfs_bh, bdp); | ||
288 | } | ||
289 | |||
290 | void | ||
291 | bhv_remove_vfsops( | ||
292 | struct vfs *vfsp, | ||
293 | int pos) | ||
294 | { | ||
295 | struct bhv_desc *bhv; | ||
296 | |||
297 | bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos); | ||
298 | if (!bhv) | ||
299 | return; | ||
300 | bhv_remove(&vfsp->vfs_bh, bhv); | ||
301 | kmem_free(bhv, sizeof(*bhv)); | ||
302 | } | ||
303 | |||
304 | void | ||
305 | bhv_remove_all_vfsops( | ||
306 | struct vfs *vfsp, | ||
307 | int freebase) | ||
308 | { | ||
309 | struct xfs_mount *mp; | ||
310 | |||
311 | bhv_remove_vfsops(vfsp, VFS_POSITION_QM); | ||
312 | bhv_remove_vfsops(vfsp, VFS_POSITION_DM); | ||
313 | if (!freebase) | ||
314 | return; | ||
315 | mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops)); | ||
316 | VFS_REMOVEBHV(vfsp, &mp->m_bhv); | ||
317 | xfs_mount_free(mp, 0); | ||
318 | } | ||
319 | |||
320 | void | ||
321 | bhv_insert_all_vfsops( | ||
322 | struct vfs *vfsp) | ||
323 | { | ||
324 | struct xfs_mount *mp; | ||
325 | |||
326 | mp = xfs_mount_init(); | ||
327 | vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp); | ||
328 | vfs_insertdmapi(vfsp); | ||
329 | vfs_insertquota(vfsp); | ||
330 | } | ||
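
Every wrapper in this file follows the same fall-through dispatch: start at the given behavior descriptor, walk BHV_NEXT() past layers that leave the operation NULL, and call the first implementation found. That is what lets an optional layer (DMAPI at VFS_POSITION_DM, quota at VFS_POSITION_QM) hook only the operations it cares about. A hedged sketch of such an intercepting layer — the names are illustrative, not a real XFS module — that hooks vfs_sync and forwards the call down the chain:

/* Hypothetical filter layer: intercepts vfs_sync; every other operation
 * falls through because this layer leaves those vfsops entries NULL. */
STATIC int
example_filter_sync(
	bhv_desc_t	*bdp,
	int		flags,
	struct cred	*credp)
{
	int		error;

	/* layer-specific work (e.g. accounting) would go here */
	PVFS_SYNC(BHV_NEXT(bdp), flags, credp, error);
	return error;
}

Such a layer would be attached with vfs_insertops()/bhv_insert() so that it sits above the core XFS behavior at VFS_POSITION_XFS in the chain.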
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h new file mode 100644 index 000000000000..76493991578f --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vfs.h | |||
@@ -0,0 +1,223 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_VFS_H__ | ||
33 | #define __XFS_VFS_H__ | ||
34 | |||
35 | #include <linux/vfs.h> | ||
36 | #include "xfs_fs.h" | ||
37 | |||
38 | struct fid; | ||
39 | struct vfs; | ||
40 | struct cred; | ||
41 | struct vnode; | ||
42 | struct kstatfs; | ||
43 | struct seq_file; | ||
44 | struct super_block; | ||
45 | struct xfs_mount_args; | ||
46 | |||
47 | typedef struct kstatfs xfs_statfs_t; | ||
48 | |||
49 | typedef struct vfs_sync_work { | ||
50 | struct list_head w_list; | ||
51 | struct vfs *w_vfs; | ||
52 | void *w_data; /* syncer routine argument */ | ||
53 | void (*w_syncer)(struct vfs *, void *); | ||
54 | } vfs_sync_work_t; | ||
55 | |||
56 | typedef struct vfs { | ||
57 | u_int vfs_flag; /* flags */ | ||
58 | xfs_fsid_t vfs_fsid; /* file system ID */ | ||
59 | xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */ | ||
60 | bhv_head_t vfs_bh; /* head of vfs behavior chain */ | ||
61 | struct super_block *vfs_super; /* generic superblock pointer */ | ||
62 | struct task_struct *vfs_sync_task; /* generalised sync thread */ | ||
63 | vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */ | ||
64 | struct list_head vfs_sync_list; /* sync thread work item list */ | ||
65 | spinlock_t vfs_sync_lock; /* work item list lock */ | ||
66 | int vfs_sync_seq; /* sync thread generation no. */ | ||
67 | wait_queue_head_t vfs_wait_single_sync_task; | ||
68 | wait_queue_head_t vfs_wait_sync_task; | ||
69 | } vfs_t; | ||
70 | |||
71 | #define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ | ||
72 | |||
73 | #define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) ) | ||
74 | #define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) ) | ||
75 | #define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh ) | ||
76 | #define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) ) | ||
77 | |||
78 | #define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ | ||
79 | #define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */ | ||
80 | #define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ | ||
81 | |||
82 | typedef enum { | ||
83 | VFS_BHV_UNKNOWN, /* not specified */ | ||
84 | VFS_BHV_XFS, /* xfs */ | ||
85 | VFS_BHV_DM, /* data migration */ | ||
86 | VFS_BHV_QM, /* quota manager */ | ||
87 | VFS_BHV_IO, /* IO path */ | ||
88 | VFS_BHV_END /* housekeeping end-of-range */ | ||
89 | } vfs_bhv_t; | ||
90 | |||
91 | #define VFS_POSITION_XFS (BHV_POSITION_BASE) | ||
92 | #define VFS_POSITION_DM (VFS_POSITION_BASE+10) | ||
93 | #define VFS_POSITION_QM (VFS_POSITION_BASE+20) | ||
94 | #define VFS_POSITION_IO (VFS_POSITION_BASE+30) | ||
95 | |||
96 | #define VFS_RDONLY 0x0001 /* read-only vfs */ | ||
97 | #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ | ||
98 | #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ | ||
99 | #define VFS_UMOUNT 0x0008 /* unmount in progress */ | ||
100 | #define VFS_END 0x0008 /* max flag */ | ||
101 | |||
102 | #define SYNC_ATTR 0x0001 /* sync attributes */ | ||
103 | #define SYNC_CLOSE 0x0002 /* close file system down */ | ||
104 | #define SYNC_DELWRI 0x0004 /* look at delayed writes */ | ||
105 | #define SYNC_WAIT 0x0008 /* wait for i/o to complete */ | ||
106 | #define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ | ||
107 | #define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ | ||
108 | #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ | ||
109 | #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ | ||
110 | |||
111 | typedef int (*vfs_mount_t)(bhv_desc_t *, | ||
112 | struct xfs_mount_args *, struct cred *); | ||
113 | typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *, | ||
114 | struct xfs_mount_args *, int); | ||
115 | typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *); | ||
116 | typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *); | ||
117 | typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *, | ||
118 | struct xfs_mount_args *); | ||
119 | typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **); | ||
120 | typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *); | ||
121 | typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *); | ||
122 | typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *); | ||
123 | typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t); | ||
124 | typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t); | ||
125 | typedef void (*vfs_init_vnode_t)(bhv_desc_t *, | ||
126 | struct vnode *, bhv_desc_t *, int); | ||
127 | typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); | ||
128 | typedef void (*vfs_freeze_t)(bhv_desc_t *); | ||
129 | |||
130 | typedef struct vfsops { | ||
131 | bhv_position_t vf_position; /* behavior chain position */ | ||
132 | vfs_mount_t vfs_mount; /* mount file system */ | ||
133 | vfs_parseargs_t vfs_parseargs; /* parse mount options */ | ||
134 | vfs_showargs_t vfs_showargs; /* unparse mount options */ | ||
135 | vfs_unmount_t vfs_unmount; /* unmount file system */ | ||
136 | vfs_mntupdate_t vfs_mntupdate; /* update file system options */ | ||
137 | vfs_root_t vfs_root; /* get root vnode */ | ||
138 | vfs_statvfs_t vfs_statvfs; /* file system statistics */ | ||
139 | vfs_sync_t vfs_sync; /* flush files */ | ||
140 | vfs_vget_t vfs_vget; /* get vnode from fid */ | ||
141 | vfs_dmapiops_t vfs_dmapiops; /* data migration */ | ||
142 | vfs_quotactl_t vfs_quotactl; /* disk quota */ | ||
143 | vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */ | ||
144 | vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */ | ||
145 | vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */ | ||
146 | } vfsops_t; | ||
147 | |||
148 | /* | ||
149 | * VFS's. Operates on vfs structure pointers (starts at bhv head). | ||
150 | */ | ||
151 | #define VHEAD(v) ((v)->vfs_fbhv) | ||
152 | #define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr)) | ||
153 | #define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f)) | ||
154 | #define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m)) | ||
155 | #define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr)) | ||
156 | #define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args)) | ||
157 | #define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp)) | ||
158 | #define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp)) | ||
159 | #define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr)) | ||
160 | #define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp)) | ||
161 | #define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p)) | ||
162 | #define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p)) | ||
163 | #define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) ) | ||
164 | #define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) ) | ||
165 | #define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) ) | ||
166 | |||
167 | /* | ||
168 | * PVFS's. Operates on behavior descriptor pointers. | ||
169 | */ | ||
170 | #define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr)) | ||
171 | #define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f)) | ||
172 | #define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m)) | ||
173 | #define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr)) | ||
174 | #define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args)) | ||
175 | #define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp)) | ||
176 | #define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp)) | ||
177 | #define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr)) | ||
178 | #define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp)) | ||
179 | #define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p)) | ||
180 | #define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p)) | ||
181 | #define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) ) | ||
182 | #define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) ) | ||
183 | #define PVFS_FREEZE(b) ( vfs_freeze(b) ) | ||
184 | |||
185 | extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); | ||
186 | extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); | ||
187 | extern int vfs_showargs(bhv_desc_t *, struct seq_file *); | ||
188 | extern int vfs_unmount(bhv_desc_t *, int, struct cred *); | ||
189 | extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *); | ||
190 | extern int vfs_root(bhv_desc_t *, struct vnode **); | ||
191 | extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *); | ||
192 | extern int vfs_sync(bhv_desc_t *, int, struct cred *); | ||
193 | extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *); | ||
194 | extern int vfs_dmapiops(bhv_desc_t *, caddr_t); | ||
195 | extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t); | ||
196 | extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); | ||
197 | extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int); | ||
198 | extern void vfs_freeze(bhv_desc_t *); | ||
199 | |||
200 | typedef struct bhv_vfsops { | ||
201 | struct vfsops bhv_common; | ||
202 | void * bhv_custom; | ||
203 | } bhv_vfsops_t; | ||
204 | |||
205 | #define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) ) | ||
206 | #define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom ) | ||
207 | #define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) | ||
208 | #define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) | ||
209 | |||
210 | extern vfs_t *vfs_allocate(void); | ||
211 | extern void vfs_deallocate(vfs_t *); | ||
212 | extern void vfs_insertops(vfs_t *, bhv_vfsops_t *); | ||
213 | extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *); | ||
214 | |||
215 | extern void bhv_insert_all_vfsops(struct vfs *); | ||
216 | extern void bhv_remove_all_vfsops(struct vfs *, int); | ||
217 | extern void bhv_remove_vfsops(struct vfs *, int); | ||
218 | |||
219 | #define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen) | ||
220 | #define fs_check_frozen(vfsp, level) \ | ||
221 | vfs_check_frozen(vfsp->vfs_super, level); | ||
222 | |||
223 | #endif /* __XFS_VFS_H__ */ | ||
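
The VFS_* macros start dispatch at the head of the behavior chain (VHEAD) and return the operation's result through their final argument; the PVFS_* forms take a behavior descriptor directly, which is how a stacked layer forwards a call to the layers below it. A brief, hedged usage sketch (the function is illustrative, not part of this header):

/* Illustrative caller: sync a mounted XFS vfs, then fetch statistics. */
static void
example_sync_and_stat(
	vfs_t		*vfsp,
	xfs_statfs_t	*statp)
{
	int		error;

	VFS_SYNC(vfsp, SYNC_ATTR | SYNC_WAIT, NULL, error);
	if (!error)
		VFS_STATVFS(vfsp, statp, NULL, error);
}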
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c new file mode 100644 index 000000000000..849c61c74f3c --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vnode.c | |||
@@ -0,0 +1,455 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | #include "xfs.h" | ||
34 | |||
35 | |||
36 | uint64_t vn_generation; /* vnode generation number */ | ||
37 | DEFINE_SPINLOCK(vnumber_lock); | ||
38 | |||
39 | /* | ||
40 | * Dedicated vnode inactive/reclaim sync semaphores. | ||
41 | * Prime number of hash buckets since address is used as the key. | ||
42 | */ | ||
43 | #define NVSYNC 37 | ||
44 | #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) | ||
45 | sv_t vsync[NVSYNC]; | ||
46 | |||
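
vptosync() hashes the vnode's address into this prime-sized array, so unrelated vnodes rarely share a wait channel and no per-vnode semaphore storage is needed. The same idea in isolation, as a hedged sketch with illustrative names:

/* Illustrative pointer-hash: a prime bucket count keeps naturally
 * aligned addresses from clustering into a few buckets. */
#define EXAMPLE_NBUCKETS	37

static sv_t example_sync[EXAMPLE_NBUCKETS];

static sv_t *
example_ptr_to_sync(void *p)
{
	return &example_sync[(unsigned long)p % EXAMPLE_NBUCKETS];
}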
47 | /* | ||
48 | * Translate stat(2) file types to vnode types and vice versa. | ||
49 | * Aware of numeric order of S_IFMT and vnode type values. | ||
50 | */ | ||
51 | enum vtype iftovt_tab[] = { | ||
52 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | ||
53 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON | ||
54 | }; | ||
55 | |||
56 | u_short vttoif_tab[] = { | ||
57 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK | ||
58 | }; | ||
59 | |||
60 | |||
61 | void | ||
62 | vn_init(void) | ||
63 | { | ||
64 | register sv_t *svp; | ||
65 | register int i; | ||
66 | |||
67 | for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) | ||
68 | init_sv(svp, SV_DEFAULT, "vsy", i); | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * Clean a vnode of filesystem-specific data and prepare it for reuse. | ||
73 | */ | ||
74 | STATIC int | ||
75 | vn_reclaim( | ||
76 | struct vnode *vp) | ||
77 | { | ||
78 | int error; | ||
79 | |||
80 | XFS_STATS_INC(vn_reclaim); | ||
81 | vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); | ||
82 | |||
83 | /* | ||
84 | * Only make the VOP_RECLAIM call if there are behaviors | ||
85 | * to call. | ||
86 | */ | ||
87 | if (vp->v_fbhv) { | ||
88 | VOP_RECLAIM(vp, error); | ||
89 | if (error) | ||
90 | return -error; | ||
91 | } | ||
92 | ASSERT(vp->v_fbhv == NULL); | ||
93 | |||
94 | VN_LOCK(vp); | ||
95 | vp->v_flag &= (VRECLM|VWAIT); | ||
96 | VN_UNLOCK(vp, 0); | ||
97 | |||
98 | vp->v_type = VNON; | ||
99 | vp->v_fbhv = NULL; | ||
100 | |||
101 | #ifdef XFS_VNODE_TRACE | ||
102 | ktrace_free(vp->v_trace); | ||
103 | vp->v_trace = NULL; | ||
104 | #endif | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | STATIC void | ||
110 | vn_wakeup( | ||
111 | struct vnode *vp) | ||
112 | { | ||
113 | VN_LOCK(vp); | ||
114 | if (vp->v_flag & VWAIT) | ||
115 | sv_broadcast(vptosync(vp)); | ||
116 | vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); | ||
117 | VN_UNLOCK(vp, 0); | ||
118 | } | ||
119 | |||
120 | int | ||
121 | vn_wait( | ||
122 | struct vnode *vp) | ||
123 | { | ||
124 | VN_LOCK(vp); | ||
125 | if (vp->v_flag & (VINACT | VRECLM)) { | ||
126 | vp->v_flag |= VWAIT; | ||
127 | sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); | ||
128 | return 1; | ||
129 | } | ||
130 | VN_UNLOCK(vp, 0); | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | struct vnode * | ||
135 | vn_initialize( | ||
136 | struct inode *inode) | ||
137 | { | ||
138 | struct vnode *vp = LINVFS_GET_VP(inode); | ||
139 | |||
140 | XFS_STATS_INC(vn_active); | ||
141 | XFS_STATS_INC(vn_alloc); | ||
142 | |||
143 | vp->v_flag = VMODIFIED; | ||
144 | spinlock_init(&vp->v_lock, "v_lock"); | ||
145 | |||
146 | spin_lock(&vnumber_lock); | ||
147 | if (!++vn_generation) /* v_number shouldn't be zero */ | ||
148 | vn_generation++; | ||
149 | vp->v_number = vn_generation; | ||
150 | spin_unlock(&vnumber_lock); | ||
151 | |||
152 | ASSERT(VN_CACHED(vp) == 0); | ||
153 | |||
154 | /* Initialize the first behavior and the behavior chain head. */ | ||
155 | vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); | ||
156 | |||
157 | #ifdef XFS_VNODE_TRACE | ||
158 | vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); | ||
159 | printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace); | ||
160 | #endif /* XFS_VNODE_TRACE */ | ||
161 | |||
162 | vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address); | ||
163 | return vp; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Get a reference on a vnode. | ||
168 | */ | ||
169 | vnode_t * | ||
170 | vn_get( | ||
171 | struct vnode *vp, | ||
172 | vmap_t *vmap) | ||
173 | { | ||
174 | struct inode *inode; | ||
175 | |||
176 | XFS_STATS_INC(vn_get); | ||
177 | inode = LINVFS_GET_IP(vp); | ||
178 | if (inode->i_state & I_FREEING) | ||
179 | return NULL; | ||
180 | |||
181 | inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); | ||
182 | if (!inode) /* Inode not present */ | ||
183 | return NULL; | ||
184 | |||
185 | vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); | ||
186 | |||
187 | return vp; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Revalidate the Linux inode from the vattr. | ||
192 | * Note: i_size _not_ updated; we must hold the inode | ||
193 | * semaphore when doing that - caller's responsibility. | ||
194 | */ | ||
195 | void | ||
196 | vn_revalidate_core( | ||
197 | struct vnode *vp, | ||
198 | vattr_t *vap) | ||
199 | { | ||
200 | struct inode *inode = LINVFS_GET_IP(vp); | ||
201 | |||
202 | inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; | ||
203 | inode->i_nlink = vap->va_nlink; | ||
204 | inode->i_uid = vap->va_uid; | ||
205 | inode->i_gid = vap->va_gid; | ||
206 | inode->i_blocks = vap->va_nblocks; | ||
207 | inode->i_mtime = vap->va_mtime; | ||
208 | inode->i_ctime = vap->va_ctime; | ||
209 | inode->i_atime = vap->va_atime; | ||
210 | if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) | ||
211 | inode->i_flags |= S_IMMUTABLE; | ||
212 | else | ||
213 | inode->i_flags &= ~S_IMMUTABLE; | ||
214 | if (vap->va_xflags & XFS_XFLAG_APPEND) | ||
215 | inode->i_flags |= S_APPEND; | ||
216 | else | ||
217 | inode->i_flags &= ~S_APPEND; | ||
218 | if (vap->va_xflags & XFS_XFLAG_SYNC) | ||
219 | inode->i_flags |= S_SYNC; | ||
220 | else | ||
221 | inode->i_flags &= ~S_SYNC; | ||
222 | if (vap->va_xflags & XFS_XFLAG_NOATIME) | ||
223 | inode->i_flags |= S_NOATIME; | ||
224 | else | ||
225 | inode->i_flags &= ~S_NOATIME; | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Revalidate the Linux inode from the vnode. | ||
230 | */ | ||
231 | int | ||
232 | vn_revalidate( | ||
233 | struct vnode *vp) | ||
234 | { | ||
235 | vattr_t va; | ||
236 | int error; | ||
237 | |||
238 | vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address); | ||
239 | ASSERT(vp->v_fbhv != NULL); | ||
240 | |||
241 | va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS; | ||
242 | VOP_GETATTR(vp, &va, 0, NULL, error); | ||
243 | if (!error) { | ||
244 | vn_revalidate_core(vp, &va); | ||
245 | VUNMODIFY(vp); | ||
246 | } | ||
247 | return -error; | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Purge a vnode from the cache. | ||
252 | * At this point the vnode is guaranteed to have no references (vn_count == 0). | ||
253 | * The caller has to make sure that there is no way someone could | ||
254 | * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). | ||
255 | */ | ||
256 | void | ||
257 | vn_purge( | ||
258 | struct vnode *vp, | ||
259 | vmap_t *vmap) | ||
260 | { | ||
261 | vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); | ||
262 | |||
263 | again: | ||
264 | /* | ||
265 | * Check whether vp has already been reclaimed since our caller | ||
266 | * sampled its version while holding a filesystem cache lock that | ||
267 | * its VOP_RECLAIM function acquires. | ||
268 | */ | ||
269 | VN_LOCK(vp); | ||
270 | if (vp->v_number != vmap->v_number) { | ||
271 | VN_UNLOCK(vp, 0); | ||
272 | return; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * If vp is being reclaimed or inactivated, wait until it is inert, | ||
277 | * then proceed. Can't assume that vnode is actually reclaimed | ||
278 | * just because the reclaimed flag is asserted -- a vn_alloc | ||
279 | * reclaim can fail. | ||
280 | */ | ||
281 | if (vp->v_flag & (VINACT | VRECLM)) { | ||
282 | ASSERT(vn_count(vp) == 0); | ||
283 | vp->v_flag |= VWAIT; | ||
284 | sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); | ||
285 | goto again; | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * Another process could have raced in and gotten this vnode... | ||
290 | */ | ||
291 | if (vn_count(vp) > 0) { | ||
292 | VN_UNLOCK(vp, 0); | ||
293 | return; | ||
294 | } | ||
295 | |||
296 | XFS_STATS_DEC(vn_active); | ||
297 | vp->v_flag |= VRECLM; | ||
298 | VN_UNLOCK(vp, 0); | ||
299 | |||
300 | /* | ||
301 | * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells | ||
302 | * vp's filesystem to flush and invalidate all cached resources. | ||
303 | * When vn_reclaim returns, vp should have no private data, | ||
304 | * either in a system cache or attached to v_data. | ||
305 | */ | ||
306 | if (vn_reclaim(vp) != 0) | ||
307 | panic("vn_purge: cannot reclaim"); | ||
308 | |||
309 | /* | ||
310 | * Wakeup anyone waiting for vp to be reclaimed. | ||
311 | */ | ||
312 | vn_wakeup(vp); | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Add a reference to a referenced vnode. | ||
317 | */ | ||
318 | struct vnode * | ||
319 | vn_hold( | ||
320 | struct vnode *vp) | ||
321 | { | ||
322 | struct inode *inode; | ||
323 | |||
324 | XFS_STATS_INC(vn_hold); | ||
325 | |||
326 | VN_LOCK(vp); | ||
327 | inode = igrab(LINVFS_GET_IP(vp)); | ||
328 | ASSERT(inode); | ||
329 | VN_UNLOCK(vp, 0); | ||
330 | |||
331 | return vp; | ||
332 | } | ||
333 | |||
334 | /* | ||
335 | * Call VOP_INACTIVE on last reference. | ||
336 | */ | ||
337 | void | ||
338 | vn_rele( | ||
339 | struct vnode *vp) | ||
340 | { | ||
341 | int vcnt; | ||
342 | int cache; | ||
343 | |||
344 | XFS_STATS_INC(vn_rele); | ||
345 | |||
346 | VN_LOCK(vp); | ||
347 | |||
348 | vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); | ||
349 | vcnt = vn_count(vp); | ||
350 | |||
351 | /* | ||
352 | * Since we always get called from put_inode we know | ||
353 | * that i_count won't be decremented after we | ||
354 | * return. | ||
355 | */ | ||
356 | if (!vcnt) { | ||
357 | /* | ||
358 | * As soon as we turn this on, no one can find us in vn_get | ||
359 | * until we turn off VINACT or VRECLM | ||
360 | */ | ||
361 | vp->v_flag |= VINACT; | ||
362 | VN_UNLOCK(vp, 0); | ||
363 | |||
364 | /* | ||
365 | * Do not make the VOP_INACTIVE call if there | ||
366 | * are no behaviors attached to the vnode to call. | ||
367 | */ | ||
368 | if (vp->v_fbhv) | ||
369 | VOP_INACTIVE(vp, NULL, cache); | ||
370 | |||
371 | VN_LOCK(vp); | ||
372 | if (vp->v_flag & VWAIT) | ||
373 | sv_broadcast(vptosync(vp)); | ||
374 | |||
375 | vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); | ||
376 | } | ||
377 | |||
378 | VN_UNLOCK(vp, 0); | ||
379 | |||
380 | vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * Finish the removal of a vnode. | ||
385 | */ | ||
386 | void | ||
387 | vn_remove( | ||
388 | struct vnode *vp) | ||
389 | { | ||
390 | vmap_t vmap; | ||
391 | |||
392 | /* Make sure we don't do this to the same vnode twice */ | ||
393 | if (!(vp->v_fbhv)) | ||
394 | return; | ||
395 | |||
396 | XFS_STATS_INC(vn_remove); | ||
397 | vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); | ||
398 | |||
399 | /* | ||
400 | * After the following purge the vnode | ||
401 | * will no longer exist. | ||
402 | */ | ||
403 | VMAP(vp, vmap); | ||
404 | vn_purge(vp, &vmap); | ||
405 | } | ||
406 | |||
407 | |||
408 | #ifdef XFS_VNODE_TRACE | ||
409 | |||
410 | #define KTRACE_ENTER(vp, vk, s, line, ra) \ | ||
411 | ktrace_enter( (vp)->v_trace, \ | ||
412 | /* 0 */ (void *)(__psint_t)(vk), \ | ||
413 | /* 1 */ (void *)(s), \ | ||
414 | /* 2 */ (void *)(__psint_t) line, \ | ||
415 | /* 3 */ (void *)(vn_count(vp)), \ | ||
416 | /* 4 */ (void *)(ra), \ | ||
417 | /* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \ | ||
418 | /* 6 */ (void *)(__psint_t)current_cpu(), \ | ||
419 | /* 7 */ (void *)(__psint_t)current_pid(), \ | ||
420 | /* 8 */ (void *)__return_address, \ | ||
421 | /* 9 */ 0, 0, 0, 0, 0, 0, 0) | ||
422 | |||
423 | /* | ||
424 | * Vnode tracing code. | ||
425 | */ | ||
426 | void | ||
427 | vn_trace_entry(vnode_t *vp, char *func, inst_t *ra) | ||
428 | { | ||
429 | KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra); | ||
430 | } | ||
431 | |||
432 | void | ||
433 | vn_trace_exit(vnode_t *vp, char *func, inst_t *ra) | ||
434 | { | ||
435 | KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra); | ||
436 | } | ||
437 | |||
438 | void | ||
439 | vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra) | ||
440 | { | ||
441 | KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra); | ||
442 | } | ||
443 | |||
444 | void | ||
445 | vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra) | ||
446 | { | ||
447 | KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra); | ||
448 | } | ||
449 | |||
450 | void | ||
451 | vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra) | ||
452 | { | ||
453 | KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra); | ||
454 | } | ||
455 | #endif /* XFS_VNODE_TRACE */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h new file mode 100644 index 000000000000..da76c1f1e11c --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
@@ -0,0 +1,666 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | * | ||
32 | * Portions Copyright (c) 1989, 1993 | ||
33 | * The Regents of the University of California. All rights reserved. | ||
34 | * | ||
35 | * Redistribution and use in source and binary forms, with or without | ||
36 | * modification, are permitted provided that the following conditions | ||
37 | * are met: | ||
38 | * 1. Redistributions of source code must retain the above copyright | ||
39 | * notice, this list of conditions and the following disclaimer. | ||
40 | * 2. Redistributions in binary form must reproduce the above copyright | ||
41 | * notice, this list of conditions and the following disclaimer in the | ||
42 | * documentation and/or other materials provided with the distribution. | ||
43 | * 3. Neither the name of the University nor the names of its contributors | ||
44 | * may be used to endorse or promote products derived from this software | ||
45 | * without specific prior written permission. | ||
46 | * | ||
47 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | ||
48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
57 | * SUCH DAMAGE. | ||
58 | */ | ||
59 | #ifndef __XFS_VNODE_H__ | ||
60 | #define __XFS_VNODE_H__ | ||
61 | |||
62 | struct uio; | ||
63 | struct file; | ||
64 | struct vattr; | ||
65 | struct xfs_iomap; | ||
66 | struct attrlist_cursor_kern; | ||
67 | |||
68 | /* | ||
69 | * Vnode types. VNON means no type. | ||
70 | */ | ||
71 | enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; | ||
72 | |||
73 | typedef xfs_ino_t vnumber_t; | ||
74 | typedef struct dentry vname_t; | ||
75 | typedef bhv_head_t vn_bhv_head_t; | ||
76 | |||
77 | /* | ||
78 | * MP locking protocols: | ||
79 | * v_flag, v_vfsp VN_LOCK/VN_UNLOCK | ||
80 | * v_type read-only or fs-dependent | ||
81 | */ | ||
82 | typedef struct vnode { | ||
83 | __u32 v_flag; /* vnode flags (see below) */ | ||
84 | enum vtype v_type; /* vnode type */ | ||
85 | struct vfs *v_vfsp; /* ptr to containing VFS */ | ||
86 | vnumber_t v_number; /* in-core vnode number */ | ||
87 | vn_bhv_head_t v_bh; /* behavior head */ | ||
88 | spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ | ||
89 | struct inode v_inode; /* Linux inode */ | ||
90 | #ifdef XFS_VNODE_TRACE | ||
91 | struct ktrace *v_trace; /* trace header structure */ | ||
92 | #endif | ||
93 | } vnode_t; | ||
94 | |||
95 | #define v_fbhv v_bh.bh_first /* first behavior */ | ||
96 | #define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ | ||
97 | |||
98 | #define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ | ||
99 | #define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */ | ||
100 | #define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ | ||
101 | |||
102 | typedef enum { | ||
103 | VN_BHV_UNKNOWN, /* not specified */ | ||
104 | VN_BHV_XFS, /* xfs */ | ||
105 | VN_BHV_DM, /* data migration */ | ||
106 | VN_BHV_QM, /* quota manager */ | ||
107 | VN_BHV_IO, /* IO path */ | ||
108 | VN_BHV_END /* housekeeping end-of-range */ | ||
109 | } vn_bhv_t; | ||
110 | |||
111 | #define VNODE_POSITION_XFS (VNODE_POSITION_BASE) | ||
112 | #define VNODE_POSITION_DM (VNODE_POSITION_BASE+10) | ||
113 | #define VNODE_POSITION_QM (VNODE_POSITION_BASE+20) | ||
114 | #define VNODE_POSITION_IO (VNODE_POSITION_BASE+30) | ||
115 | |||
116 | /* | ||
117 | * Macros for dealing with the behavior descriptor inside of the vnode. | ||
118 | */ | ||
119 | #define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp)) | ||
120 | #define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp)) | ||
121 | |||
122 | #define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) | ||
123 | #define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) | ||
124 | #define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp) | ||
125 | #define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops) | ||
126 | #define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops) | ||
127 | |||
128 | /* | ||
129 | * Vnode to Linux inode mapping. | ||
130 | */ | ||
131 | #define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode)) | ||
132 | #define LINVFS_GET_IP(vp) (&(vp)->v_inode) | ||
133 | |||
134 | /* | ||
135 | * Convert between vnode types and inode formats (since POSIX.1 | ||
136 | * defines mode word of stat structure in terms of inode formats). | ||
137 | */ | ||
138 | extern enum vtype iftovt_tab[]; | ||
139 | extern u_short vttoif_tab[]; | ||
140 | #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) | ||
141 | #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) | ||
142 | #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) | ||
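
IFTOVT() shifts the S_IFMT bits of a mode down by 12 to index iftovt_tab[], and VTTOIF()/MAKEIMODE() map a vtype back to inode-format bits via vttoif_tab[]. A small worked example, hedged and not part of the header itself:

/* Worked example of the translation macros (illustrative only). */
static void example_mode_translation(void)
{
	/* S_IFREG is 0100000, so (S_IFREG >> 12) == 8 and iftovt_tab[8] == VREG */
	enum vtype t = IFTOVT(S_IFREG | 0644);		/* t == VREG */
	/* MAKEIMODE re-combines the type with the permission bits */
	int imode = MAKEIMODE(t, 0644);			/* == (S_IFREG | 0644) */

	(void)t; (void)imode;	/* silence unused-variable warnings here */
}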
143 | |||
144 | |||
145 | /* | ||
146 | * Vnode flags. | ||
147 | */ | ||
148 | #define VINACT 0x1 /* vnode is being inactivated */ | ||
149 | #define VRECLM 0x2 /* vnode is being reclaimed */ | ||
150 | #define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ | ||
151 | #define VMODIFIED 0x8 /* XFS inode state possibly differs */ | ||
152 | /* from the Linux inode state. */ | ||
153 | |||
154 | /* | ||
155 | * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter. | ||
156 | */ | ||
157 | typedef enum vrwlock { | ||
158 | VRWLOCK_NONE, | ||
159 | VRWLOCK_READ, | ||
160 | VRWLOCK_WRITE, | ||
161 | VRWLOCK_WRITE_DIRECT, | ||
162 | VRWLOCK_TRY_READ, | ||
163 | VRWLOCK_TRY_WRITE | ||
164 | } vrwlock_t; | ||
165 | |||
166 | /* | ||
167 | * Return values for VOP_INACTIVE. A return value of | ||
168 | * VN_INACTIVE_NOCACHE implies that the file system behavior | ||
169 | * has disassociated its state and bhv_desc_t from the vnode. | ||
170 | */ | ||
171 | #define VN_INACTIVE_CACHE 0 | ||
172 | #define VN_INACTIVE_NOCACHE 1 | ||
173 | |||
174 | /* | ||
175 | * Values for the cmd code given to VOP_VNODE_CHANGE. | ||
176 | */ | ||
177 | typedef enum vchange { | ||
178 | VCHANGE_FLAGS_FRLOCKS = 0, | ||
179 | VCHANGE_FLAGS_ENF_LOCKING = 1, | ||
180 | VCHANGE_FLAGS_TRUNCATED = 2, | ||
181 | VCHANGE_FLAGS_PAGE_DIRTY = 3, | ||
182 | VCHANGE_FLAGS_IOEXCL_COUNT = 4 | ||
183 | } vchange_t; | ||
184 | |||
185 | |||
186 | typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); | ||
187 | typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, | ||
188 | const struct iovec *, unsigned int, | ||
189 | loff_t *, int, struct cred *); | ||
190 | typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, | ||
191 | const struct iovec *, unsigned int, | ||
192 | loff_t *, int, struct cred *); | ||
193 | typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, | ||
194 | loff_t *, int, size_t, read_actor_t, | ||
195 | void *, struct cred *); | ||
196 | typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, | ||
197 | int, unsigned int, void __user *); | ||
198 | typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, | ||
199 | struct cred *); | ||
200 | typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int, | ||
201 | struct cred *); | ||
202 | typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *); | ||
203 | typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **, | ||
204 | int, vnode_t *, struct cred *); | ||
205 | typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *, | ||
206 | vnode_t **, struct cred *); | ||
207 | typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *); | ||
208 | typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *, | ||
209 | struct cred *); | ||
210 | typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *, | ||
211 | struct cred *); | ||
212 | typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *, | ||
213 | vnode_t **, struct cred *); | ||
214 | typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *); | ||
215 | typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *, | ||
216 | int *); | ||
217 | typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *, | ||
218 | char *, vnode_t **, struct cred *); | ||
219 | typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int, | ||
220 | struct cred *); | ||
221 | typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *, | ||
222 | xfs_off_t, xfs_off_t); | ||
223 | typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *); | ||
224 | typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *); | ||
225 | typedef int (*vop_release_t)(bhv_desc_t *); | ||
226 | typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t); | ||
227 | typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t); | ||
228 | typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, | ||
229 | struct xfs_iomap *, int *); | ||
230 | typedef int (*vop_reclaim_t)(bhv_desc_t *); | ||
231 | typedef int (*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int, | ||
232 | struct cred *); | ||
233 | typedef int (*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int, | ||
234 | struct cred *); | ||
235 | typedef int (*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *); | ||
236 | typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int, | ||
237 | struct attrlist_cursor_kern *, struct cred *); | ||
238 | typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int); | ||
239 | typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t); | ||
240 | typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); | ||
241 | typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); | ||
242 | typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, | ||
243 | uint64_t, int); | ||
244 | typedef int (*vop_iflush_t)(bhv_desc_t *, int); | ||
245 | |||
246 | |||
247 | typedef struct vnodeops { | ||
248 | bhv_position_t vn_position; /* position within behavior chain */ | ||
249 | vop_open_t vop_open; | ||
250 | vop_read_t vop_read; | ||
251 | vop_write_t vop_write; | ||
252 | vop_sendfile_t vop_sendfile; | ||
253 | vop_ioctl_t vop_ioctl; | ||
254 | vop_getattr_t vop_getattr; | ||
255 | vop_setattr_t vop_setattr; | ||
256 | vop_access_t vop_access; | ||
257 | vop_lookup_t vop_lookup; | ||
258 | vop_create_t vop_create; | ||
259 | vop_remove_t vop_remove; | ||
260 | vop_link_t vop_link; | ||
261 | vop_rename_t vop_rename; | ||
262 | vop_mkdir_t vop_mkdir; | ||
263 | vop_rmdir_t vop_rmdir; | ||
264 | vop_readdir_t vop_readdir; | ||
265 | vop_symlink_t vop_symlink; | ||
266 | vop_readlink_t vop_readlink; | ||
267 | vop_fsync_t vop_fsync; | ||
268 | vop_inactive_t vop_inactive; | ||
269 | vop_fid2_t vop_fid2; | ||
270 | vop_rwlock_t vop_rwlock; | ||
271 | vop_rwunlock_t vop_rwunlock; | ||
272 | vop_bmap_t vop_bmap; | ||
273 | vop_reclaim_t vop_reclaim; | ||
274 | vop_attr_get_t vop_attr_get; | ||
275 | vop_attr_set_t vop_attr_set; | ||
276 | vop_attr_remove_t vop_attr_remove; | ||
277 | vop_attr_list_t vop_attr_list; | ||
278 | vop_link_removed_t vop_link_removed; | ||
279 | vop_vnode_change_t vop_vnode_change; | ||
280 | vop_ptossvp_t vop_tosspages; | ||
281 | vop_pflushinvalvp_t vop_flushinval_pages; | ||
282 | vop_pflushvp_t vop_flush_pages; | ||
283 | vop_release_t vop_release; | ||
284 | vop_iflush_t vop_iflush; | ||
285 | } vnodeops_t; | ||
286 | |||
287 | /* | ||
288 | * VOP's. | ||
289 | */ | ||
290 | #define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op) | ||
291 | |||
292 | #define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \ | ||
293 | rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) | ||
294 | #define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \ | ||
295 | rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) | ||
296 | #define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ | ||
297 | rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) | ||
298 | #define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ | ||
299 | rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) | ||
300 | #define VOP_OPEN(vp, cr, rv) \ | ||
301 | rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr) | ||
302 | #define VOP_GETATTR(vp, vap, f, cr, rv) \ | ||
303 | rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr) | ||
304 | #define VOP_SETATTR(vp, vap, f, cr, rv) \ | ||
305 | rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr) | ||
306 | #define VOP_ACCESS(vp, mode, cr, rv) \ | ||
307 | rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr) | ||
308 | #define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \ | ||
309 | rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr) | ||
310 | #define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \ | ||
311 | rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr) | ||
312 | #define VOP_REMOVE(dvp,d,cr,rv) \ | ||
313 | rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr) | ||
314 | #define VOP_LINK(tdvp,fvp,d,cr,rv) \ | ||
315 | rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr) | ||
316 | #define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \ | ||
317 | rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr) | ||
318 | #define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \ | ||
319 | rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr) | ||
320 | #define VOP_RMDIR(dp,d,cr,rv) \ | ||
321 | rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr) | ||
322 | #define VOP_READDIR(vp,uiop,cr,eofp,rv) \ | ||
323 | rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp) | ||
324 | #define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \ | ||
325 | rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr) | ||
326 | #define VOP_READLINK(vp,uiop,fl,cr,rv) \ | ||
327 | rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr) | ||
328 | #define VOP_FSYNC(vp,f,cr,b,e,rv) \ | ||
329 | rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e) | ||
330 | #define VOP_INACTIVE(vp, cr, rv) \ | ||
331 | rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr) | ||
332 | #define VOP_RELEASE(vp, rv) \ | ||
333 | rv = _VOP_(vop_release, vp)((vp)->v_fbhv) | ||
334 | #define VOP_FID2(vp, fidp, rv) \ | ||
335 | rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp) | ||
336 | #define VOP_RWLOCK(vp,i) \ | ||
337 | (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) | ||
338 | #define VOP_RWLOCK_TRY(vp,i) \ | ||
339 | _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) | ||
340 | #define VOP_RWUNLOCK(vp,i) \ | ||
341 | (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i) | ||
342 | #define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \ | ||
343 | rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr) | ||
344 | #define VOP_RECLAIM(vp, rv) \ | ||
345 | rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv) | ||
346 | #define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \ | ||
347 | rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred) | ||
348 | #define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \ | ||
349 | rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred) | ||
350 | #define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \ | ||
351 | rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred) | ||
352 | #define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \ | ||
353 | rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred) | ||
354 | #define VOP_LINK_REMOVED(vp, dvp, linkzero) \ | ||
355 | (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero) | ||
356 | #define VOP_VNODE_CHANGE(vp, cmd, val) \ | ||
357 | (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val) | ||
358 | /* | ||
359 | * These are page cache functions that now go through VOPs. | ||
360 | * 'last' parameter is unused and left in for IRIX compatibility | ||
361 | */ | ||
362 | #define VOP_TOSS_PAGES(vp, first, last, fiopt) \ | ||
363 | _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt) | ||
364 | /* | ||
365 | * 'last' parameter is unused and left in for IRIX compatibility | ||
366 | */ | ||
367 | #define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \ | ||
368 | _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt) | ||
369 | /* | ||
370 | * 'last' parameter is unused and left in for IRIX compatibility | ||
371 | */ | ||
372 | #define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \ | ||
373 | rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt) | ||
374 | #define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \ | ||
375 | rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg) | ||
376 | #define VOP_IFLUSH(vp, flags, rv) \ | ||
377 | rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags) | ||
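A minimal usage sketch may help here: every VOP_* macro above expands through _VOP_(), which looks up the named entry in the vnodeops table hung off v_fops and calls it with the vnode's behavior descriptor v_fbhv. The wrapper below is hypothetical and only illustrates that calling convention.

/*
 * Hypothetical illustration only: xfs_example_open() is not part of
 * this interface.  VOP_OPEN() expands to an indirect call through
 * vp->v_fops->vop_open, passing vp->v_fbhv as the behavior descriptor.
 */
static inline int
xfs_example_open(vnode_t *vp, struct cred *cr)
{
	int	error;

	VOP_OPEN(vp, cr, error);
	return error;
}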
378 | |||
379 | /* | ||
380 | * Flags for read/write calls - same values as IRIX | ||
381 | */ | ||
382 | #define IO_ISAIO 0x00001 /* don't wait for completion */ | ||
383 | #define IO_ISDIRECT 0x00004 /* bypass page cache */ | ||
384 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ | ||
385 | |||
386 | /* | ||
387 | * Flags for VOP_IFLUSH call | ||
388 | */ | ||
389 | #define FLUSH_SYNC 1 /* wait for flush to complete */ | ||
390 | #define FLUSH_INODE 2 /* flush the inode itself */ | ||
391 | #define FLUSH_LOG 4 /* force the last log entry for | ||
392 | * this inode out to disk */ | ||
393 | |||
394 | /* | ||
395 | * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and | ||
396 | * VOP_FLUSH_PAGES. | ||
397 | */ | ||
398 | #define FI_NONE 0 /* none */ | ||
399 | #define FI_REMAPF 1 /* Do a remapf prior to the operation */ | ||
400 | #define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. | ||
401 | Prevent VM access to the pages until | ||
402 | the operation completes. */ | ||
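As a hedged sketch of how these options combine with the page cache VOPs above, the helper below is hypothetical; the (0, -1) byte range standing for "the whole file" is an assumption made for illustration.

/*
 * Hypothetical illustration only: invalidate every cached page of a
 * vnode after a remapf.  The helper name and the (0, -1) range are
 * assumptions, not part of the original header.
 */
static inline void
xfs_example_flushinval(vnode_t *vp)
{
	VOP_FLUSHINVAL_PAGES(vp, (xfs_off_t)0, (xfs_off_t)-1, FI_REMAPF);
}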
403 | |||
404 | /* | ||
405 | * Vnode attributes. va_mask indicates those attributes the caller | ||
406 | * wants to set or extract. | ||
407 | */ | ||
408 | typedef struct vattr { | ||
409 | int va_mask; /* bit-mask of attributes present */ | ||
410 | enum vtype va_type; /* vnode type (for create) */ | ||
411 | mode_t va_mode; /* file access mode and type */ | ||
412 | nlink_t va_nlink; /* number of references to file */ | ||
413 | uid_t va_uid; /* owner user id */ | ||
414 | gid_t va_gid; /* owner group id */ | ||
415 | xfs_ino_t va_nodeid; /* file id */ | ||
416 | xfs_off_t va_size; /* file size in bytes */ | ||
417 | u_long va_blocksize; /* blocksize preferred for i/o */ | ||
418 | struct timespec va_atime; /* time of last access */ | ||
419 | struct timespec va_mtime; /* time of last modification */ | ||
420 | struct timespec va_ctime; /* time file changed */ | ||
421 | u_int va_gen; /* generation number of file */ | ||
422 | xfs_dev_t va_rdev; /* device the special file represents */ | ||
423 | __int64_t va_nblocks; /* number of blocks allocated */ | ||
424 | u_long va_xflags; /* random extended file flags */ | ||
425 | u_long va_extsize; /* file extent size */ | ||
426 | u_long va_nextents; /* number of extents in file */ | ||
427 | u_long va_anextents; /* number of attr extents in file */ | ||
428 | int va_projid; /* project id */ | ||
429 | } vattr_t; | ||
430 | |||
431 | /* | ||
432 | * setattr or getattr attributes | ||
433 | */ | ||
434 | #define XFS_AT_TYPE 0x00000001 | ||
435 | #define XFS_AT_MODE 0x00000002 | ||
436 | #define XFS_AT_UID 0x00000004 | ||
437 | #define XFS_AT_GID 0x00000008 | ||
438 | #define XFS_AT_FSID 0x00000010 | ||
439 | #define XFS_AT_NODEID 0x00000020 | ||
440 | #define XFS_AT_NLINK 0x00000040 | ||
441 | #define XFS_AT_SIZE 0x00000080 | ||
442 | #define XFS_AT_ATIME 0x00000100 | ||
443 | #define XFS_AT_MTIME 0x00000200 | ||
444 | #define XFS_AT_CTIME 0x00000400 | ||
445 | #define XFS_AT_RDEV 0x00000800 | ||
446 | #define XFS_AT_BLKSIZE 0x00001000 | ||
447 | #define XFS_AT_NBLOCKS 0x00002000 | ||
448 | #define XFS_AT_VCODE 0x00004000 | ||
449 | #define XFS_AT_MAC 0x00008000 | ||
450 | #define XFS_AT_UPDATIME 0x00010000 | ||
451 | #define XFS_AT_UPDMTIME 0x00020000 | ||
452 | #define XFS_AT_UPDCTIME 0x00040000 | ||
453 | #define XFS_AT_ACL 0x00080000 | ||
454 | #define XFS_AT_CAP 0x00100000 | ||
455 | #define XFS_AT_INF 0x00200000 | ||
456 | #define XFS_AT_XFLAGS 0x00400000 | ||
457 | #define XFS_AT_EXTSIZE 0x00800000 | ||
458 | #define XFS_AT_NEXTENTS 0x01000000 | ||
459 | #define XFS_AT_ANEXTENTS 0x02000000 | ||
460 | #define XFS_AT_PROJID 0x04000000 | ||
461 | #define XFS_AT_SIZE_NOPERM 0x08000000 | ||
462 | #define XFS_AT_GENCOUNT 0x10000000 | ||
463 | |||
464 | #define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ | ||
465 | XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ | ||
466 | XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ | ||
467 | XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ | ||
468 | XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ | ||
469 | XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) | ||
470 | |||
471 | #define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ | ||
472 | XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ | ||
473 | XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ | ||
474 | XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID) | ||
475 | |||
476 | #define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME) | ||
477 | |||
478 | #define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME) | ||
479 | |||
480 | #define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\ | ||
481 | XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ | ||
482 | XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) | ||
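To make the va_mask convention concrete, here is a sketch; the wrapper name is hypothetical and a flags value of 0 is assumed to mean default getattr behaviour.

/*
 * Hypothetical illustration only: the caller fills va_mask with the
 * attributes it wants before VOP_GETATTR; the getattr VOP fills in
 * only those fields.
 */
static inline int
xfs_example_stat(vnode_t *vp, vattr_t *vap, struct cred *cr)
{
	int	error;

	vap->va_mask = XFS_AT_STAT;
	VOP_GETATTR(vp, vap, 0, cr, error);
	return error;
}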
483 | |||
484 | /* | ||
485 | * Modes. | ||
486 | */ | ||
487 | #define VSUID S_ISUID /* set user id on execution */ | ||
488 | #define VSGID S_ISGID /* set group id on execution */ | ||
489 | #define VSVTX S_ISVTX /* save swapped text even after use */ | ||
490 | #define VREAD S_IRUSR /* read, write, execute permissions */ | ||
491 | #define VWRITE S_IWUSR | ||
492 | #define VEXEC S_IXUSR | ||
493 | |||
494 | #define MODEMASK S_IALLUGO /* mode bits plus permission bits */ | ||
495 | |||
496 | /* | ||
497 | * Check whether mandatory file locking is enabled. | ||
498 | */ | ||
499 | #define MANDLOCK(vp, mode) \ | ||
500 | ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) | ||
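/*
 * Note: VEXEC >> 3 is the group-execute bit (S_IXGRP), so MANDLOCK()
 * is true only for regular files with the setgid bit set and group
 * execute clear, the traditional convention for mandatory file locking.
 */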
501 | |||
502 | extern void vn_init(void); | ||
503 | extern int vn_wait(struct vnode *); | ||
504 | extern vnode_t *vn_initialize(struct inode *); | ||
505 | |||
506 | /* | ||
507 | * Acquiring and invalidating vnodes: | ||
508 | * | ||
509 | * if (vn_get(vp, &vmap)) | ||
510 | * ...; | ||
511 | * vn_purge(vp, &vmap); | ||
512 | * | ||
513 | * vn_get and vn_purge take a vmap_t argument that must be sampled | ||
514 | * while a lock also acquired by the vnode's VOP_RECLAIM function is | ||
515 | * held.  This ensures the vnode sampled under the lock is not | ||
516 | * recycled (VOP_RECLAIMed) or deallocated between dropping the lock | ||
517 | * and the subsequent vn_get or vn_purge; a sketch follows below. | ||
518 | */ | ||
519 | |||
520 | /* | ||
521 | * vnode_map structures _must_ match vn_epoch and vnode structure sizes. | ||
522 | */ | ||
523 | typedef struct vnode_map { | ||
524 | vfs_t *v_vfsp; | ||
525 | vnumber_t v_number; /* in-core vnode number */ | ||
526 | xfs_ino_t v_ino; /* inode # */ | ||
527 | } vmap_t; | ||
528 | |||
529 | #define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp; \ | ||
530 | (vmap).v_number = (vp)->v_number; \ | ||
531 | (vmap).v_ino = (vp)->v_inode.i_ino; } | ||
532 | |||
533 | extern void vn_purge(struct vnode *, vmap_t *); | ||
534 | extern vnode_t *vn_get(struct vnode *, vmap_t *); | ||
535 | extern int vn_revalidate(struct vnode *); | ||
536 | extern void vn_revalidate_core(struct vnode *, vattr_t *); | ||
537 | extern void vn_remove(struct vnode *); | ||
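The sketch referenced in the comment above is given here; the lock name example_lock is hypothetical and stands in for whatever lock the relevant VOP_RECLAIM path also takes.

/*
 * Hypothetical illustration only: snapshot the vnode's identity with
 * VMAP() while example_lock is held, drop the lock, then use vn_get()
 * to reattach; a NULL return means the vnode was reclaimed meanwhile.
 */
static inline vnode_t *
xfs_example_revisit(vnode_t *vp, spinlock_t *example_lock)
{
	vmap_t	vmap;

	spin_lock(example_lock);
	VMAP(vp, vmap);
	spin_unlock(example_lock);

	return vn_get(vp, &vmap);
}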
538 | |||
539 | static inline int vn_count(struct vnode *vp) | ||
540 | { | ||
541 | return atomic_read(&LINVFS_GET_IP(vp)->i_count); | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * Vnode reference counting functions (and macros for compatibility). | ||
546 | */ | ||
547 | extern vnode_t *vn_hold(struct vnode *); | ||
548 | extern void vn_rele(struct vnode *); | ||
549 | |||
550 | #if defined(XFS_VNODE_TRACE) | ||
551 | #define VN_HOLD(vp) \ | ||
552 | ((void)vn_hold(vp), \ | ||
553 | vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address)) | ||
554 | #define VN_RELE(vp) \ | ||
555 | (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \ | ||
556 | iput(LINVFS_GET_IP(vp))) | ||
557 | #else | ||
558 | #define VN_HOLD(vp) ((void)vn_hold(vp)) | ||
559 | #define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) | ||
560 | #endif | ||
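A short pairing sketch (the helper name and the placeholder operation are hypothetical): every VN_HOLD() must be balanced by exactly one VN_RELE(), which ultimately drops the backing inode with iput().

/*
 * Hypothetical illustration only: keep the vnode referenced across an
 * operation that may sleep, then drop the reference.
 */
static inline void
xfs_example_with_ref(vnode_t *vp)
{
	VN_HOLD(vp);
	/* ... work with vp; it cannot be reclaimed while held ... */
	VN_RELE(vp);
}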
561 | |||
562 | /* | ||
563 | * Vname handling macros. | ||
564 | */ | ||
565 | #define VNAME(dentry) ((char *) (dentry)->d_name.name) | ||
566 | #define VNAMELEN(dentry) ((dentry)->d_name.len) | ||
567 | #define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode)) | ||
568 | |||
569 | /* | ||
570 | * Vnode spinlock manipulation. | ||
571 | */ | ||
572 | #define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock) | ||
573 | #define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s) | ||
574 | #define VN_FLAGSET(vp,b) vn_flagset(vp,b) | ||
575 | #define VN_FLAGCLR(vp,b) vn_flagclr(vp,b) | ||
576 | |||
577 | static __inline__ void vn_flagset(struct vnode *vp, uint flag) | ||
578 | { | ||
579 | spin_lock(&vp->v_lock); | ||
580 | vp->v_flag |= flag; | ||
581 | spin_unlock(&vp->v_lock); | ||
582 | } | ||
583 | |||
584 | static __inline__ void vn_flagclr(struct vnode *vp, uint flag) | ||
585 | { | ||
586 | spin_lock(&vp->v_lock); | ||
587 | vp->v_flag &= ~flag; | ||
588 | spin_unlock(&vp->v_lock); | ||
589 | } | ||
590 | |||
591 | /* | ||
592 | * Update modify/access/change times on the vnode | ||
593 | */ | ||
594 | #define VN_MTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_mtime = *(tvp)) | ||
595 | #define VN_ATIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_atime = *(tvp)) | ||
596 | #define VN_CTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_ctime = *(tvp)) | ||
597 | |||
598 | /* | ||
599 | * Dealing with bad inodes | ||
600 | */ | ||
601 | static inline void vn_mark_bad(struct vnode *vp) | ||
602 | { | ||
603 | make_bad_inode(LINVFS_GET_IP(vp)); | ||
604 | } | ||
605 | |||
606 | static inline int VN_BAD(struct vnode *vp) | ||
607 | { | ||
608 | return is_bad_inode(LINVFS_GET_IP(vp)); | ||
609 | } | ||
610 | |||
611 | /* | ||
612 | * Some useful predicates. | ||
613 | */ | ||
614 | #define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping) | ||
615 | #define VN_CACHED(vp) (LINVFS_GET_IP(vp)->i_mapping->nrpages) | ||
616 | #define VN_DIRTY(vp) mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \ | ||
617 | PAGECACHE_TAG_DIRTY) | ||
618 | #define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) | ||
619 | #define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED) | ||
620 | |||
621 | /* | ||
622 | * Flags to VOP_SETATTR/VOP_GETATTR. | ||
623 | */ | ||
624 | #define ATTR_UTIME 0x01 /* non-default utime(2) request */ | ||
625 | #define ATTR_DMI 0x08 /* invocation from a DMI function */ | ||
626 | #define ATTR_LAZY 0x80 /* set/get attributes lazily */ | ||
627 | #define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */ | ||
628 | |||
629 | /* | ||
630 | * Flags to VOP_FSYNC and VOP_RECLAIM. | ||
631 | */ | ||
632 | #define FSYNC_NOWAIT 0 /* asynchronous flush */ | ||
633 | #define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ | ||
634 | #define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ | ||
635 | #define FSYNC_DATA 0x4 /* synchronous fsync of data only */ | ||
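A hedged sketch of the FSYNC flags in use; the wrapper and the (0, -1) byte range standing for the whole file are assumptions for illustration.

/*
 * Hypothetical illustration only: wait for all dirty data and inode
 * state of this vnode to reach disk.
 */
static inline int
xfs_example_fsync(vnode_t *vp, struct cred *cr)
{
	int	error;

	VOP_FSYNC(vp, FSYNC_WAIT, cr, (xfs_off_t)0, (xfs_off_t)-1, error);
	return error;
}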
636 | |||
637 | /* | ||
638 | * Tracking vnode activity. | ||
639 | */ | ||
640 | #if defined(XFS_VNODE_TRACE) | ||
641 | |||
642 | #define VNODE_TRACE_SIZE 16 /* number of trace entries */ | ||
643 | #define VNODE_KTRACE_ENTRY 1 | ||
644 | #define VNODE_KTRACE_EXIT 2 | ||
645 | #define VNODE_KTRACE_HOLD 3 | ||
646 | #define VNODE_KTRACE_REF 4 | ||
647 | #define VNODE_KTRACE_RELE 5 | ||
648 | |||
649 | extern void vn_trace_entry(struct vnode *, char *, inst_t *); | ||
650 | extern void vn_trace_exit(struct vnode *, char *, inst_t *); | ||
651 | extern void vn_trace_hold(struct vnode *, char *, int, inst_t *); | ||
652 | extern void vn_trace_ref(struct vnode *, char *, int, inst_t *); | ||
653 | extern void vn_trace_rele(struct vnode *, char *, int, inst_t *); | ||
654 | |||
655 | #define VN_TRACE(vp) \ | ||
656 | vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address) | ||
657 | #else | ||
658 | #define vn_trace_entry(a,b,c) | ||
659 | #define vn_trace_exit(a,b,c) | ||
660 | #define vn_trace_hold(a,b,c,d) | ||
661 | #define vn_trace_ref(a,b,c,d) | ||
662 | #define vn_trace_rele(a,b,c,d) | ||
663 | #define VN_TRACE(vp) | ||
664 | #endif | ||
665 | |||
666 | #endif /* __XFS_VNODE_H__ */ | ||