aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/kmem.c134
-rw-r--r--fs/xfs/linux-2.6/kmem.h157
-rw-r--r--fs/xfs/linux-2.6/mrlock.h106
-rw-r--r--fs/xfs/linux-2.6/mutex.h53
-rw-r--r--fs/xfs/linux-2.6/sema.h67
-rw-r--r--fs/xfs/linux-2.6/spin.h56
-rw-r--r--fs/xfs/linux-2.6/sv.h89
-rw-r--r--fs/xfs/linux-2.6/time.h51
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1275
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1980
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h591
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h50
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c205
-rw-r--r--fs/xfs/linux-2.6/xfs_export.h122
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c573
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c124
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h48
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c74
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h44
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c1336
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c163
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h34
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c680
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h51
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h374
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c1082
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h116
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c132
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h166
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c912
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h138
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c174
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h114
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h44
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.c330
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h223
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c455
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h666
38 files changed, 12989 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
new file mode 100644
index 000000000000..364ea8c386b1
--- /dev/null
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -0,0 +1,134 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include <linux/sched.h>
34#include <linux/mm.h>
35#include <linux/vmalloc.h>
36#include <linux/highmem.h>
37#include <linux/swap.h>
38#include <linux/blkdev.h>
39
40#include "time.h"
41#include "kmem.h"
42
43#define MAX_VMALLOCS 6
44#define MAX_SLAB_SIZE 0x20000
45
46
47void *
48kmem_alloc(size_t size, int flags)
49{
50 int retries = 0;
51 int lflags = kmem_flags_convert(flags);
52 void *ptr;
53
54 do {
55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
56 ptr = kmalloc(size, lflags);
57 else
58 ptr = __vmalloc(size, lflags, PAGE_KERNEL);
59 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
60 return ptr;
61 if (!(++retries % 100))
62 printk(KERN_ERR "XFS: possible memory allocation "
63 "deadlock in %s (mode:0x%x)\n",
64 __FUNCTION__, lflags);
65 blk_congestion_wait(WRITE, HZ/50);
66 } while (1);
67}
68
69void *
70kmem_zalloc(size_t size, int flags)
71{
72 void *ptr;
73
74 ptr = kmem_alloc(size, flags);
75 if (ptr)
76 memset((char *)ptr, 0, (int)size);
77 return ptr;
78}
79
80void
81kmem_free(void *ptr, size_t size)
82{
83 if (((unsigned long)ptr < VMALLOC_START) ||
84 ((unsigned long)ptr >= VMALLOC_END)) {
85 kfree(ptr);
86 } else {
87 vfree(ptr);
88 }
89}
90
91void *
92kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
93{
94 void *new;
95
96 new = kmem_alloc(newsize, flags);
97 if (ptr) {
98 if (new)
99 memcpy(new, ptr,
100 ((oldsize < newsize) ? oldsize : newsize));
101 kmem_free(ptr, oldsize);
102 }
103 return new;
104}
105
106void *
107kmem_zone_alloc(kmem_zone_t *zone, int flags)
108{
109 int retries = 0;
110 int lflags = kmem_flags_convert(flags);
111 void *ptr;
112
113 do {
114 ptr = kmem_cache_alloc(zone, lflags);
115 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
116 return ptr;
117 if (!(++retries % 100))
118 printk(KERN_ERR "XFS: possible memory allocation "
119 "deadlock in %s (mode:0x%x)\n",
120 __FUNCTION__, lflags);
121 blk_congestion_wait(WRITE, HZ/50);
122 } while (1);
123}
124
125void *
126kmem_zone_zalloc(kmem_zone_t *zone, int flags)
127{
128 void *ptr;
129
130 ptr = kmem_zone_alloc(zone, flags);
131 if (ptr)
132 memset((char *)ptr, 0, kmem_cache_size(zone));
133 return ptr;
134}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
new file mode 100644
index 000000000000..1397b669b059
--- /dev/null
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_KMEM_H__
33#define __XFS_SUPPORT_KMEM_H__
34
35#include <linux/slab.h>
36#include <linux/sched.h>
37#include <linux/mm.h>
38
39/*
40 * memory management routines
41 */
42#define KM_SLEEP 0x0001
43#define KM_NOSLEEP 0x0002
44#define KM_NOFS 0x0004
45#define KM_MAYFAIL 0x0008
46
47#define kmem_zone kmem_cache_s
48#define kmem_zone_t kmem_cache_t
49
50typedef unsigned long xfs_pflags_t;
51
52#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO)
53#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS)
54
55#define PFLAGS_SET_NOIO() do { \
56 current->flags |= PF_NOIO; \
57} while (0)
58
59#define PFLAGS_CLEAR_NOIO() do { \
60 current->flags &= ~PF_NOIO; \
61} while (0)
62
63/* these could be nested, so we save state */
64#define PFLAGS_SET_FSTRANS(STATEP) do { \
65 *(STATEP) = current->flags; \
66 current->flags |= PF_FSTRANS; \
67} while (0)
68
69#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
70 *(STATEP) = current->flags; \
71 current->flags &= ~PF_FSTRANS; \
72} while (0)
73
74/* Restore the PF_FSTRANS state to what was saved in STATEP */
75#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \
76 current->flags = ((current->flags & ~PF_FSTRANS) | \
77 (*(STATEP) & PF_FSTRANS)); \
78} while (0)
79
80#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
81 *(NSTATEP) = *(OSTATEP); \
82} while (0)
83
84static __inline unsigned int kmem_flags_convert(int flags)
85{
86 int lflags = __GFP_NOWARN; /* we'll report problems, if need be */
87
88#ifdef DEBUG
89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
90 printk(KERN_WARNING
91 "XFS: memory allocation with wrong flags (%x)\n", flags);
92 BUG();
93 }
94#endif
95
96 if (flags & KM_NOSLEEP) {
97 lflags |= GFP_ATOMIC;
98 } else {
99 lflags |= GFP_KERNEL;
100
101 /* avoid recusive callbacks to filesystem during transactions */
102 if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
103 lflags &= ~__GFP_FS;
104 }
105
106 return lflags;
107}
108
109static __inline kmem_zone_t *
110kmem_zone_init(int size, char *zone_name)
111{
112 return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
113}
114
115static __inline void
116kmem_zone_free(kmem_zone_t *zone, void *ptr)
117{
118 kmem_cache_free(zone, ptr);
119}
120
121static __inline void
122kmem_zone_destroy(kmem_zone_t *zone)
123{
124 if (zone && kmem_cache_destroy(zone))
125 BUG();
126}
127
128extern void *kmem_zone_zalloc(kmem_zone_t *, int);
129extern void *kmem_zone_alloc(kmem_zone_t *, int);
130
131extern void *kmem_alloc(size_t, int);
132extern void *kmem_realloc(void *, size_t, size_t, int);
133extern void *kmem_zalloc(size_t, int);
134extern void kmem_free(void *, size_t);
135
136typedef struct shrinker *kmem_shaker_t;
137typedef int (*kmem_shake_func_t)(int, unsigned int);
138
139static __inline kmem_shaker_t
140kmem_shake_register(kmem_shake_func_t sfunc)
141{
142 return set_shrinker(DEFAULT_SEEKS, sfunc);
143}
144
145static __inline void
146kmem_shake_deregister(kmem_shaker_t shrinker)
147{
148 remove_shrinker(shrinker);
149}
150
151static __inline int
152kmem_shake_allow(unsigned int gfp_mask)
153{
154 return (gfp_mask & __GFP_WAIT);
155}
156
157#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
new file mode 100644
index 000000000000..d2c11a098ff2
--- /dev/null
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -0,0 +1,106 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_MRLOCK_H__
33#define __XFS_SUPPORT_MRLOCK_H__
34
35#include <linux/rwsem.h>
36
37enum { MR_NONE, MR_ACCESS, MR_UPDATE };
38
39typedef struct {
40 struct rw_semaphore mr_lock;
41 int mr_writer;
42} mrlock_t;
43
44#define mrinit(mrp, name) \
45 ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) )
46#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
47#define mrfree(mrp) do { } while (0)
48#define mraccess(mrp) mraccessf(mrp, 0)
49#define mrupdate(mrp) mrupdatef(mrp, 0)
50
51static inline void mraccessf(mrlock_t *mrp, int flags)
52{
53 down_read(&mrp->mr_lock);
54}
55
56static inline void mrupdatef(mrlock_t *mrp, int flags)
57{
58 down_write(&mrp->mr_lock);
59 mrp->mr_writer = 1;
60}
61
62static inline int mrtryaccess(mrlock_t *mrp)
63{
64 return down_read_trylock(&mrp->mr_lock);
65}
66
67static inline int mrtryupdate(mrlock_t *mrp)
68{
69 if (!down_write_trylock(&mrp->mr_lock))
70 return 0;
71 mrp->mr_writer = 1;
72 return 1;
73}
74
75static inline void mrunlock(mrlock_t *mrp)
76{
77 if (mrp->mr_writer) {
78 mrp->mr_writer = 0;
79 up_write(&mrp->mr_lock);
80 } else {
81 up_read(&mrp->mr_lock);
82 }
83}
84
85static inline void mrdemote(mrlock_t *mrp)
86{
87 mrp->mr_writer = 0;
88 downgrade_write(&mrp->mr_lock);
89}
90
91#ifdef DEBUG
92/*
93 * Debug-only routine, without some platform-specific asm code, we can
94 * now only answer requests regarding whether we hold the lock for write
95 * (reader state is outside our visibility, we only track writer state).
96 * Note: means !ismrlocked would give false positivies, so don't do that.
97 */
98static inline int ismrlocked(mrlock_t *mrp, int type)
99{
100 if (mrp && type == MR_UPDATE)
101 return mrp->mr_writer;
102 return 1;
103}
104#endif
105
106#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
new file mode 100644
index 000000000000..0b296bb944cb
--- /dev/null
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -0,0 +1,53 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_MUTEX_H__
33#define __XFS_SUPPORT_MUTEX_H__
34
35#include <linux/spinlock.h>
36#include <asm/semaphore.h>
37
38/*
39 * Map the mutex'es from IRIX to Linux semaphores.
40 *
41 * Destroy just simply initializes to -99 which should block all other
42 * callers.
43 */
44#define MUTEX_DEFAULT 0x0
45typedef struct semaphore mutex_t;
46
47#define mutex_init(lock, type, name) sema_init(lock, 1)
48#define mutex_destroy(lock) sema_init(lock, -99)
49#define mutex_lock(lock, num) down(lock)
50#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1)
51#define mutex_unlock(lock) up(lock)
52
53#endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
new file mode 100644
index 000000000000..30b67b4e1cbf
--- /dev/null
+++ b/fs/xfs/linux-2.6/sema.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_SEMA_H__
33#define __XFS_SUPPORT_SEMA_H__
34
35#include <linux/time.h>
36#include <linux/wait.h>
37#include <asm/atomic.h>
38#include <asm/semaphore.h>
39
40/*
41 * sema_t structure just maps to struct semaphore in Linux kernel.
42 */
43
44typedef struct semaphore sema_t;
45
46#define init_sema(sp, val, c, d) sema_init(sp, val)
47#define initsema(sp, val) sema_init(sp, val)
48#define initnsema(sp, val, name) sema_init(sp, val)
49#define psema(sp, b) down(sp)
50#define vsema(sp) up(sp)
51#define valusema(sp) (atomic_read(&(sp)->count))
52#define freesema(sema)
53
54/*
55 * Map cpsema (try to get the sema) to down_trylock. We need to switch
56 * the return values since cpsema returns 1 (acquired) 0 (failed) and
57 * down_trylock returns the reverse 0 (acquired) 1 (failed).
58 */
59
60#define cpsema(sp) (down_trylock(sp) ? 0 : 1)
61
62/*
63 * Didn't do cvsema(sp). Not sure how to map this to up/down/...
64 * It does a vsema if the values is < 0 other wise nothing.
65 */
66
67#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
new file mode 100644
index 000000000000..bcf60a0b8df0
--- /dev/null
+++ b/fs/xfs/linux-2.6/spin.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_SPIN_H__
33#define __XFS_SUPPORT_SPIN_H__
34
35#include <linux/sched.h> /* preempt needs this */
36#include <linux/spinlock.h>
37
38/*
39 * Map lock_t from IRIX to Linux spinlocks.
40 *
41 * We do not make use of lock_t from interrupt context, so we do not
42 * have to worry about disabling interrupts at all (unlike IRIX).
43 */
44
45typedef spinlock_t lock_t;
46
47#define SPLDECL(s) unsigned long s
48
49#define spinlock_init(lock, name) spin_lock_init(lock)
50#define spinlock_destroy(lock)
51#define mutex_spinlock(lock) ({ spin_lock(lock); 0; })
52#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0)
53#define nested_spinlock(lock) spin_lock(lock)
54#define nested_spinunlock(lock) spin_unlock(lock)
55
56#endif /* __XFS_SUPPORT_SPIN_H__ */
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
new file mode 100644
index 000000000000..821d3167e05b
--- /dev/null
+++ b/fs/xfs/linux-2.6/sv.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_SV_H__
33#define __XFS_SUPPORT_SV_H__
34
35#include <linux/wait.h>
36#include <linux/sched.h>
37#include <linux/spinlock.h>
38
39/*
40 * Synchronisation variables.
41 *
42 * (Parameters "pri", "svf" and "rts" are not implemented)
43 */
44
45typedef struct sv_s {
46 wait_queue_head_t waiters;
47} sv_t;
48
49#define SV_FIFO 0x0 /* sv_t is FIFO type */
50#define SV_LIFO 0x2 /* sv_t is LIFO type */
51#define SV_PRIO 0x4 /* sv_t is PRIO type */
52#define SV_KEYED 0x6 /* sv_t is KEYED type */
53#define SV_DEFAULT SV_FIFO
54
55
56static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
57 unsigned long timeout)
58{
59 DECLARE_WAITQUEUE(wait, current);
60
61 add_wait_queue_exclusive(&sv->waiters, &wait);
62 __set_current_state(state);
63 spin_unlock(lock);
64
65 schedule_timeout(timeout);
66
67 remove_wait_queue(&sv->waiters, &wait);
68}
69
70#define init_sv(sv,type,name,flag) \
71 init_waitqueue_head(&(sv)->waiters)
72#define sv_init(sv,flag,name) \
73 init_waitqueue_head(&(sv)->waiters)
74#define sv_destroy(sv) \
75 /*NOTHING*/
76#define sv_wait(sv, pri, lock, s) \
77 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
78#define sv_wait_sig(sv, pri, lock, s) \
79 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
80#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
81 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
82#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
83 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
84#define sv_signal(sv) \
85 wake_up(&(sv)->waiters)
86#define sv_broadcast(sv) \
87 wake_up_all(&(sv)->waiters)
88
89#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
new file mode 100644
index 000000000000..6c6fd0faa8e1
--- /dev/null
+++ b/fs/xfs/linux-2.6/time.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_TIME_H__
33#define __XFS_SUPPORT_TIME_H__
34
35#include <linux/sched.h>
36#include <linux/time.h>
37
38typedef struct timespec timespec_t;
39
40static inline void delay(long ticks)
41{
42 set_current_state(TASK_UNINTERRUPTIBLE);
43 schedule_timeout(ticks);
44}
45
46static inline void nanotime(struct timespec *tvp)
47{
48 *tvp = CURRENT_TIME;
49}
50
51#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
new file mode 100644
index 000000000000..76a84758073a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -0,0 +1,1275 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_inum.h"
35#include "xfs_log.h"
36#include "xfs_sb.h"
37#include "xfs_dir.h"
38#include "xfs_dir2.h"
39#include "xfs_trans.h"
40#include "xfs_dmapi.h"
41#include "xfs_mount.h"
42#include "xfs_bmap_btree.h"
43#include "xfs_alloc_btree.h"
44#include "xfs_ialloc_btree.h"
45#include "xfs_alloc.h"
46#include "xfs_btree.h"
47#include "xfs_attr_sf.h"
48#include "xfs_dir_sf.h"
49#include "xfs_dir2_sf.h"
50#include "xfs_dinode.h"
51#include "xfs_inode.h"
52#include "xfs_error.h"
53#include "xfs_rw.h"
54#include "xfs_iomap.h"
55#include <linux/mpage.h>
56#include <linux/writeback.h>
57
58STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
59STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
60 struct writeback_control *wbc, void *, int, int);
61
62#if defined(XFS_RW_TRACE)
63void
64xfs_page_trace(
65 int tag,
66 struct inode *inode,
67 struct page *page,
68 int mask)
69{
70 xfs_inode_t *ip;
71 bhv_desc_t *bdp;
72 vnode_t *vp = LINVFS_GET_VP(inode);
73 loff_t isize = i_size_read(inode);
74 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
75 int delalloc = -1, unmapped = -1, unwritten = -1;
76
77 if (page_has_buffers(page))
78 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
79
80 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
81 ip = XFS_BHVTOI(bdp);
82 if (!ip->i_rwtrace)
83 return;
84
85 ktrace_enter(ip->i_rwtrace,
86 (void *)((unsigned long)tag),
87 (void *)ip,
88 (void *)inode,
89 (void *)page,
90 (void *)((unsigned long)mask),
91 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
92 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
93 (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
94 (void *)((unsigned long)(isize & 0xffffffff)),
95 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
96 (void *)((unsigned long)(offset & 0xffffffff)),
97 (void *)((unsigned long)delalloc),
98 (void *)((unsigned long)unmapped),
99 (void *)((unsigned long)unwritten),
100 (void *)NULL,
101 (void *)NULL);
102}
103#else
104#define xfs_page_trace(tag, inode, page, mask)
105#endif
106
107void
108linvfs_unwritten_done(
109 struct buffer_head *bh,
110 int uptodate)
111{
112 xfs_buf_t *pb = (xfs_buf_t *)bh->b_private;
113
114 ASSERT(buffer_unwritten(bh));
115 bh->b_end_io = NULL;
116 clear_buffer_unwritten(bh);
117 if (!uptodate)
118 pagebuf_ioerror(pb, EIO);
119 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
120 pagebuf_iodone(pb, 1, 1);
121 }
122 end_buffer_async_write(bh, uptodate);
123}
124
125/*
126 * Issue transactions to convert a buffer range from unwritten
127 * to written extents (buffered IO).
128 */
129STATIC void
130linvfs_unwritten_convert(
131 xfs_buf_t *bp)
132{
133 vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
134 int error;
135
136 BUG_ON(atomic_read(&bp->pb_hold) < 1);
137 VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
138 BMAPI_UNWRITTEN, NULL, NULL, error);
139 XFS_BUF_SET_FSPRIVATE(bp, NULL);
140 XFS_BUF_CLR_IODONE_FUNC(bp);
141 XFS_BUF_UNDATAIO(bp);
142 iput(LINVFS_GET_IP(vp));
143 pagebuf_iodone(bp, 0, 0);
144}
145
146/*
147 * Issue transactions to convert a buffer range from unwritten
148 * to written extents (direct IO).
149 */
150STATIC void
151linvfs_unwritten_convert_direct(
152 struct inode *inode,
153 loff_t offset,
154 ssize_t size,
155 void *private)
156{
157 ASSERT(!private || inode == (struct inode *)private);
158
159 /* private indicates an unwritten extent lay beneath this IO */
160 if (private && size > 0) {
161 vnode_t *vp = LINVFS_GET_VP(inode);
162 int error;
163
164 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
165 }
166}
167
168STATIC int
169xfs_map_blocks(
170 struct inode *inode,
171 loff_t offset,
172 ssize_t count,
173 xfs_iomap_t *mapp,
174 int flags)
175{
176 vnode_t *vp = LINVFS_GET_VP(inode);
177 int error, nmaps = 1;
178
179 VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
180 if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
181 VMODIFY(vp);
182 return -error;
183}
184
185/*
186 * Finds the corresponding mapping in block @map array of the
187 * given @offset within a @page.
188 */
189STATIC xfs_iomap_t *
190xfs_offset_to_map(
191 struct page *page,
192 xfs_iomap_t *iomapp,
193 unsigned long offset)
194{
195 loff_t full_offset; /* offset from start of file */
196
197 ASSERT(offset < PAGE_CACHE_SIZE);
198
199 full_offset = page->index; /* NB: using 64bit number */
200 full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */
201 full_offset += offset; /* offset from page start */
202
203 if (full_offset < iomapp->iomap_offset)
204 return NULL;
205 if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
206 return iomapp;
207 return NULL;
208}
209
210STATIC void
211xfs_map_at_offset(
212 struct page *page,
213 struct buffer_head *bh,
214 unsigned long offset,
215 int block_bits,
216 xfs_iomap_t *iomapp)
217{
218 xfs_daddr_t bn;
219 loff_t delta;
220 int sector_shift;
221
222 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
223 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
224 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
225
226 delta = page->index;
227 delta <<= PAGE_CACHE_SHIFT;
228 delta += offset;
229 delta -= iomapp->iomap_offset;
230 delta >>= block_bits;
231
232 sector_shift = block_bits - BBSHIFT;
233 bn = iomapp->iomap_bn >> sector_shift;
234 bn += delta;
235 BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
236 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
237
238 lock_buffer(bh);
239 bh->b_blocknr = bn;
240 bh->b_bdev = iomapp->iomap_target->pbr_bdev;
241 set_buffer_mapped(bh);
242 clear_buffer_delay(bh);
243}
244
245/*
246 * Look for a page at index which is unlocked and contains our
247 * unwritten extent flagged buffers at its head. Returns page
248 * locked and with an extra reference count, and length of the
249 * unwritten extent component on this page that we can write,
250 * in units of filesystem blocks.
251 */
252STATIC struct page *
253xfs_probe_unwritten_page(
254 struct address_space *mapping,
255 pgoff_t index,
256 xfs_iomap_t *iomapp,
257 xfs_buf_t *pb,
258 unsigned long max_offset,
259 unsigned long *fsbs,
260 unsigned int bbits)
261{
262 struct page *page;
263
264 page = find_trylock_page(mapping, index);
265 if (!page)
266 return NULL;
267 if (PageWriteback(page))
268 goto out;
269
270 if (page->mapping && page_has_buffers(page)) {
271 struct buffer_head *bh, *head;
272 unsigned long p_offset = 0;
273
274 *fsbs = 0;
275 bh = head = page_buffers(page);
276 do {
277 if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
278 break;
279 if (!xfs_offset_to_map(page, iomapp, p_offset))
280 break;
281 if (p_offset >= max_offset)
282 break;
283 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
284 set_buffer_unwritten_io(bh);
285 bh->b_private = pb;
286 p_offset += bh->b_size;
287 (*fsbs)++;
288 } while ((bh = bh->b_this_page) != head);
289
290 if (p_offset)
291 return page;
292 }
293
294out:
295 unlock_page(page);
296 return NULL;
297}
298
299/*
300 * Look for a page at index which is unlocked and not mapped
301 * yet - clustering for mmap write case.
302 */
303STATIC unsigned int
304xfs_probe_unmapped_page(
305 struct address_space *mapping,
306 pgoff_t index,
307 unsigned int pg_offset)
308{
309 struct page *page;
310 int ret = 0;
311
312 page = find_trylock_page(mapping, index);
313 if (!page)
314 return 0;
315 if (PageWriteback(page))
316 goto out;
317
318 if (page->mapping && PageDirty(page)) {
319 if (page_has_buffers(page)) {
320 struct buffer_head *bh, *head;
321
322 bh = head = page_buffers(page);
323 do {
324 if (buffer_mapped(bh) || !buffer_uptodate(bh))
325 break;
326 ret += bh->b_size;
327 if (ret >= pg_offset)
328 break;
329 } while ((bh = bh->b_this_page) != head);
330 } else
331 ret = PAGE_CACHE_SIZE;
332 }
333
334out:
335 unlock_page(page);
336 return ret;
337}
338
339STATIC unsigned int
340xfs_probe_unmapped_cluster(
341 struct inode *inode,
342 struct page *startpage,
343 struct buffer_head *bh,
344 struct buffer_head *head)
345{
346 pgoff_t tindex, tlast, tloff;
347 unsigned int pg_offset, len, total = 0;
348 struct address_space *mapping = inode->i_mapping;
349
350 /* First sum forwards in this page */
351 do {
352 if (buffer_mapped(bh))
353 break;
354 total += bh->b_size;
355 } while ((bh = bh->b_this_page) != head);
356
357 /* If we reached the end of the page, sum forwards in
358 * following pages.
359 */
360 if (bh == head) {
361 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
362 /* Prune this back to avoid pathological behavior */
363 tloff = min(tlast, startpage->index + 64);
364 for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
365 len = xfs_probe_unmapped_page(mapping, tindex,
366 PAGE_CACHE_SIZE);
367 if (!len)
368 return total;
369 total += len;
370 }
371 if (tindex == tlast &&
372 (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
373 total += xfs_probe_unmapped_page(mapping,
374 tindex, pg_offset);
375 }
376 }
377 return total;
378}
379
380/*
381 * Probe for a given page (index) in the inode and test if it is delayed
382 * and without unwritten buffers. Returns page locked and with an extra
383 * reference count.
384 */
385STATIC struct page *
386xfs_probe_delalloc_page(
387 struct inode *inode,
388 pgoff_t index)
389{
390 struct page *page;
391
392 page = find_trylock_page(inode->i_mapping, index);
393 if (!page)
394 return NULL;
395 if (PageWriteback(page))
396 goto out;
397
398 if (page->mapping && page_has_buffers(page)) {
399 struct buffer_head *bh, *head;
400 int acceptable = 0;
401
402 bh = head = page_buffers(page);
403 do {
404 if (buffer_unwritten(bh)) {
405 acceptable = 0;
406 break;
407 } else if (buffer_delay(bh)) {
408 acceptable = 1;
409 }
410 } while ((bh = bh->b_this_page) != head);
411
412 if (acceptable)
413 return page;
414 }
415
416out:
417 unlock_page(page);
418 return NULL;
419}
420
421STATIC int
422xfs_map_unwritten(
423 struct inode *inode,
424 struct page *start_page,
425 struct buffer_head *head,
426 struct buffer_head *curr,
427 unsigned long p_offset,
428 int block_bits,
429 xfs_iomap_t *iomapp,
430 struct writeback_control *wbc,
431 int startio,
432 int all_bh)
433{
434 struct buffer_head *bh = curr;
435 xfs_iomap_t *tmp;
436 xfs_buf_t *pb;
437 loff_t offset, size;
438 unsigned long nblocks = 0;
439
440 offset = start_page->index;
441 offset <<= PAGE_CACHE_SHIFT;
442 offset += p_offset;
443
444 /* get an "empty" pagebuf to manage IO completion
445 * Proper values will be set before returning */
446 pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
447 if (!pb)
448 return -EAGAIN;
449
450 /* Take a reference to the inode to prevent it from
451 * being reclaimed while we have outstanding unwritten
452 * extent IO on it.
453 */
454 if ((igrab(inode)) != inode) {
455 pagebuf_free(pb);
456 return -EAGAIN;
457 }
458
459 /* Set the count to 1 initially, this will stop an I/O
460 * completion callout which happens before we have started
461 * all the I/O from calling pagebuf_iodone too early.
462 */
463 atomic_set(&pb->pb_io_remaining, 1);
464
465 /* First map forwards in the page consecutive buffers
466 * covering this unwritten extent
467 */
468 do {
469 if (!buffer_unwritten(bh))
470 break;
471 tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
472 if (!tmp)
473 break;
474 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
475 set_buffer_unwritten_io(bh);
476 bh->b_private = pb;
477 p_offset += bh->b_size;
478 nblocks++;
479 } while ((bh = bh->b_this_page) != head);
480
481 atomic_add(nblocks, &pb->pb_io_remaining);
482
483 /* If we reached the end of the page, map forwards in any
484 * following pages which are also covered by this extent.
485 */
486 if (bh == head) {
487 struct address_space *mapping = inode->i_mapping;
488 pgoff_t tindex, tloff, tlast;
489 unsigned long bs;
490 unsigned int pg_offset, bbits = inode->i_blkbits;
491 struct page *page;
492
493 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
494 tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
495 tloff = min(tlast, tloff);
496 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
497 page = xfs_probe_unwritten_page(mapping,
498 tindex, iomapp, pb,
499 PAGE_CACHE_SIZE, &bs, bbits);
500 if (!page)
501 break;
502 nblocks += bs;
503 atomic_add(bs, &pb->pb_io_remaining);
504 xfs_convert_page(inode, page, iomapp, wbc, pb,
505 startio, all_bh);
506 /* stop if converting the next page might add
507 * enough blocks that the corresponding byte
508 * count won't fit in our ulong page buf length */
509 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
510 goto enough;
511 }
512
513 if (tindex == tlast &&
514 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
515 page = xfs_probe_unwritten_page(mapping,
516 tindex, iomapp, pb,
517 pg_offset, &bs, bbits);
518 if (page) {
519 nblocks += bs;
520 atomic_add(bs, &pb->pb_io_remaining);
521 xfs_convert_page(inode, page, iomapp, wbc, pb,
522 startio, all_bh);
523 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
524 goto enough;
525 }
526 }
527 }
528
529enough:
530 size = nblocks; /* NB: using 64bit number here */
531 size <<= block_bits; /* convert fsb's to byte range */
532
533 XFS_BUF_DATAIO(pb);
534 XFS_BUF_ASYNC(pb);
535 XFS_BUF_SET_SIZE(pb, size);
536 XFS_BUF_SET_COUNT(pb, size);
537 XFS_BUF_SET_OFFSET(pb, offset);
538 XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
539 XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
540
541 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
542 pagebuf_iodone(pb, 1, 1);
543 }
544
545 return 0;
546}
547
548STATIC void
549xfs_submit_page(
550 struct page *page,
551 struct writeback_control *wbc,
552 struct buffer_head *bh_arr[],
553 int bh_count,
554 int probed_page,
555 int clear_dirty)
556{
557 struct buffer_head *bh;
558 int i;
559
560 BUG_ON(PageWriteback(page));
561 set_page_writeback(page);
562 if (clear_dirty)
563 clear_page_dirty(page);
564 unlock_page(page);
565
566 if (bh_count) {
567 for (i = 0; i < bh_count; i++) {
568 bh = bh_arr[i];
569 mark_buffer_async_write(bh);
570 if (buffer_unwritten(bh))
571 set_buffer_unwritten_io(bh);
572 set_buffer_uptodate(bh);
573 clear_buffer_dirty(bh);
574 }
575
576 for (i = 0; i < bh_count; i++)
577 submit_bh(WRITE, bh_arr[i]);
578
579 if (probed_page && clear_dirty)
580 wbc->nr_to_write--; /* Wrote an "extra" page */
581 } else {
582 end_page_writeback(page);
583 wbc->pages_skipped++; /* We didn't write this page */
584 }
585}
586
587/*
588 * Allocate & map buffers for page given the extent map. Write it out.
589 * except for the original page of a writepage, this is called on
590 * delalloc/unwritten pages only, for the original page it is possible
591 * that the page has no mapping at all.
592 */
593STATIC void
594xfs_convert_page(
595 struct inode *inode,
596 struct page *page,
597 xfs_iomap_t *iomapp,
598 struct writeback_control *wbc,
599 void *private,
600 int startio,
601 int all_bh)
602{
603 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
604 xfs_iomap_t *mp = iomapp, *tmp;
605 unsigned long end, offset;
606 pgoff_t end_index;
607 int i = 0, index = 0;
608 int bbits = inode->i_blkbits;
609
610 end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
611 if (page->index < end_index) {
612 end = PAGE_CACHE_SIZE;
613 } else {
614 end = i_size_read(inode) & (PAGE_CACHE_SIZE-1);
615 }
616 bh = head = page_buffers(page);
617 do {
618 offset = i << bbits;
619 if (offset >= end)
620 break;
621 if (!(PageUptodate(page) || buffer_uptodate(bh)))
622 continue;
623 if (buffer_mapped(bh) && all_bh &&
624 !(buffer_unwritten(bh) || buffer_delay(bh))) {
625 if (startio) {
626 lock_buffer(bh);
627 bh_arr[index++] = bh;
628 }
629 continue;
630 }
631 tmp = xfs_offset_to_map(page, mp, offset);
632 if (!tmp)
633 continue;
634 ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
635 ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
636
637 /* If this is a new unwritten extent buffer (i.e. one
 638 * that we haven't passed in private data for), we must
639 * now map this buffer too.
640 */
641 if (buffer_unwritten(bh) && !bh->b_end_io) {
642 ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
643 xfs_map_unwritten(inode, page, head, bh, offset,
644 bbits, tmp, wbc, startio, all_bh);
645 } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
646 xfs_map_at_offset(page, bh, offset, bbits, tmp);
647 if (buffer_unwritten(bh)) {
648 set_buffer_unwritten_io(bh);
649 bh->b_private = private;
650 ASSERT(private);
651 }
652 }
653 if (startio) {
654 bh_arr[index++] = bh;
655 } else {
656 set_buffer_dirty(bh);
657 unlock_buffer(bh);
658 mark_buffer_dirty(bh);
659 }
660 } while (i++, (bh = bh->b_this_page) != head);
661
662 if (startio) {
663 xfs_submit_page(page, wbc, bh_arr, index, 1, index == i);
664 } else {
665 unlock_page(page);
666 }
667}
668
669/*
670 * Convert & write out a cluster of pages in the same extent as defined
671 * by mp and following the start page.
672 */
673STATIC void
674xfs_cluster_write(
675 struct inode *inode,
676 pgoff_t tindex,
677 xfs_iomap_t *iomapp,
678 struct writeback_control *wbc,
679 int startio,
680 int all_bh,
681 pgoff_t tlast)
682{
683 struct page *page;
684
685 for (; tindex <= tlast; tindex++) {
686 page = xfs_probe_delalloc_page(inode, tindex);
687 if (!page)
688 break;
689 xfs_convert_page(inode, page, iomapp, wbc, NULL,
690 startio, all_bh);
691 }
692}
693
694/*
695 * Calling this without startio set means we are being asked to make a dirty
 696 * page ready for freeing its buffers. When called with startio set then
697 * we are coming from writepage.
698 *
699 * When called with startio set it is important that we write the WHOLE
700 * page if possible.
701 * The bh->b_state's cannot know if any of the blocks or which block for
702 * that matter are dirty due to mmap writes, and therefore bh uptodate is
 703 * only valid if the page itself isn't completely uptodate. Some layers
704 * may clear the page dirty flag prior to calling write page, under the
705 * assumption the entire page will be written out; by not writing out the
706 * whole page the page can be reused before all valid dirty data is
707 * written out. Note: in the case of a page that has been dirty'd by
 708 * mapwrite but only partially set up by block_prepare_write the
709 * bh->b_states's will not agree and only ones setup by BPW/BCW will have
 710 * valid state, thus the whole page must be written out.
711 */
712
713STATIC int
714xfs_page_state_convert(
715 struct inode *inode,
716 struct page *page,
717 struct writeback_control *wbc,
718 int startio,
719 int unmapped) /* also implies page uptodate */
720{
721 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
722 xfs_iomap_t *iomp, iomap;
723 loff_t offset;
724 unsigned long p_offset = 0;
725 __uint64_t end_offset;
726 pgoff_t end_index, last_index, tlast;
727 int len, err, i, cnt = 0, uptodate = 1;
728 int flags = startio ? 0 : BMAPI_TRYLOCK;
729 int page_dirty, delalloc = 0;
730
731 /* Is this page beyond the end of the file? */
732 offset = i_size_read(inode);
733 end_index = offset >> PAGE_CACHE_SHIFT;
734 last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
735 if (page->index >= end_index) {
736 if ((page->index >= end_index + 1) ||
737 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
738 err = -EIO;
739 goto error;
740 }
741 }
742
743 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
744 end_offset = min_t(unsigned long long,
745 offset + PAGE_CACHE_SIZE, i_size_read(inode));
746
747 bh = head = page_buffers(page);
748 iomp = NULL;
749
750 /*
751 * page_dirty is initially a count of buffers on the page and
 752 * is decremented as we move each into a cleanable state.
753 */
754 len = bh->b_size;
755 page_dirty = PAGE_CACHE_SIZE / len;
756
757 do {
758 if (offset >= end_offset)
759 break;
760 if (!buffer_uptodate(bh))
761 uptodate = 0;
762 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
763 continue;
764
765 if (iomp) {
766 iomp = xfs_offset_to_map(page, &iomap, p_offset);
767 }
768
769 /*
770 * First case, map an unwritten extent and prepare for
771 * extent state conversion transaction on completion.
772 */
773 if (buffer_unwritten(bh)) {
774 if (!startio)
775 continue;
776 if (!iomp) {
777 err = xfs_map_blocks(inode, offset, len, &iomap,
778 BMAPI_READ|BMAPI_IGNSTATE);
779 if (err) {
780 goto error;
781 }
782 iomp = xfs_offset_to_map(page, &iomap,
783 p_offset);
784 }
785 if (iomp) {
786 if (!bh->b_end_io) {
787 err = xfs_map_unwritten(inode, page,
788 head, bh, p_offset,
789 inode->i_blkbits, iomp,
790 wbc, startio, unmapped);
791 if (err) {
792 goto error;
793 }
794 } else {
795 set_bit(BH_Lock, &bh->b_state);
796 }
797 BUG_ON(!buffer_locked(bh));
798 bh_arr[cnt++] = bh;
799 page_dirty--;
800 }
801 /*
802 * Second case, allocate space for a delalloc buffer.
803 * We can return EAGAIN here in the release page case.
804 */
805 } else if (buffer_delay(bh)) {
806 if (!iomp) {
807 delalloc = 1;
808 err = xfs_map_blocks(inode, offset, len, &iomap,
809 BMAPI_ALLOCATE | flags);
810 if (err) {
811 goto error;
812 }
813 iomp = xfs_offset_to_map(page, &iomap,
814 p_offset);
815 }
816 if (iomp) {
817 xfs_map_at_offset(page, bh, p_offset,
818 inode->i_blkbits, iomp);
819 if (startio) {
820 bh_arr[cnt++] = bh;
821 } else {
822 set_buffer_dirty(bh);
823 unlock_buffer(bh);
824 mark_buffer_dirty(bh);
825 }
826 page_dirty--;
827 }
828 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
829 (unmapped || startio)) {
830
831 if (!buffer_mapped(bh)) {
832 int size;
833
834 /*
835 * Getting here implies an unmapped buffer
836 * was found, and we are in a path where we
837 * need to write the whole page out.
838 */
839 if (!iomp) {
840 size = xfs_probe_unmapped_cluster(
841 inode, page, bh, head);
842 err = xfs_map_blocks(inode, offset,
843 size, &iomap,
844 BMAPI_WRITE|BMAPI_MMAP);
845 if (err) {
846 goto error;
847 }
848 iomp = xfs_offset_to_map(page, &iomap,
849 p_offset);
850 }
851 if (iomp) {
852 xfs_map_at_offset(page,
853 bh, p_offset,
854 inode->i_blkbits, iomp);
855 if (startio) {
856 bh_arr[cnt++] = bh;
857 } else {
858 set_buffer_dirty(bh);
859 unlock_buffer(bh);
860 mark_buffer_dirty(bh);
861 }
862 page_dirty--;
863 }
864 } else if (startio) {
865 if (buffer_uptodate(bh) &&
866 !test_and_set_bit(BH_Lock, &bh->b_state)) {
867 bh_arr[cnt++] = bh;
868 page_dirty--;
869 }
870 }
871 }
872 } while (offset += len, p_offset += len,
873 ((bh = bh->b_this_page) != head));
874
875 if (uptodate && bh == head)
876 SetPageUptodate(page);
877
878 if (startio)
879 xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1);
880
881 if (iomp) {
882 tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
883 PAGE_CACHE_SHIFT;
884 if (delalloc && (tlast > last_index))
885 tlast = last_index;
886 xfs_cluster_write(inode, page->index + 1, iomp, wbc,
887 startio, unmapped, tlast);
888 }
889
890 return page_dirty;
891
892error:
893 for (i = 0; i < cnt; i++) {
894 unlock_buffer(bh_arr[i]);
895 }
896
897 /*
898 * If it's delalloc and we have nowhere to put it,
899 * throw it away, unless the lower layers told
900 * us to try again.
901 */
902 if (err != -EAGAIN) {
903 if (!unmapped) {
904 block_invalidatepage(page, 0);
905 }
906 ClearPageUptodate(page);
907 }
908 return err;
909}
910
911STATIC int
912__linvfs_get_block(
913 struct inode *inode,
914 sector_t iblock,
915 unsigned long blocks,
916 struct buffer_head *bh_result,
917 int create,
918 int direct,
919 bmapi_flags_t flags)
920{
921 vnode_t *vp = LINVFS_GET_VP(inode);
922 xfs_iomap_t iomap;
923 int retpbbm = 1;
924 int error;
925 ssize_t size;
926 loff_t offset = (loff_t)iblock << inode->i_blkbits;
927
928 if (blocks)
929 size = blocks << inode->i_blkbits;
930 else
931 size = 1 << inode->i_blkbits;
932
933 VOP_BMAP(vp, offset, size,
934 create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
935 if (error)
936 return -error;
937
938 if (retpbbm == 0)
939 return 0;
940
941 if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
942 xfs_daddr_t bn;
943 loff_t delta;
944
945 /* For unwritten extents do not report a disk address on
946 * the read case (treat as if we're reading into a hole).
947 */
948 if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
949 delta = offset - iomap.iomap_offset;
950 delta >>= inode->i_blkbits;
951
952 bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT);
953 bn += delta;
954 BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME));
955 bh_result->b_blocknr = bn;
956 set_buffer_mapped(bh_result);
957 }
958 if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
959 if (direct)
960 bh_result->b_private = inode;
961 set_buffer_unwritten(bh_result);
962 set_buffer_delay(bh_result);
963 }
964 }
965
966 /* If this is a realtime file, data might be on a new device */
967 bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
968
969 /* If we previously allocated a block out beyond eof and
970 * we are now coming back to use it then we will need to
971 * flag it as new even if it has a disk address.
972 */
973 if (create &&
974 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
975 (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) {
976 set_buffer_new(bh_result);
977 }
978
979 if (iomap.iomap_flags & IOMAP_DELAY) {
980 BUG_ON(direct);
981 if (create) {
982 set_buffer_uptodate(bh_result);
983 set_buffer_mapped(bh_result);
984 set_buffer_delay(bh_result);
985 }
986 }
987
988 if (blocks) {
989 bh_result->b_size = (ssize_t)min(
990 (loff_t)(iomap.iomap_bsize - iomap.iomap_delta),
991 (loff_t)(blocks << inode->i_blkbits));
992 }
993
994 return 0;
995}
996
997int
998linvfs_get_block(
999 struct inode *inode,
1000 sector_t iblock,
1001 struct buffer_head *bh_result,
1002 int create)
1003{
1004 return __linvfs_get_block(inode, iblock, 0, bh_result,
1005 create, 0, BMAPI_WRITE);
1006}
1007
1008STATIC int
1009linvfs_get_blocks_direct(
1010 struct inode *inode,
1011 sector_t iblock,
1012 unsigned long max_blocks,
1013 struct buffer_head *bh_result,
1014 int create)
1015{
1016 return __linvfs_get_block(inode, iblock, max_blocks, bh_result,
1017 create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1018}
1019
1020STATIC ssize_t
1021linvfs_direct_IO(
1022 int rw,
1023 struct kiocb *iocb,
1024 const struct iovec *iov,
1025 loff_t offset,
1026 unsigned long nr_segs)
1027{
1028 struct file *file = iocb->ki_filp;
1029 struct inode *inode = file->f_mapping->host;
1030 vnode_t *vp = LINVFS_GET_VP(inode);
1031 xfs_iomap_t iomap;
1032 int maps = 1;
1033 int error;
1034
1035 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
1036 if (error)
1037 return -error;
1038
1039 return blockdev_direct_IO_own_locking(rw, iocb, inode,
1040 iomap.iomap_target->pbr_bdev,
1041 iov, offset, nr_segs,
1042 linvfs_get_blocks_direct,
1043 linvfs_unwritten_convert_direct);
1044}
1045
1046
1047STATIC sector_t
1048linvfs_bmap(
1049 struct address_space *mapping,
1050 sector_t block)
1051{
1052 struct inode *inode = (struct inode *)mapping->host;
1053 vnode_t *vp = LINVFS_GET_VP(inode);
1054 int error;
1055
1056 vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address);
1057
1058 VOP_RWLOCK(vp, VRWLOCK_READ);
1059 VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
1060 VOP_RWUNLOCK(vp, VRWLOCK_READ);
1061 return generic_block_bmap(mapping, block, linvfs_get_block);
1062}
1063
1064STATIC int
1065linvfs_readpage(
1066 struct file *unused,
1067 struct page *page)
1068{
1069 return mpage_readpage(page, linvfs_get_block);
1070}
1071
1072STATIC int
1073linvfs_readpages(
1074 struct file *unused,
1075 struct address_space *mapping,
1076 struct list_head *pages,
1077 unsigned nr_pages)
1078{
1079 return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
1080}
1081
1082STATIC void
1083xfs_count_page_state(
1084 struct page *page,
1085 int *delalloc,
1086 int *unmapped,
1087 int *unwritten)
1088{
1089 struct buffer_head *bh, *head;
1090
1091 *delalloc = *unmapped = *unwritten = 0;
1092
1093 bh = head = page_buffers(page);
1094 do {
1095 if (buffer_uptodate(bh) && !buffer_mapped(bh))
1096 (*unmapped) = 1;
1097 else if (buffer_unwritten(bh) && !buffer_delay(bh))
1098 clear_buffer_unwritten(bh);
1099 else if (buffer_unwritten(bh))
1100 (*unwritten) = 1;
1101 else if (buffer_delay(bh))
1102 (*delalloc) = 1;
1103 } while ((bh = bh->b_this_page) != head);
1104}
1105
1106
1107/*
1108 * writepage: Called from one of two places:
1109 *
1110 * 1. we are flushing a delalloc buffer head.
1111 *
1112 * 2. we are writing out a dirty page. Typically the page dirty
 1113 * state is cleared before we get here. In this case it is
1114 * conceivable we have no buffer heads.
1115 *
1116 * For delalloc space on the page we need to allocate space and
1117 * flush it. For unmapped buffer heads on the page we should
1118 * allocate space if the page is uptodate. For any other dirty
1119 * buffer heads on the page we should flush them.
1120 *
1121 * If we detect that a transaction would be required to flush
1122 * the page, we have to check the process flags first, if we
1123 * are already in a transaction or disk I/O during allocations
1124 * is off, we need to fail the writepage and redirty the page.
1125 */
1126
1127STATIC int
1128linvfs_writepage(
1129 struct page *page,
1130 struct writeback_control *wbc)
1131{
1132 int error;
1133 int need_trans;
1134 int delalloc, unmapped, unwritten;
1135 struct inode *inode = page->mapping->host;
1136
1137 xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
1138
1139 /*
1140 * We need a transaction if:
1141 * 1. There are delalloc buffers on the page
1142 * 2. The page is uptodate and we have unmapped buffers
1143 * 3. The page is uptodate and we have no buffers
1144 * 4. There are unwritten buffers on the page
1145 */
1146
1147 if (!page_has_buffers(page)) {
1148 unmapped = 1;
1149 need_trans = 1;
1150 } else {
1151 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1152 if (!PageUptodate(page))
1153 unmapped = 0;
1154 need_trans = delalloc + unmapped + unwritten;
1155 }
1156
1157 /*
1158 * If we need a transaction and the process flags say
1159 * we are already in a transaction, or no IO is allowed
1160 * then mark the page dirty again and leave the page
1161 * as is.
1162 */
1163 if (PFLAGS_TEST_FSTRANS() && need_trans)
1164 goto out_fail;
1165
1166 /*
1167 * Delay hooking up buffer heads until we have
1168 * made our go/no-go decision.
1169 */
1170 if (!page_has_buffers(page))
1171 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1172
1173 /*
1174 * Convert delayed allocate, unwritten or unmapped space
1175 * to real space and flush out to disk.
1176 */
1177 error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
1178 if (error == -EAGAIN)
1179 goto out_fail;
1180 if (unlikely(error < 0))
1181 goto out_unlock;
1182
1183 return 0;
1184
1185out_fail:
1186 redirty_page_for_writepage(wbc, page);
1187 unlock_page(page);
1188 return 0;
1189out_unlock:
1190 unlock_page(page);
1191 return error;
1192}
1193
1194/*
1195 * Called to move a page into cleanable state - and from there
1196 * to be released. Possibly the page is already clean. We always
1197 * have buffer heads in this call.
1198 *
1199 * Returns 0 if the page is ok to release, 1 otherwise.
1200 *
1201 * Possible scenarios are:
1202 *
1203 * 1. We are being called to release a page which has been written
1204 * to via regular I/O. buffer heads will be dirty and possibly
1205 * delalloc. If no delalloc buffer heads in this case then we
1206 * can just return zero.
1207 *
1208 * 2. We are called to release a page which has been written via
1209 * mmap, all we need to do is ensure there is no delalloc
1210 * state in the buffer heads, if not we can let the caller
1211 * free them and we should come back later via writepage.
1212 */
1213STATIC int
1214linvfs_release_page(
1215 struct page *page,
1216 int gfp_mask)
1217{
1218 struct inode *inode = page->mapping->host;
1219 int dirty, delalloc, unmapped, unwritten;
1220 struct writeback_control wbc = {
1221 .sync_mode = WB_SYNC_ALL,
1222 .nr_to_write = 1,
1223 };
1224
1225 xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
1226
1227 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1228 if (!delalloc && !unwritten)
1229 goto free_buffers;
1230
1231 if (!(gfp_mask & __GFP_FS))
1232 return 0;
1233
1234 /* If we are already inside a transaction or the thread cannot
1235 * do I/O, we cannot release this page.
1236 */
1237 if (PFLAGS_TEST_FSTRANS())
1238 return 0;
1239
1240 /*
1241 * Convert delalloc space to real space, do not flush the
1242 * data out to disk, that will be done by the caller.
1243 * Never need to allocate space here - we will always
1244 * come back to writepage in that case.
1245 */
1246 dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
1247 if (dirty == 0 && !unwritten)
1248 goto free_buffers;
1249 return 0;
1250
1251free_buffers:
1252 return try_to_free_buffers(page);
1253}
1254
1255STATIC int
1256linvfs_prepare_write(
1257 struct file *file,
1258 struct page *page,
1259 unsigned int from,
1260 unsigned int to)
1261{
1262 return block_prepare_write(page, from, to, linvfs_get_block);
1263}
1264
1265struct address_space_operations linvfs_aops = {
1266 .readpage = linvfs_readpage,
1267 .readpages = linvfs_readpages,
1268 .writepage = linvfs_writepage,
1269 .sync_page = block_sync_page,
1270 .releasepage = linvfs_release_page,
1271 .prepare_write = linvfs_prepare_write,
1272 .commit_write = generic_commit_write,
1273 .bmap = linvfs_bmap,
1274 .direct_IO = linvfs_direct_IO,
1275};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
new file mode 100644
index 000000000000..23e0eb67fc25
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -0,0 +1,1980 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * The xfs_buf.c code provides an abstract buffer cache model on top
35 * of the Linux page cache. Cached metadata blocks for a file system
36 * are hashed to the inode for the block device. xfs_buf.c assembles
37 * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
38 *
39 * Written by Steve Lord, Jim Mostek, Russell Cattelan
40 * and Rajagopal Ananthanarayanan ("ananth") at SGI.
41 *
42 */
43
44#include <linux/stddef.h>
45#include <linux/errno.h>
46#include <linux/slab.h>
47#include <linux/pagemap.h>
48#include <linux/init.h>
49#include <linux/vmalloc.h>
50#include <linux/bio.h>
51#include <linux/sysctl.h>
52#include <linux/proc_fs.h>
53#include <linux/workqueue.h>
54#include <linux/percpu.h>
55#include <linux/blkdev.h>
56#include <linux/hash.h>
57
58#include "xfs_linux.h"
59
/*
 * File wide globals
 */

STATIC kmem_cache_t *pagebuf_cache;	/* slab zone all xfs_buf_t come from */
STATIC kmem_shaker_t pagebuf_shake;	/* memory shaker handle */
STATIC int pagebuf_daemon_wakeup(int, unsigned int);
STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
/* separate completion workqueues for log I/O and data I/O */
STATIC struct workqueue_struct *pagebuf_logio_workqueue;
STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
70
/*
 * Pagebuf debugging
 */

#ifdef PAGEBUF_TRACE
/* Record one buffer event in the global pagebuf ktrace ring. */
void
pagebuf_trace(
	xfs_buf_t	*pb,
	char		*id,
	void		*data,
	void		*ra)
{
	ktrace_enter(pagebuf_trace_buf,
		pb, id,
		(void *)(unsigned long)pb->pb_flags,
		(void *)(unsigned long)pb->pb_hold.counter,
		(void *)(unsigned long)pb->pb_sema.count.counter,
		(void *)current,
		data, ra,
		/* file offset split into two 32-bit halves to fit the slots */
		(void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
		(void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
		(void *)(unsigned long)pb->pb_buffer_length,
		NULL, NULL, NULL, NULL, NULL);
}
ktrace_t *pagebuf_trace_buf;
#define PAGEBUF_TRACE_SIZE	4096
#define PB_TRACE(pb, id, data)	\
	pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
#else
#define PB_TRACE(pb, id, data)	do { } while (0)
#endif

#ifdef PAGEBUF_LOCK_TRACKING
/* Remember which task last took/dropped pb_sema, for lock debugging */
# define PB_SET_OWNER(pb)	((pb)->pb_last_holder = current->pid)
# define PB_CLEAR_OWNER(pb)	((pb)->pb_last_holder = -1)
# define PB_GET_OWNER(pb)	((pb)->pb_last_holder)
#else
# define PB_SET_OWNER(pb)	do { } while (0)
# define PB_CLEAR_OWNER(pb)	do { } while (0)
# define PB_GET_OWNER(pb)	do { } while (0)
#endif
112
/*
 * Pagebuf allocation / freeing.
 */

/* Translate pagebuf flags into a page-allocation gfp mask */
#define pb_to_gfp(flags) \
	((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
	  ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)

/* Translate pagebuf flags into a kmem allocation mode */
#define pb_to_km(flags) \
	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)


#define pagebuf_allocate(flags) \
	kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
/*
 * No trailing semicolon here: the macro expands to an expression and
 * the previous stray ';' double-terminated statements at call sites
 * (and would break an unbraced if/else).
 */
#define pagebuf_deallocate(pb) \
	kmem_zone_free(pagebuf_cache, (pb))
129
/*
 * Page Region interfaces.
 *
 * For pages in filesystems where the blocksize is smaller than the
 * pagesize, we use the page->private field (long) to hold a bitmap
 * of uptodate regions within the page.
 *
 * Each such region is "bytes per page / bits per long" bytes long.
 *
 * NBPPR == number-of-bytes-per-page-region
 * BTOPR == bytes-to-page-region (rounded up)
 * BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
/* PRSHIFT is log2 of the bytes covered by one region bit */
#if (BITS_PER_LONG == 32)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
#elif (BITS_PER_LONG == 64)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
#else
#error BITS_PER_LONG must be 32 or 64
#endif
#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))
153
/*
 * Build the page->private bitmask covering the regions spanned by
 * [offset, offset + length) within one page.
 *
 * NOTE(review): if the range rounds to an empty span (final == first,
 * possible only for length < NBPPR given the ASSERTs below) the first
 * shift count becomes BITS_PER_LONG, which is undefined behaviour in C.
 * Callers appear to pass sector-sized (>= NBPPR) ranges only, so this
 * is presumably never hit - confirm before reusing with smaller lengths.
 */
STATIC unsigned long
page_region_mask(
	size_t		offset,
	size_t		length)
{
	unsigned long	mask;
	int		first, final;

	/* first region at/after offset; region holding the last byte */
	first = BTOPR(offset);
	final = BTOPRT(offset + length - 1);
	first = min(first, final);

	mask = ~0UL;
	mask <<= BITS_PER_LONG - (final - first);
	mask >>= BITS_PER_LONG - (final);

	ASSERT(offset + length <= PAGE_CACHE_SIZE);
	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);

	return mask;
}
175
176STATIC inline void
177set_page_region(
178 struct page *page,
179 size_t offset,
180 size_t length)
181{
182 page->private |= page_region_mask(offset, length);
183 if (page->private == ~0UL)
184 SetPageUptodate(page);
185}
186
187STATIC inline int
188test_page_region(
189 struct page *page,
190 size_t offset,
191 size_t length)
192{
193 unsigned long mask = page_region_mask(offset, length);
194
195 return (mask && (page->private & mask) == mask);
196}
197
/*
 * Mapping of multi-page buffers into contiguous virtual space
 */

/* Node on the deferred-vunmap list */
typedef struct a_list {
	void		*vm_addr;	/* vmap'd address awaiting vunmap */
	struct a_list	*next;
} a_list_t;

STATIC a_list_t		*as_free_head;	/* addresses queued for vunmap */
STATIC int		as_list_len;	/* number of queued addresses */
STATIC DEFINE_SPINLOCK(as_lock);	/* protects the two fields above */
210
211/*
212 * Try to batch vunmaps because they are costly.
213 */
214STATIC void
215free_address(
216 void *addr)
217{
218 a_list_t *aentry;
219
220 aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
221 if (likely(aentry)) {
222 spin_lock(&as_lock);
223 aentry->next = as_free_head;
224 aentry->vm_addr = addr;
225 as_free_head = aentry;
226 as_list_len++;
227 spin_unlock(&as_lock);
228 } else {
229 vunmap(addr);
230 }
231}
232
233STATIC void
234purge_addresses(void)
235{
236 a_list_t *aentry, *old;
237
238 if (as_free_head == NULL)
239 return;
240
241 spin_lock(&as_lock);
242 aentry = as_free_head;
243 as_free_head = NULL;
244 as_list_len = 0;
245 spin_unlock(&as_lock);
246
247 while ((old = aentry) != NULL) {
248 vunmap(aentry->vm_addr);
249 aentry = aentry->next;
250 kfree(old);
251 }
252}
253
254/*
255 * Internal pagebuf object manipulation
256 */
257
/*
 * Initialize a freshly-allocated xfs_buf_t to cover the byte range
 * [range_base, range_base + range_length) on @target.  No pages are
 * attached here; the caller populates them separately.
 */
STATIC void
_pagebuf_initialize(
	xfs_buf_t		*pb,
	xfs_buftarg_t		*target,
	loff_t			range_base,
	size_t			range_length,
	page_buf_flags_t	flags)
{
	/*
	 * We don't want certain flags to appear in pb->pb_flags.
	 */
	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);

	memset(pb, 0, sizeof(xfs_buf_t));
	atomic_set(&pb->pb_hold, 1);		/* the caller's reference */
	init_MUTEX_LOCKED(&pb->pb_iodonesema);	/* up'ed on I/O completion */
	INIT_LIST_HEAD(&pb->pb_list);
	INIT_LIST_HEAD(&pb->pb_hash_list);
	init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
	PB_SET_OWNER(pb);
	pb->pb_target = target;
	pb->pb_file_offset = range_base;
	/*
	 * Set buffer_length and count_desired to the same value initially.
	 * I/O routines should use count_desired, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	pb->pb_buffer_length = pb->pb_count_desired = range_length;
	/* PBF_NONE: no pages known to be uptodate yet */
	pb->pb_flags = flags | PBF_NONE;
	pb->pb_bn = XFS_BUF_DADDR_NULL;		/* no disk address assigned */
	atomic_set(&pb->pb_pin_count, 0);
	init_waitqueue_head(&pb->pb_waiters);

	XFS_STATS_INC(pb_create);
	PB_TRACE(pb, "initialize", target);
}
294
295/*
296 * Allocate a page array capable of holding a specified number
297 * of pages, and point the page buf at it.
298 */
299STATIC int
300_pagebuf_get_pages(
301 xfs_buf_t *pb,
302 int page_count,
303 page_buf_flags_t flags)
304{
305 /* Make sure that we have a page list */
306 if (pb->pb_pages == NULL) {
307 pb->pb_offset = page_buf_poff(pb->pb_file_offset);
308 pb->pb_page_count = page_count;
309 if (page_count <= PB_PAGES) {
310 pb->pb_pages = pb->pb_page_array;
311 } else {
312 pb->pb_pages = kmem_alloc(sizeof(struct page *) *
313 page_count, pb_to_km(flags));
314 if (pb->pb_pages == NULL)
315 return -ENOMEM;
316 }
317 memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
318 }
319 return 0;
320}
321
322/*
323 * Frees pb_pages if it was malloced.
324 */
325STATIC void
326_pagebuf_free_pages(
327 xfs_buf_t *bp)
328{
329 if (bp->pb_pages != bp->pb_page_array) {
330 kmem_free(bp->pb_pages,
331 bp->pb_page_count * sizeof(struct page *));
332 }
333}
334
335/*
336 * Releases the specified buffer.
337 *
338 * The modification state of any associated pages is left unchanged.
 * The buffer must not be on any hash - use pagebuf_rele instead for
340 * hashed and refcounted buffers
341 */
void
pagebuf_free(
	xfs_buf_t		*bp)
{
	PB_TRACE(bp, "free", 0);

	ASSERT(list_empty(&bp->pb_hash_list));

	if (bp->pb_flags & _PBF_PAGE_CACHE) {
		uint		i;

		/* multi-page mappings were vmap'd; queue the vunmap */
		if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
			free_address(bp->pb_addr - bp->pb_offset);

		/* drop the page-cache references taken at lookup time */
		for (i = 0; i < bp->pb_page_count; i++)
			page_cache_release(bp->pb_pages[i]);
		_pagebuf_free_pages(bp);
	} else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
		/*
		 * XXX(hch): bp->pb_count_desired might be incorrect (see
		 * pagebuf_associate_memory for details), but fortunately
		 * the Linux version of kmem_free ignores the len argument..
		 */
		kmem_free(bp->pb_addr, bp->pb_count_desired);
		_pagebuf_free_pages(bp);
	}

	pagebuf_deallocate(bp);
}
371
372/*
 * Finds all pages for buffer in question and builds its page list.
374 */
/*
 * Populate bp->pb_pages from the page cache for the buffer's range.
 * page_count starts as the total number of pages and is decremented
 * for every page found not (even partially) uptodate; the remainder
 * decides the PBF_NONE / PBF_PARTIAL state below.
 */
STATIC int
_pagebuf_lookup_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	struct address_space	*mapping = bp->pb_target->pbr_mapping;
	size_t			blocksize = bp->pb_target->pbr_bsize;
	size_t			size = bp->pb_count_desired;
	size_t			nbytes, offset;
	int			gfp_mask = pb_to_gfp(flags);
	unsigned short		page_count, i;
	pgoff_t			first;
	loff_t			end;
	int			error;

	end = bp->pb_file_offset + bp->pb_buffer_length;
	page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);

	error = _pagebuf_get_pages(bp, page_count, flags);
	if (unlikely(error))
		return error;
	bp->pb_flags |= _PBF_PAGE_CACHE;

	offset = bp->pb_offset;
	first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;

	for (i = 0; i < bp->pb_page_count; i++) {
		struct page	*page;
		uint		retries = 0;

	      retry:
		page = find_or_create_page(mapping, first + i, gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & PBF_READ_AHEAD) {
				/* readahead is best-effort: unlock what we
				 * have so far and bail out */
				bp->pb_page_count = i;
				for (i = 0; i < bp->pb_page_count; i++)
					unlock_page(bp->pb_pages[i]);
				return -ENOMEM;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				printk(KERN_ERR
					"XFS: possible memory allocation "
					"deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, gfp_mask);

			XFS_STATS_INC(pb_page_retries);
			pagebuf_daemon_wakeup(0, gfp_mask);
			blk_congestion_wait(WRITE, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(pb_page_found);

		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
		size -= nbytes;

		if (!PageUptodate(page)) {
			page_count--;
			if (blocksize >= PAGE_CACHE_SIZE) {
				/* keep pages locked while the read runs */
				if (flags & PBF_READ)
					bp->pb_locked = 1;
			} else if (!PagePrivate(page)) {
				/* sub-page blocks: the piece we need may
				 * still be valid per the region bitmap */
				if (test_page_region(page, offset, nbytes))
					page_count++;
			}
		}

		bp->pb_pages[i] = page;
		offset = 0;
	}

	if (!bp->pb_locked) {
		for (i = 0; i < bp->pb_page_count; i++)
			unlock_page(bp->pb_pages[i]);
	}

	if (page_count) {
		/* if we have any uptodate pages, mark that in the buffer */
		bp->pb_flags &= ~PBF_NONE;

		/* if some pages aren't uptodate, mark that in the buffer */
		if (page_count != bp->pb_page_count)
			bp->pb_flags |= PBF_PARTIAL;
	}

	PB_TRACE(bp, "lookup_pages", (long)page_count);
	return error;
}
470
471/*
 * Map buffer into kernel address-space if necessary.
473 */
474STATIC int
475_pagebuf_map_pages(
476 xfs_buf_t *bp,
477 uint flags)
478{
479 /* A single page buffer is always mappable */
480 if (bp->pb_page_count == 1) {
481 bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
482 bp->pb_flags |= PBF_MAPPED;
483 } else if (flags & PBF_MAPPED) {
484 if (as_list_len > 64)
485 purge_addresses();
486 bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
487 VM_MAP, PAGE_KERNEL);
488 if (unlikely(bp->pb_addr == NULL))
489 return -ENOMEM;
490 bp->pb_addr += bp->pb_offset;
491 bp->pb_flags |= PBF_MAPPED;
492 }
493
494 return 0;
495}
496
497/*
498 * Finding and Reading Buffers
499 */
500
501/*
502 * _pagebuf_find
503 *
504 * Looks up, and creates if absent, a lockable buffer for
505 * a given range of an inode. The buffer is returned
506 * locked. If other overlapping buffers exist, they are
507 * released before the new buffer is created and locked,
508 * which may imply that this call will block until those buffers
509 * are unlocked. No I/O is implied by this call.
510 */
xfs_buf_t *
_pagebuf_find(
	xfs_buftarg_t		*btp,	/* block device target		*/
	loff_t			ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	page_buf_flags_t	flags,	/* PBF_TRYLOCK			*/
	xfs_buf_t		*new_pb)/* newly allocated buffer	*/
{
	loff_t			range_base;
	size_t			range_length;
	xfs_bufhash_t		*hash;
	xfs_buf_t		*pb, *n;

	/* ioff/isize are in basic-block units; convert via BBSHIFT */
	range_base = (ioff << BBSHIFT);
	range_length = (isize << BBSHIFT);

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(range_length < (1 << btp->pbr_sshift)));
	ASSERT(!(range_base & (loff_t)btp->pbr_smask));

	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];

	spin_lock(&hash->bh_lock);

	list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
		ASSERT(btp == pb->pb_target);
		if (pb->pb_file_offset == range_base &&
		    pb->pb_buffer_length == range_length) {
			/*
			 * If we look at something bring it to the
			 * front of the list for next time.
			 */
			atomic_inc(&pb->pb_hold);
			list_move(&pb->pb_hash_list, &hash->bh_list);
			goto found;
		}
	}

	/* No match found */
	if (new_pb) {
		/* insert the caller-supplied buffer under the hash lock */
		_pagebuf_initialize(new_pb, btp, range_base,
				range_length, flags);
		new_pb->pb_hash = hash;
		list_add(&new_pb->pb_hash_list, &hash->bh_list);
	} else {
		XFS_STATS_INC(pb_miss_locked);
	}

	spin_unlock(&hash->bh_lock);
	return new_pb;

found:
	spin_unlock(&hash->bh_lock);

	/* Attempt to get the semaphore without sleeping,
	 * if this does not work then we need to drop the
	 * spinlock and do a hard attempt on the semaphore.
	 */
	if (down_trylock(&pb->pb_sema)) {
		if (!(flags & PBF_TRYLOCK)) {
			/* wait for buffer ownership */
			PB_TRACE(pb, "get_lock", 0);
			pagebuf_lock(pb);
			XFS_STATS_INC(pb_get_locked_waited);
		} else {
			/* We asked for a trylock and failed, no need
			 * to look at file offset and length here, we
			 * know that this pagebuf at least overlaps our
			 * pagebuf and is locked, therefore our buffer
			 * either does not exist, or is this buffer
			 */

			pagebuf_rele(pb);
			XFS_STATS_INC(pb_busy_locked);
			return (NULL);
		}
	} else {
		/* trylock worked */
		PB_SET_OWNER(pb);
	}

	/* stale buffer found: keep only its mapping state */
	if (pb->pb_flags & PBF_STALE)
		pb->pb_flags &= PBF_MAPPED;
	PB_TRACE(pb, "got_lock", 0);
	XFS_STATS_INC(pb_get_locked);
	return (pb);
}
598
599/*
600 * xfs_buf_get_flags assembles a buffer covering the specified range.
601 *
602 * Storage in memory for all portions of the buffer will be allocated,
603 * although backing storage may not be.
604 */
xfs_buf_t *
xfs_buf_get_flags(			/* allocate a buffer		*/
	xfs_buftarg_t		*target,/* target for buffer		*/
	loff_t			ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/
{
	xfs_buf_t		*pb, *new_pb;
	int			error = 0, i;

	/* speculative allocation; freed below if the hash already has one */
	new_pb = pagebuf_allocate(flags);
	if (unlikely(!new_pb))
		return NULL;

	pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
	if (pb == new_pb) {
		/* our new buffer was inserted: populate its page array */
		error = _pagebuf_lookup_pages(pb, flags);
		if (error)
			goto no_buffer;
	} else {
		/* existing buffer returned (or NULL on trylock failure) */
		pagebuf_deallocate(new_pb);
		if (unlikely(pb == NULL))
			return NULL;
	}

	for (i = 0; i < pb->pb_page_count; i++)
		mark_page_accessed(pb->pb_pages[i]);

	if (!(pb->pb_flags & PBF_MAPPED)) {
		error = _pagebuf_map_pages(pb, flags);
		if (unlikely(error)) {
			printk(KERN_WARNING "%s: failed to map pages\n",
					__FUNCTION__);
			goto no_buffer;
		}
	}

	XFS_STATS_INC(pb_get);

	/*
	 * Always fill in the block number now, the mapped cases can do
	 * their own overlay of this later.
	 */
	pb->pb_bn = ioff;
	pb->pb_count_desired = pb->pb_buffer_length;

	PB_TRACE(pb, "get", (unsigned long)flags);
	return pb;

 no_buffer:
	if (flags & (PBF_LOCK | PBF_TRYLOCK))
		pagebuf_unlock(pb);
	pagebuf_rele(pb);
	return NULL;
}
660
661xfs_buf_t *
662xfs_buf_read_flags(
663 xfs_buftarg_t *target,
664 loff_t ioff,
665 size_t isize,
666 page_buf_flags_t flags)
667{
668 xfs_buf_t *pb;
669
670 flags |= PBF_READ;
671
672 pb = xfs_buf_get_flags(target, ioff, isize, flags);
673 if (pb) {
674 if (PBF_NOT_DONE(pb)) {
675 PB_TRACE(pb, "read", (unsigned long)flags);
676 XFS_STATS_INC(pb_get_read);
677 pagebuf_iostart(pb, flags);
678 } else if (flags & PBF_ASYNC) {
679 PB_TRACE(pb, "read_async", (unsigned long)flags);
680 /*
681 * Read ahead call which is already satisfied,
682 * drop the buffer
683 */
684 goto no_buffer;
685 } else {
686 PB_TRACE(pb, "read_done", (unsigned long)flags);
687 /* We do not want read in the flags */
688 pb->pb_flags &= ~PBF_READ;
689 }
690 }
691
692 return pb;
693
694 no_buffer:
695 if (flags & (PBF_LOCK | PBF_TRYLOCK))
696 pagebuf_unlock(pb);
697 pagebuf_rele(pb);
698 return NULL;
699}
700
701/*
702 * Create a skeletal pagebuf (no pages associated with it).
703 */
704xfs_buf_t *
705pagebuf_lookup(
706 xfs_buftarg_t *target,
707 loff_t ioff,
708 size_t isize,
709 page_buf_flags_t flags)
710{
711 xfs_buf_t *pb;
712
713 pb = pagebuf_allocate(flags);
714 if (pb) {
715 _pagebuf_initialize(pb, target, ioff, isize, flags);
716 }
717 return pb;
718}
719
720/*
721 * If we are not low on memory then do the readahead in a deadlock
722 * safe manner.
723 */
724void
725pagebuf_readahead(
726 xfs_buftarg_t *target,
727 loff_t ioff,
728 size_t isize,
729 page_buf_flags_t flags)
730{
731 struct backing_dev_info *bdi;
732
733 bdi = target->pbr_mapping->backing_dev_info;
734 if (bdi_read_congested(bdi))
735 return;
736
737 flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
738 xfs_buf_read_flags(target, ioff, isize, flags);
739}
740
741xfs_buf_t *
742pagebuf_get_empty(
743 size_t len,
744 xfs_buftarg_t *target)
745{
746 xfs_buf_t *pb;
747
748 pb = pagebuf_allocate(0);
749 if (pb)
750 _pagebuf_initialize(pb, target, 0, len, 0);
751 return pb;
752}
753
754static inline struct page *
755mem_to_page(
756 void *addr)
757{
758 if (((unsigned long)addr < VMALLOC_START) ||
759 ((unsigned long)addr >= VMALLOC_END)) {
760 return virt_to_page(addr);
761 } else {
762 return vmalloc_to_page(addr);
763 }
764}
765
766int
767pagebuf_associate_memory(
768 xfs_buf_t *pb,
769 void *mem,
770 size_t len)
771{
772 int rval;
773 int i = 0;
774 size_t ptr;
775 size_t end, end_cur;
776 off_t offset;
777 int page_count;
778
779 page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
780 offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
781 if (offset && (len > PAGE_CACHE_SIZE))
782 page_count++;
783
784 /* Free any previous set of page pointers */
785 if (pb->pb_pages)
786 _pagebuf_free_pages(pb);
787
788 pb->pb_pages = NULL;
789 pb->pb_addr = mem;
790
791 rval = _pagebuf_get_pages(pb, page_count, 0);
792 if (rval)
793 return rval;
794
795 pb->pb_offset = offset;
796 ptr = (size_t) mem & PAGE_CACHE_MASK;
797 end = PAGE_CACHE_ALIGN((size_t) mem + len);
798 end_cur = end;
799 /* set up first page */
800 pb->pb_pages[0] = mem_to_page(mem);
801
802 ptr += PAGE_CACHE_SIZE;
803 pb->pb_page_count = ++i;
804 while (ptr < end) {
805 pb->pb_pages[i] = mem_to_page((void *)ptr);
806 pb->pb_page_count = ++i;
807 ptr += PAGE_CACHE_SIZE;
808 }
809 pb->pb_locked = 0;
810
811 pb->pb_count_desired = pb->pb_buffer_length = len;
812 pb->pb_flags |= PBF_MAPPED;
813
814 return 0;
815}
816
/*
 * Allocate a buffer backed by kmem_alloc'd memory with no disk address,
 * doubling the allocation size until the returned memory happens to be
 * sector-aligned for the target.
 */
xfs_buf_t *
pagebuf_get_no_daddr(
	size_t			len,
	xfs_buftarg_t		*target)
{
	size_t			malloc_len = len;
	xfs_buf_t		*bp;
	void			*data;
	int			error;

	bp = pagebuf_allocate(0);
	if (unlikely(bp == NULL))
		goto fail;
	_pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO);

 try_again:
	data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
	if (unlikely(data == NULL))
		goto fail_free_buf;

	/* check whether alignment matches.. */
	if ((__psunsigned_t)data !=
	    ((__psunsigned_t)data & ~target->pbr_smask)) {
		/* .. else double the size and try again */
		kmem_free(data, malloc_len);
		malloc_len <<= 1;
		goto try_again;
	}

	error = pagebuf_associate_memory(bp, data, len);
	if (error)
		goto fail_free_mem;
	/* remember to kmem_free (not page_cache_release) in pagebuf_free */
	bp->pb_flags |= _PBF_KMEM_ALLOC;

	pagebuf_unlock(bp);

	PB_TRACE(bp, "no_daddr", data);
	return bp;
 fail_free_mem:
	kmem_free(data, malloc_len);
 fail_free_buf:
	pagebuf_free(bp);
 fail:
	return NULL;
}
862
863/*
864 * pagebuf_hold
865 *
866 * Increment reference count on buffer, to hold the buffer concurrently
867 * with another thread which may release (free) the buffer asynchronously.
868 *
869 * Must hold the buffer already to call this function.
870 */
void
pagebuf_hold(
	xfs_buf_t		*pb)
{
	/* see the comment above: caller must already hold a reference */
	atomic_inc(&pb->pb_hold);
	PB_TRACE(pb, "hold", 0);
}
878
879/*
880 * pagebuf_rele
881 *
882 * pagebuf_rele releases a hold on the specified buffer. If the
883 * the hold count is 1, pagebuf_rele calls pagebuf_free.
884 */
void
pagebuf_rele(
	xfs_buf_t		*pb)
{
	xfs_bufhash_t		*hash = pb->pb_hash;

	PB_TRACE(pb, "rele", pb->pb_relse);

	/*
	 * pagebuf_lookup buffers are not hashed, not delayed write,
	 * and don't have their own release routines. Special case.
	 */
	if (unlikely(!hash)) {
		ASSERT(!pb->pb_relse);
		if (atomic_dec_and_test(&pb->pb_hold))
			xfs_buf_free(pb);
		return;
	}

	/* only take the hash lock when dropping the last reference */
	if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
		int		do_free = 1;

		if (pb->pb_relse) {
			/* hand the final reference to the release callback */
			atomic_inc(&pb->pb_hold);
			spin_unlock(&hash->bh_lock);
			(*(pb->pb_relse)) (pb);
			spin_lock(&hash->bh_lock);
			do_free = 0;
		}

		if (pb->pb_flags & PBF_DELWRI) {
			/* keep the buffer alive on the delwri queue */
			pb->pb_flags |= PBF_ASYNC;
			atomic_inc(&pb->pb_hold);
			pagebuf_delwri_queue(pb, 0);
			do_free = 0;
		} else if (pb->pb_flags & PBF_FS_MANAGED) {
			/* filesystem-managed buffers are not freed here */
			do_free = 0;
		}

		if (do_free) {
			list_del_init(&pb->pb_hash_list);
			spin_unlock(&hash->bh_lock);
			pagebuf_free(pb);
		} else {
			spin_unlock(&hash->bh_lock);
		}
	}
}
933
934
935/*
936 * Mutual exclusion on buffers. Locking model:
937 *
938 * Buffers associated with inodes for which buffer locking
939 * is not enabled are not protected by semaphores, and are
940 * assumed to be exclusively owned by the caller. There is a
941 * spinlock in the buffer, used by the caller when concurrent
942 * access is possible.
943 */
944
945/*
946 * pagebuf_cond_lock
947 *
948 * pagebuf_cond_lock locks a buffer object, if it is not already locked.
949 * Note that this in no way
950 * locks the underlying pages, so it is only useful for synchronizing
951 * concurrent use of page buffer objects, not for synchronizing independent
952 * access to the underlying pages.
953 */
954int
955pagebuf_cond_lock( /* lock buffer, if not locked */
956 /* returns -EBUSY if locked) */
957 xfs_buf_t *pb)
958{
959 int locked;
960
961 locked = down_trylock(&pb->pb_sema) == 0;
962 if (locked) {
963 PB_SET_OWNER(pb);
964 }
965 PB_TRACE(pb, "cond_lock", (long)locked);
966 return(locked ? 0 : -EBUSY);
967}
968
#if defined(DEBUG) || defined(XFS_BLI_TRACE)
/*
 * pagebuf_lock_value
 *
 * Return lock value for a pagebuf
 * (the raw pb_sema count; debug/trace builds only).
 */
int
pagebuf_lock_value(
	xfs_buf_t		*pb)
{
	return(atomic_read(&pb->pb_sema.count));
}
#endif
982
983/*
984 * pagebuf_lock
985 *
986 * pagebuf_lock locks a buffer object. Note that this in no way
987 * locks the underlying pages, so it is only useful for synchronizing
988 * concurrent use of page buffer objects, not for synchronizing independent
989 * access to the underlying pages.
990 */
991int
992pagebuf_lock(
993 xfs_buf_t *pb)
994{
995 PB_TRACE(pb, "lock", 0);
996 if (atomic_read(&pb->pb_io_remaining))
997 blk_run_address_space(pb->pb_target->pbr_mapping);
998 down(&pb->pb_sema);
999 PB_SET_OWNER(pb);
1000 PB_TRACE(pb, "locked", 0);
1001 return 0;
1002}
1003
1004/*
1005 * pagebuf_unlock
1006 *
1007 * pagebuf_unlock releases the lock on the buffer object created by
1008 * pagebuf_lock or pagebuf_cond_lock (not any
1009 * pinning of underlying pages created by pagebuf_pin).
1010 */
void
pagebuf_unlock(			/* unlock buffer		*/
	xfs_buf_t		*pb)	/* buffer to unlock		*/
{
	/* clear the owner before the semaphore can be re-taken */
	PB_CLEAR_OWNER(pb);
	up(&pb->pb_sema);
	PB_TRACE(pb, "unlock", 0);
}
1019
1020
1021/*
1022 * Pinning Buffer Storage in Memory
1023 */
1024
1025/*
1026 * pagebuf_pin
1027 *
1028 * pagebuf_pin locks all of the memory represented by a buffer in
1029 * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
1030 * the same or different buffers affecting a given page, will
1031 * properly count the number of outstanding "pin" requests. The
1032 * buffer may be released after the pagebuf_pin and a different
1033 * buffer used when calling pagebuf_unpin, if desired.
1034 * pagebuf_pin should be used by the file system when it wants be
1035 * assured that no attempt will be made to force the affected
1036 * memory to disk. It does not assure that a given logical page
1037 * will not be moved to a different physical page.
1038 */
void
pagebuf_pin(
	xfs_buf_t		*pb)
{
	/* pin counts nest; see the comment block above for semantics */
	atomic_inc(&pb->pb_pin_count);
	PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
}
1046
1047/*
1048 * pagebuf_unpin
1049 *
1050 * pagebuf_unpin reverses the locking of memory performed by
1051 * pagebuf_pin. Note that both functions affected the logical
1052 * pages associated with the buffer, not the buffer itself.
1053 */
1054void
1055pagebuf_unpin(
1056 xfs_buf_t *pb)
1057{
1058 if (atomic_dec_and_test(&pb->pb_pin_count)) {
1059 wake_up_all(&pb->pb_waiters);
1060 }
1061 PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
1062}
1063
1064int
1065pagebuf_ispin(
1066 xfs_buf_t *pb)
1067{
1068 return atomic_read(&pb->pb_pin_count);
1069}
1070
1071/*
1072 * pagebuf_wait_unpin
1073 *
1074 * pagebuf_wait_unpin waits until all of the memory associated
 * with the buffer is no longer locked in memory. It returns
1076 * immediately if none of the affected pages are locked.
1077 */
static inline void
_pagebuf_wait_unpin(
	xfs_buf_t		*pb)
{
	DECLARE_WAITQUEUE	(wait, current);

	/* fast path: nothing pinned, nothing to wait for */
	if (atomic_read(&pb->pb_pin_count) == 0)
		return;

	add_wait_queue(&pb->pb_waiters, &wait);
	for (;;) {
		/* state must be set before the re-check to avoid a lost wakeup */
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&pb->pb_pin_count) == 0)
			break;
		/* push queued I/O along; pagebuf_unpin() wakes pb_waiters */
		if (atomic_read(&pb->pb_io_remaining))
			blk_run_address_space(pb->pb_target->pbr_mapping);
		schedule();
	}
	remove_wait_queue(&pb->pb_waiters, &wait);
	set_current_state(TASK_RUNNING);
}
1099
1100/*
1101 * Buffer Utility Routines
1102 */
1103
1104/*
1105 * pagebuf_iodone
1106 *
1107 * pagebuf_iodone marks a buffer for which I/O is in progress
1108 * done with respect to that I/O. The pb_iodone routine, if
1109 * present, will be called as a side-effect.
1110 */
1111STATIC void
1112pagebuf_iodone_work(
1113 void *v)
1114{
1115 xfs_buf_t *bp = (xfs_buf_t *)v;
1116
1117 if (bp->pb_iodone)
1118 (*(bp->pb_iodone))(bp);
1119 else if (bp->pb_flags & PBF_ASYNC)
1120 xfs_buf_relse(bp);
1121}
1122
void
pagebuf_iodone(
	xfs_buf_t		*pb,
	int			dataio,
	int			schedule)
{
	/* the I/O is over - clear the direction flags */
	pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
	if (pb->pb_error == 0) {
		/* success means the whole buffer is now valid */
		pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
	}

	PB_TRACE(pb, "iodone", pb->pb_iodone);

	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
		if (schedule) {
			/* defer the callback to the matching workqueue */
			INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
			queue_work(dataio ? pagebuf_dataio_workqueue :
				pagebuf_logio_workqueue, &pb->pb_iodone_work);
		} else {
			pagebuf_iodone_work(pb);
		}
	} else {
		/* wake any synchronous waiter blocked on pb_iodonesema */
		up(&pb->pb_iodonesema);
	}
}
1148
1149/*
1150 * pagebuf_ioerror
1151 *
1152 * pagebuf_ioerror sets the error code for a buffer.
1153 */
void
pagebuf_ioerror(			/* mark/clear buffer error flag */
	xfs_buf_t		*pb,	/* buffer to mark		*/
	int			error)	/* error to store (0 if none)	*/
{
	/* pb_error is an unsigned short; the error must fit */
	ASSERT(error >= 0 && error <= 0xffff);
	pb->pb_error = (unsigned short)error;
	PB_TRACE(pb, "ioerror", (unsigned long)error);
}
1163
1164/*
1165 * pagebuf_iostart
1166 *
1167 * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
1168 * If necessary, it will arrange for any disk space allocation required,
1169 * and it will break up the request if the block mappings require it.
1170 * The pb_iodone routine in the buffer supplied will only be called
1171 * when all of the subsidiary I/O requests, if any, have been completed.
1172 * pagebuf_iostart calls the pagebuf_ioinitiate routine or
1173 * pagebuf_iorequest, if the former routine is not defined, to start
1174 * the I/O on a given low-level request.
1175 */
int
pagebuf_iostart(			/* start I/O on a buffer	  */
	xfs_buf_t		*pb,	/* buffer to start		  */
	page_buf_flags_t	flags)	/* PBF_LOCK, PBF_ASYNC, PBF_READ, */
					/* PBF_WRITE, PBF_DELWRI,	  */
					/* PBF_DONT_BLOCK		  */
{
	int			status = 0;

	PB_TRACE(pb, "iostart", (unsigned long)flags);

	if (flags & PBF_DELWRI) {
		/* delayed write: queue only, no I/O issued here */
		pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
		pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
		pagebuf_delwri_queue(pb, 1);
		return status;
	}

	/* replace stale direction/queue flags with the caller's choices */
	pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
			PBF_READ_AHEAD | _PBF_RUN_QUEUES);
	pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
			PBF_READ_AHEAD | _PBF_RUN_QUEUES);

	BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);

	/* For writes allow an alternate strategy routine to precede
	 * the actual I/O request (which may not be issued at all in
	 * a shutdown situation, for example).
	 */
	status = (flags & PBF_WRITE) ?
		pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);

	/* Wait for I/O if we are not an async request.
	 * Note: async I/O request completion will release the buffer,
	 * and that can already be done by this point. So using the
	 * buffer pointer from here on, after async I/O, is invalid.
	 */
	if (!status && !(flags & PBF_ASYNC))
		status = pagebuf_iowait(pb);

	return status;
}
1218
1219/*
1220 * Helper routine for pagebuf_iorequest
1221 */
1222
1223STATIC __inline__ int
1224_pagebuf_iolocked(
1225 xfs_buf_t *pb)
1226{
1227 ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
1228 if (pb->pb_flags & PBF_READ)
1229 return pb->pb_locked;
1230 return 0;
1231}
1232
1233STATIC __inline__ void
1234_pagebuf_iodone(
1235 xfs_buf_t *pb,
1236 int schedule)
1237{
1238 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
1239 pb->pb_locked = 0;
1240 pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule);
1241 }
1242}
1243
/*
 * I/O completion handler for the bios built by _pagebuf_ioapply().
 *
 * Records any error on the buffer, propagates up-to-date / error state
 * to each page covered by the bio, unlocks pages that were locked for
 * the I/O, and drops one outstanding-I/O reference (which may trigger
 * final buffer completion processing).
 */
STATIC int
bio_end_io_pagebuf(
	struct bio		*bio,
	unsigned int		bytes_done,
	int			error)
{
	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
	struct bio_vec		*bvec = bio->bi_io_vec;

	/* Partial completion: wait for the rest of the bio. */
	if (bio->bi_size)
		return 1;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		pb->pb_error = EIO;

	for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
		struct page	*page = bvec->bv_page;

		if (pb->pb_error) {
			SetPageError(page);
		} else if (blocksize == PAGE_CACHE_SIZE) {
			SetPageUptodate(page);
		} else if (!PagePrivate(page) &&
				(pb->pb_flags & _PBF_PAGE_CACHE)) {
			/* sub-page blocks: mark only the I/O'd region valid */
			set_page_region(page, bvec->bv_offset, bvec->bv_len);
		}

		if (_pagebuf_iolocked(pb)) {
			unlock_page(page);
		}
	}

	_pagebuf_iodone(pb, 1);
	bio_put(bio);
	return 0;
}
1281
/*
 * Build and submit bios covering pb_count_desired bytes of the buffer,
 * starting at pb_offset within the first page.  One pb_io_remaining
 * reference is taken per submitted bio; bio_end_io_pagebuf() drops
 * them as the bios complete.
 */
STATIC void
_pagebuf_ioapply(
	xfs_buf_t		*pb)
{
	int			i, rw, map_i, total_nr_pages, nr_pages;
	struct bio		*bio;
	int			offset = pb->pb_offset;
	int			size = pb->pb_count_desired;
	sector_t		sector = pb->pb_bn;
	unsigned int		blocksize = pb->pb_target->pbr_bsize;
	int			locking = _pagebuf_iolocked(pb);

	total_nr_pages = pb->pb_page_count;
	map_i = 0;

	/* _PBF_RUN_QUEUES requests synchronous dispatch; consume it here. */
	if (pb->pb_flags & _PBF_RUN_QUEUES) {
		pb->pb_flags &= ~_PBF_RUN_QUEUES;
		rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;
	} else {
		rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
	}

	/* Special code path for reading a sub page size pagebuf in --
	 * we populate up the whole page, and hence the other metadata
	 * in the same page.  This optimization is only valid when the
	 * filesystem block size and the page size are equal.
	 */
	if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
	    (pb->pb_flags & PBF_READ) && locking &&
	    (blocksize == PAGE_CACHE_SIZE)) {
		bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = pb->pb_target->pbr_bdev;
		/* back up to the start of the page, not of the range */
		bio->bi_sector = sector - (offset >> BBSHIFT);
		bio->bi_end_io = bio_end_io_pagebuf;
		bio->bi_private = pb;

		bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
		size = 0;

		atomic_inc(&pb->pb_io_remaining);

		goto submit_io;
	}

	/* Lock down the pages which we need to for the request */
	if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
		for (i = 0; size; i++) {
			int		nbytes = PAGE_CACHE_SIZE - offset;
			struct page	*page = pb->pb_pages[i];

			if (nbytes > size)
				nbytes = size;

			lock_page(page);

			size -= nbytes;
			offset = 0;
		}
		/* reset for the bio-building pass below */
		offset = pb->pb_offset;
		size = pb->pb_count_desired;
	}

next_chunk:
	/* one outstanding-I/O reference per bio submitted */
	atomic_inc(&pb->pb_io_remaining);
	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
	if (nr_pages > total_nr_pages)
		nr_pages = total_nr_pages;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_bdev = pb->pb_target->pbr_bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = bio_end_io_pagebuf;
	bio->bi_private = pb;

	for (; size && nr_pages; nr_pages--, map_i++) {
		int	nbytes = PAGE_CACHE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		/* bio is full: submit it and start another chunk */
		if (bio_add_page(bio, pb->pb_pages[map_i],
					nbytes, offset) < nbytes)
			break;

		offset = 0;
		sector += nbytes >> BBSHIFT;
		size -= nbytes;
		total_nr_pages--;
	}

submit_io:
	if (likely(bio->bi_size)) {
		submit_bio(rw, bio);
		if (size)
			goto next_chunk;
	} else {
		/* bio_add_page() accepted nothing at all */
		bio_put(bio);
		pagebuf_ioerror(pb, EIO);
		/* NOTE(review): the pb_io_remaining reference taken for
		 * this bio is not dropped on this path -- confirm that
		 * completion still fires for the buffer. */
	}
}
1383
/*
 * pagebuf_iorequest -- the core I/O request routine.
 *
 * Submits the actual device I/O for a buffer (or queues it, for
 * delayed writes).  Always returns 0; I/O errors are reported via
 * pb_error and the completion path.
 */
int
pagebuf_iorequest(			/* start real I/O		*/
	xfs_buf_t		*pb)	/* buffer to convey to device	*/
{
	PB_TRACE(pb, "iorequest", 0);

	/* Delayed-write buffers are only queued, never issued here. */
	if (pb->pb_flags & PBF_DELWRI) {
		pagebuf_delwri_queue(pb, 1);
		return 0;
	}

	/* Writes must wait until the buffer is no longer pinned. */
	if (pb->pb_flags & PBF_WRITE) {
		_pagebuf_wait_unpin(pb);
	}

	/* Hold a reference so completion cannot free the buffer while
	 * we are still submitting bios for it. */
	pagebuf_hold(pb);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling pagebuf_iodone too early.
	 */
	atomic_set(&pb->pb_io_remaining, 1);
	_pagebuf_ioapply(pb);
	/* Drop the artificial initial reference; completion runs now
	 * if every bio has already finished. */
	_pagebuf_iodone(pb, 0);

	pagebuf_rele(pb);
	return 0;
}
1415
/*
 * pagebuf_iowait
 *
 * pagebuf_iowait waits for I/O to complete on the buffer supplied.
 * It returns immediately if no I/O is pending.  In any case, it returns
 * the error code, if any, or 0 if there is no error.
 */
int
pagebuf_iowait(
	xfs_buf_t		*pb)
{
	PB_TRACE(pb, "iowait", 0);
	/* Kick the request queue so pending I/O is actually dispatched
	 * before we sleep waiting for it. */
	if (atomic_read(&pb->pb_io_remaining))
		blk_run_address_space(pb->pb_target->pbr_mapping);
	down(&pb->pb_iodonesema);	/* released on I/O completion */
	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
	return pb->pb_error;
}
1434
1435caddr_t
1436pagebuf_offset(
1437 xfs_buf_t *pb,
1438 size_t offset)
1439{
1440 struct page *page;
1441
1442 offset += pb->pb_offset;
1443
1444 page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
1445 return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
1446}
1447
1448/*
1449 * pagebuf_iomove
1450 *
1451 * Move data into or out of a buffer.
1452 */
1453void
1454pagebuf_iomove(
1455 xfs_buf_t *pb, /* buffer to process */
1456 size_t boff, /* starting buffer offset */
1457 size_t bsize, /* length to copy */
1458 caddr_t data, /* data address */
1459 page_buf_rw_t mode) /* read/write flag */
1460{
1461 size_t bend, cpoff, csize;
1462 struct page *page;
1463
1464 bend = boff + bsize;
1465 while (boff < bend) {
1466 page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
1467 cpoff = page_buf_poff(boff + pb->pb_offset);
1468 csize = min_t(size_t,
1469 PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
1470
1471 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
1472
1473 switch (mode) {
1474 case PBRW_ZERO:
1475 memset(page_address(page) + cpoff, 0, csize);
1476 break;
1477 case PBRW_READ:
1478 memcpy(data, page_address(page) + cpoff, csize);
1479 break;
1480 case PBRW_WRITE:
1481 memcpy(page_address(page) + cpoff, data, csize);
1482 }
1483
1484 boff += csize;
1485 data += csize;
1486 }
1487}
1488
1489/*
1490 * Handling of buftargs.
1491 */
1492
/*
 * Wait for any bufs with callbacks that have been submitted but
 * have not yet returned... walk the hash list for the target.
 *
 * A buffer without PBF_FS_MANAGED is still outstanding; since we
 * cannot sleep holding the bucket lock, drop it, wait briefly, and
 * rescan that bucket from scratch until only managed buffers remain.
 */
void
xfs_wait_buftarg(
	xfs_buftarg_t	*btp)
{
	xfs_buf_t	*bp, *n;
	xfs_bufhash_t	*hash;
	uint		i;

	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		hash = &btp->bt_hash[i];
again:
		spin_lock(&hash->bh_lock);
		list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
			ASSERT(btp == bp->pb_target);
			if (!(bp->pb_flags & PBF_FS_MANAGED)) {
				spin_unlock(&hash->bh_lock);
				delay(100);
				goto again;
			}
		}
		spin_unlock(&hash->bh_lock);
	}
}
1520
1521/*
1522 * Allocate buffer hash table for a given target.
1523 * For devices containing metadata (i.e. not the log/realtime devices)
1524 * we need to allocate a much larger hash table.
1525 */
1526STATIC void
1527xfs_alloc_bufhash(
1528 xfs_buftarg_t *btp,
1529 int external)
1530{
1531 unsigned int i;
1532
1533 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
1534 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1535 btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
1536 sizeof(xfs_bufhash_t), KM_SLEEP);
1537 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1538 spin_lock_init(&btp->bt_hash[i].bh_lock);
1539 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
1540 }
1541}
1542
1543STATIC void
1544xfs_free_bufhash(
1545 xfs_buftarg_t *btp)
1546{
1547 kmem_free(btp->bt_hash,
1548 (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1549 btp->bt_hash = NULL;
1550}
1551
/*
 * Tear down a buffer target: flush out all of its delayed-write
 * buffers (waiting for them), release the block device reference for
 * external (log/realtime) devices, free the hash table, drop the
 * private mapping inode and finally free the structure itself.
 */
void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);
	if (external)
		xfs_blkdev_put(btp->pbr_bdev);
	xfs_free_bufhash(btp);
	iput(btp->pbr_mapping->host);	/* the inode from xfs_mapping_buftarg */
	kmem_free(btp, sizeof(*btp));
}
1564
/*
 * Invalidate the block device's cached data and truncate the target's
 * private metadata mapping.
 *
 * NOTE(review): the delwri_only and wait arguments are accepted for
 * interface compatibility but are ignored by this implementation.
 */
void
xfs_incore_relse(
	xfs_buftarg_t		*btp,
	int			delwri_only,
	int			wait)
{
	invalidate_bdev(btp->pbr_bdev, 1);
	truncate_inode_pages(btp->pbr_mapping, 0LL);
}
1574
1575STATIC int
1576xfs_setsize_buftarg_flags(
1577 xfs_buftarg_t *btp,
1578 unsigned int blocksize,
1579 unsigned int sectorsize,
1580 int verbose)
1581{
1582 btp->pbr_bsize = blocksize;
1583 btp->pbr_sshift = ffs(sectorsize) - 1;
1584 btp->pbr_smask = sectorsize - 1;
1585
1586 if (set_blocksize(btp->pbr_bdev, sectorsize)) {
1587 printk(KERN_WARNING
1588 "XFS: Cannot set_blocksize to %u on device %s\n",
1589 sectorsize, XFS_BUFTARG_NAME(btp));
1590 return EINVAL;
1591 }
1592
1593 if (verbose &&
1594 (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
1595 printk(KERN_WARNING
1596 "XFS: %u byte sectors in use on device %s. "
1597 "This is suboptimal; %u or greater is ideal.\n",
1598 sectorsize, XFS_BUFTARG_NAME(btp),
1599 (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
1600 }
1601
1602 return 0;
1603}
1604
1605/*
1606* When allocating the initial buffer target we have not yet
1607* read in the superblock, so don't know what sized sectors
1608* are being used is at this early stage. Play safe.
1609*/
1610STATIC int
1611xfs_setsize_buftarg_early(
1612 xfs_buftarg_t *btp,
1613 struct block_device *bdev)
1614{
1615 return xfs_setsize_buftarg_flags(btp,
1616 PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
1617}
1618
/*
 * Set the target's geometry once the real block/sector sizes are known
 * from the superblock; warns (verbose mode) about suboptimal sector
 * sizes.  Returns 0 or EINVAL, as xfs_setsize_buftarg_flags() does.
 */
int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
}
1627
/*
 * Set up the private address space used to stage metadata for this
 * target: a fresh anonymous block-device inode whose mapping carries
 * our own address_space_operations (sync_page only) and the device's
 * backing_dev_info.  GFP_NOFS keeps page allocations in this mapping
 * from recursing back into the filesystem.
 *
 * Returns 0 on success or ENOMEM (positive, XFS convention) if the
 * inode cannot be allocated.
 */
STATIC int
xfs_mapping_buftarg(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	struct backing_dev_info	*bdi;
	struct inode		*inode;
	struct address_space	*mapping;
	static struct address_space_operations mapping_aops = {
		.sync_page = block_sync_page,
	};

	inode = new_inode(bdev->bd_inode->i_sb);
	if (!inode) {
		printk(KERN_WARNING
			"XFS: Cannot allocate mapping inode for device %s\n",
			XFS_BUFTARG_NAME(btp));
		return ENOMEM;
	}
	inode->i_mode = S_IFBLK;
	inode->i_bdev = bdev;
	inode->i_rdev = bdev->bd_dev;
	bdi = blk_get_backing_dev_info(bdev);
	if (!bdi)
		bdi = &default_backing_dev_info;
	mapping = &inode->i_data;
	mapping->a_ops = &mapping_aops;
	mapping->backing_dev_info = bdi;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	btp->pbr_mapping = mapping;
	return 0;
}
1660
/*
 * Allocate and initialise a buffer target for the given block device.
 * Geometry starts out conservative (see xfs_setsize_buftarg_early())
 * until the superblock has been read.  Returns NULL on failure, with
 * partial setup undone.
 */
xfs_buftarg_t *
xfs_alloc_buftarg(
	struct block_device	*bdev,
	int			external)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

	btp->pbr_dev =  bdev->bd_dev;
	btp->pbr_bdev = bdev;
	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error;
	if (xfs_mapping_buftarg(btp, bdev))
		goto error;
	xfs_alloc_bufhash(btp, external);	/* KM_SLEEP: cannot fail */
	return btp;

error:
	kmem_free(btp, sizeof(*btp));
	return NULL;
}
1683
1684
1685/*
1686 * Pagebuf delayed write buffer handling
1687 */
1688
/* Global delayed-write queue and the spinlock protecting it. */
STATIC LIST_HEAD(pbd_delwrite_queue);
STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
1691
/*
 * Add a delayed-write buffer to the tail of the global delwri queue,
 * stamping the time it was queued so the flush daemon can age it.
 * With 'unlock' set, the buffer's lock is dropped after queueing.
 */
STATIC void
pagebuf_delwri_queue(
	xfs_buf_t		*pb,
	int			unlock)
{
	PB_TRACE(pb, "delwri_q", (long)unlock);
	ASSERT(pb->pb_flags & PBF_DELWRI);

	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
		/* NOTE(review): this appears to drop the extra hold the
		 * caller took for this (re)queue so the queue keeps
		 * exactly one reference -- confirm against callers. */
		if (unlock) {
			atomic_dec(&pb->pb_hold);
		}
		list_del(&pb->pb_list);
	}

	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;	/* age basis for the flush daemon */
	spin_unlock(&pbd_delwrite_lock);

	if (unlock)
		pagebuf_unlock(pb);
}
1716
/*
 * Remove a buffer from the delayed-write queue (if it is on it) and
 * clear PBF_DELWRI.  The queue's reference is dropped only after the
 * lock has been released.
 */
void
pagebuf_delwri_dequeue(
	xfs_buf_t		*pb)
{
	int			dequeued = 0;

	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
	pb->pb_flags &= ~PBF_DELWRI;
	spin_unlock(&pbd_delwrite_lock);

	if (dequeued)
		pagebuf_rele(pb);

	PB_TRACE(pb, "delwri_dq", (long)dequeued);
}
1736
/*
 * Wait until every work item currently queued on the given I/O
 * completion workqueue has run to completion.
 */
STATIC void
pagebuf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}
1743
/* Defines for pagebuf daemon */
STATIC DECLARE_COMPLETION(pagebuf_daemon_done);	/* signalled on daemon exit */
STATIC struct task_struct *pagebuf_daemon_task;	/* the xfsbufd thread */
STATIC int pagebuf_daemon_active;	/* cleared to ask the daemon to stop */
STATIC int force_flush;			/* flush everything, ignoring age */
1749
1750
/*
 * Memory-pressure (shaker) callback: ask the flush daemon to write out
 * all delayed-write buffers immediately, regardless of age.  The
 * barrier ensures the flag store is emitted before the wakeup.
 */
STATIC int
pagebuf_daemon_wakeup(
	int			priority,
	unsigned int		mask)
{
	force_flush = 1;
	barrier();
	wake_up_process(pagebuf_daemon_task);
	return 0;
}
1761
/*
 * Delayed-write flush daemon (xfsbufd).
 *
 * Wakes periodically, moves aged delayed-write buffers -- or, when
 * force_flush is set by the memory-pressure hook, all of them -- off
 * the global delwri queue onto a private list, then issues their
 * write I/O outside the queue lock.
 */
STATIC int
pagebuf_daemon(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buftarg_t		*target;
	xfs_buf_t		*pb, *n;

	/* Set up the thread */
	daemonize("xfsbufd");
	current->flags |= PF_MEMALLOC;	/* we run to free memory */

	pagebuf_daemon_task = current;
	pagebuf_daemon_active = 1;
	barrier();

	INIT_LIST_HEAD(&tmp);
	do {
		try_to_freeze(PF_FREEZE);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);

		age = (xfs_buf_age_centisecs * HZ) / 100;
		spin_lock(&pbd_delwrite_lock);
		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
			ASSERT(pb->pb_flags & PBF_DELWRI);

			/* skip pinned buffers and ones we cannot lock
			 * without blocking */
			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
				if (!force_flush &&
				    time_before(jiffies,
						pb->pb_queuetime + age)) {
					/* queue is in queue-time order,
					 * so the rest are younger still */
					pagebuf_unlock(pb);
					break;
				}

				pb->pb_flags &= ~PBF_DELWRI;
				pb->pb_flags |= PBF_WRITE;
				list_move(&pb->pb_list, &tmp);
			}
		}
		spin_unlock(&pbd_delwrite_lock);

		/* issue the writes outside the delwri lock */
		while (!list_empty(&tmp)) {
			pb = list_entry(tmp.next, xfs_buf_t, pb_list);
			target = pb->pb_target;

			list_del_init(&pb->pb_list);
			pagebuf_iostrategy(pb);

			blk_run_address_space(target->pbr_mapping);
		}

		if (as_list_len > 0)
			purge_addresses();

		force_flush = 0;
	} while (pagebuf_daemon_active);

	complete_and_exit(&pagebuf_daemon_done, 0);
}
1825
/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 *
 * Writes out every delayed-write buffer queued for 'target',
 * optionally ('wait') synchronously.  Returns the number of pinned
 * buffers that had to be skipped.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t	*target,
	int		wait)
{
	struct list_head tmp;
	xfs_buf_t	*pb, *n;
	int		pincount = 0;

	/* Let queued I/O completion work finish before we scan. */
	pagebuf_runall_queues(pagebuf_dataio_workqueue);
	pagebuf_runall_queues(pagebuf_logio_workqueue);

	INIT_LIST_HEAD(&tmp);
	spin_lock(&pbd_delwrite_lock);
	list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {

		if (pb->pb_target != target)
			continue;

		ASSERT(pb->pb_flags & PBF_DELWRI);
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		if (pagebuf_ispin(pb)) {
			/* pinned by the log; cannot be written yet */
			pincount++;
			continue;
		}

		pb->pb_flags &= ~PBF_DELWRI;
		pb->pb_flags |= PBF_WRITE;
		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
		/* sync flush: keep the buffer on tmp so we can wait for
		 * it below; async: it is on its own once issued */
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);

		pagebuf_lock(pb);
		pagebuf_iostrategy(pb);
	}

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		pb = list_entry(tmp.next, xfs_buf_t, pb_list);

		list_del_init(&pb->pb_list);
		xfs_iowait(pb);
		xfs_buf_relse(pb);
	}

	if (wait)
		blk_run_address_space(target->pbr_mapping);

	return pincount;
}
1892
1893STATIC int
1894pagebuf_daemon_start(void)
1895{
1896 int rval;
1897
1898 pagebuf_logio_workqueue = create_workqueue("xfslogd");
1899 if (!pagebuf_logio_workqueue)
1900 return -ENOMEM;
1901
1902 pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
1903 if (!pagebuf_dataio_workqueue) {
1904 destroy_workqueue(pagebuf_logio_workqueue);
1905 return -ENOMEM;
1906 }
1907
1908 rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
1909 if (rval < 0) {
1910 destroy_workqueue(pagebuf_logio_workqueue);
1911 destroy_workqueue(pagebuf_dataio_workqueue);
1912 }
1913
1914 return rval;
1915}
1916
/*
 * pagebuf_daemon_stop
 *
 * Note: do not mark as __exit, it is called from pagebuf_terminate.
 */
STATIC void
pagebuf_daemon_stop(void)
{
	/* Ask the daemon to exit and block until it has done so. */
	pagebuf_daemon_active = 0;
	barrier();
	wait_for_completion(&pagebuf_daemon_done);

	destroy_workqueue(pagebuf_logio_workqueue);
	destroy_workqueue(pagebuf_dataio_workqueue);
}
1932
1933/*
1934 * Initialization and Termination
1935 */
1936
1937int __init
1938pagebuf_init(void)
1939{
1940 pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
1941 SLAB_HWCACHE_ALIGN, NULL, NULL);
1942 if (pagebuf_cache == NULL) {
1943 printk("XFS: couldn't init xfs_buf_t cache\n");
1944 pagebuf_terminate();
1945 return -ENOMEM;
1946 }
1947
1948#ifdef PAGEBUF_TRACE
1949 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
1950#endif
1951
1952 pagebuf_daemon_start();
1953
1954 pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
1955 if (pagebuf_shake == NULL) {
1956 pagebuf_terminate();
1957 return -ENOMEM;
1958 }
1959
1960 return 0;
1961}
1962
1963
/*
 * pagebuf_terminate.
 *
 * Tears down everything pagebuf_init() set up, in reverse order.
 *
 * Note: do not mark as __exit, this is also called from the __init code.
 */
void
pagebuf_terminate(void)
{
	pagebuf_daemon_stop();	/* blocks until the daemon has exited */

#ifdef PAGEBUF_TRACE
	ktrace_free(pagebuf_trace_buf);
#endif

	kmem_zone_destroy(pagebuf_cache);
	kmem_shake_deregister(pagebuf_shake);
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
new file mode 100644
index 000000000000..74deed8e6d90
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -0,0 +1,591 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI
35 */
36
37#ifndef __XFS_BUF_H__
38#define __XFS_BUF_H__
39
40#include <linux/config.h>
41#include <linux/list.h>
42#include <linux/types.h>
43#include <linux/spinlock.h>
44#include <asm/system.h>
45#include <linux/mm.h>
46#include <linux/fs.h>
47#include <linux/buffer_head.h>
48#include <linux/uio.h>
49
50/*
51 * Base types
52 */
53
/* Sentinel disk address: buffer has no block number assigned yet. */
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))

/* Conversions between page-cache pages and byte counts/offsets. */
#define page_buf_ctob(pp)	((pp) * PAGE_CACHE_SIZE)	/* pages to bytes */
#define page_buf_btoc(dd)	(((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) /* bytes to pages, rounded up */
#define page_buf_btoct(dd)	((dd) >> PAGE_CACHE_SHIFT)	/* bytes to pages, truncated */
#define page_buf_poff(aa)	((aa) & ~PAGE_CACHE_MASK)	/* offset within a page */

/* Direction of a pagebuf_iomove() data transfer. */
typedef enum page_buf_rw_e {
	PBRW_READ = 1,			/* transfer into target memory	*/
	PBRW_WRITE = 2,			/* transfer from target memory	*/
	PBRW_ZERO = 3			/* Zero target memory		*/
} page_buf_rw_t;
66
67
/*
 * Buffer state and request flags.  The first group lives in pb_flags
 * for the lifetime of the buffer; the later groups are accepted only
 * as call arguments or are for internal bookkeeping.
 */
typedef enum page_buf_flags_e {		/* pb_flags values */
	PBF_READ = (1 << 0),	/* buffer intended for reading from device */
	PBF_WRITE = (1 << 1),	/* buffer intended for writing to device   */
	PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
	PBF_PARTIAL = (1 << 3), /* buffer partially read                   */
	PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
	PBF_NONE = (1 << 5),    /* buffer not read at all                  */
	PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
	PBF_STALE = (1 << 7),	/* buffer has been staled, do not find it  */
	PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory   */
	PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad   */
	PBF_FORCEIO = (1 << 10),   /* ignore any cache state               */
	PBF_FLUSH = (1 << 11),	   /* flush disk write cache               */
	PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */

	/* flags used only as arguments to access routines */
	PBF_LOCK = (1 << 14),       /* lock requested                      */
	PBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
	PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */

	/* flags used only internally */
	_PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
	_PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
	_PBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
} page_buf_flags_t;

/* Buffer is valid for both reading and writing. */
#define PBF_UPDATE (PBF_READ | PBF_WRITE)
/* "done" means neither partially read nor completely unread. */
#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
97
/* One buffer hash bucket: the chain of buffers plus its guarding lock. */
typedef struct xfs_bufhash {
	struct list_head	bh_list;
	spinlock_t		bh_lock;
} xfs_bufhash_t;

/* Per-device buffer cache state (a "buffer target"). */
typedef struct xfs_buftarg {
	dev_t			pbr_dev;	/* device number */
	struct block_device	*pbr_bdev;	/* underlying block device */
	struct address_space	*pbr_mapping;	/* private metadata mapping */
	unsigned int		pbr_bsize;	/* filesystem block size */
	unsigned int		pbr_sshift;	/* log2 of sector size */
	size_t			pbr_smask;	/* sector size - 1 */

	/* per-device buffer hash table */
	uint			bt_hashmask;	/* (1 << bt_hashshift) - 1 */
	uint			bt_hashshift;	/* log2 of bucket count */
	xfs_bufhash_t		*bt_hash;	/* bucket array */
} xfs_buftarg_t;
116
117/*
118 * xfs_buf_t: Buffer structure for page cache-based buffers
119 *
120 * This buffer structure is used by the page cache buffer management routines
121 * to refer to an assembly of pages forming a logical buffer. The actual I/O
122 * is performed with buffer_head structures, as required by drivers.
123 *
124 * The buffer structure is used on temporary basis only, and discarded when
125 * released. The real data storage is recorded in the page cache. Metadata is
126 * hashed to the block device on which the file system resides.
127 */
128
struct xfs_buf;

/* call-back function on I/O completion */
typedef void (*page_buf_iodone_t)(struct xfs_buf *);
/* call-back function on final buffer release */
typedef void (*page_buf_relse_t)(struct xfs_buf *);
/* pre-write function */
typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);

/* page pointers held inline before pb_pages must be heap-allocated */
#define PB_PAGES	2

typedef struct xfs_buf {
	struct semaphore	pb_sema;	/* semaphore for lockables */
	unsigned long		pb_queuetime;	/* time buffer was queued */
	atomic_t		pb_pin_count;	/* pin count */
	wait_queue_head_t	pb_waiters;	/* unpin waiters */
	struct list_head	pb_list;	/* delwri queue linkage */
	page_buf_flags_t	pb_flags;	/* status flags */
	struct list_head	pb_hash_list;	/* hash table list */
	xfs_bufhash_t		*pb_hash;	/* hash table list start */
	xfs_buftarg_t		*pb_target;	/* buffer target (device) */
	atomic_t		pb_hold;	/* reference count */
	xfs_daddr_t		pb_bn;		/* block number for I/O */
	loff_t			pb_file_offset;	/* offset in file */
	size_t			pb_buffer_length; /* size of buffer in bytes */
	size_t			pb_count_desired; /* desired transfer size */
	void			*pb_addr;	/* virtual address of buffer */
	struct work_struct	pb_iodone_work;	/* deferred completion work */
	atomic_t		pb_io_remaining;/* #outstanding I/O requests */
	page_buf_iodone_t	pb_iodone;	/* I/O completion function */
	page_buf_relse_t	pb_relse;	/* releasing function */
	page_buf_bdstrat_t	pb_strat;	/* pre-write function */
	struct semaphore	pb_iodonesema;	/* Semaphore for I/O waiters */
	void			*pb_fspriv;	/* filesystem private data */
	void			*pb_fspriv2;	/* filesystem private data */
	void			*pb_fspriv3;	/* filesystem private data */
	unsigned short		pb_error;	/* error code on I/O */
	unsigned short		pb_locked;	/* page array is locked */
	unsigned int		pb_page_count;	/* size of page array */
	unsigned int		pb_offset;	/* page offset in first page */
	struct page		**pb_pages;	/* array of page pointers */
	struct page		*pb_page_array[PB_PAGES]; /* inline pages */
#ifdef PAGEBUF_LOCK_TRACKING
	int			pb_last_holder;	/* last lock owner, for debug */
#endif
} xfs_buf_t;
175
176
177/* Finding and Reading Buffers */
178
179extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */
180 /* the block is in memory */
181 xfs_buftarg_t *, /* inode for block */
182 loff_t, /* starting offset of range */
183 size_t, /* length of range */
184 page_buf_flags_t, /* PBF_LOCK */
185 xfs_buf_t *); /* newly allocated buffer */
186
187#define xfs_incore(buftarg,blkno,len,lockit) \
188 _pagebuf_find(buftarg, blkno ,len, lockit, NULL)
189
190extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */
191 xfs_buftarg_t *, /* inode for buffer */
192 loff_t, /* starting offset of range */
193 size_t, /* length of range */
194 page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
195 /* PBF_ASYNC */
196
197#define xfs_buf_get(target, blkno, len, flags) \
198 xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
199
200extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
201 xfs_buftarg_t *, /* inode for buffer */
202 loff_t, /* starting offset of range */
203 size_t, /* length of range */
204 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */
205
206#define xfs_buf_read(target, blkno, len, flags) \
207 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
208
209extern xfs_buf_t *pagebuf_lookup(
210 xfs_buftarg_t *,
211 loff_t, /* starting offset of range */
212 size_t, /* length of range */
213 page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
214 /* PBF_FORCEIO, */
215
216extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
217 /* no memory or disk address */
218 size_t len,
219 xfs_buftarg_t *); /* mount point "fake" inode */
220
221extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
222 /* without disk address */
223 size_t len,
224 xfs_buftarg_t *); /* mount point "fake" inode */
225
226extern int pagebuf_associate_memory(
227 xfs_buf_t *,
228 void *,
229 size_t);
230
231extern void pagebuf_hold( /* increment reference count */
232 xfs_buf_t *); /* buffer to hold */
233
234extern void pagebuf_readahead( /* read ahead into cache */
235 xfs_buftarg_t *, /* target for buffer (or NULL) */
236 loff_t, /* starting offset of range */
237 size_t, /* length of range */
238 page_buf_flags_t); /* additional read flags */
239
240/* Releasing Buffers */
241
242extern void pagebuf_free( /* deallocate a buffer */
243 xfs_buf_t *); /* buffer to deallocate */
244
245extern void pagebuf_rele( /* release hold on a buffer */
246 xfs_buf_t *); /* buffer to release */
247
248/* Locking and Unlocking Buffers */
249
250extern int pagebuf_cond_lock( /* lock buffer, if not locked */
251 /* (returns -EBUSY if locked) */
252 xfs_buf_t *); /* buffer to lock */
253
254extern int pagebuf_lock_value( /* return count on lock */
255 xfs_buf_t *); /* buffer to check */
256
257extern int pagebuf_lock( /* lock buffer */
258 xfs_buf_t *); /* buffer to lock */
259
260extern void pagebuf_unlock( /* unlock buffer */
261 xfs_buf_t *); /* buffer to unlock */
262
263/* Buffer Read and Write Routines */
264
265extern void pagebuf_iodone( /* mark buffer I/O complete */
266 xfs_buf_t *, /* buffer to mark */
267 int, /* use data/log helper thread. */
268 int); /* run completion locally, or in
269 * a helper thread. */
270
271extern void pagebuf_ioerror( /* mark buffer in error (or not) */
272 xfs_buf_t *, /* buffer to mark */
273 int); /* error to store (0 if none) */
274
275extern int pagebuf_iostart( /* start I/O on a buffer */
276 xfs_buf_t *, /* buffer to start */
277 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
278 /* PBF_READ, PBF_WRITE, */
279 /* PBF_DELWRI */
280
281extern int pagebuf_iorequest( /* start real I/O */
282 xfs_buf_t *); /* buffer to convey to device */
283
284extern int pagebuf_iowait( /* wait for buffer I/O done */
285 xfs_buf_t *); /* buffer to wait on */
286
287extern void pagebuf_iomove( /* move data in/out of pagebuf */
288 xfs_buf_t *, /* buffer to manipulate */
289 size_t, /* starting buffer offset */
290 size_t, /* length in buffer */
291 caddr_t, /* data pointer */
292 page_buf_rw_t); /* direction */
293
294static inline int pagebuf_iostrategy(xfs_buf_t *pb)
295{
296 return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
297}
298
299static inline int pagebuf_geterror(xfs_buf_t *pb)
300{
301 return pb ? pb->pb_error : ENOMEM;
302}
303
304/* Buffer Utility Routines */
305
306extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
307 xfs_buf_t *, /* buffer to offset into */
308 size_t); /* offset */
309
310/* Pinning Buffer Storage in Memory */
311
312extern void pagebuf_pin( /* pin buffer in memory */
313 xfs_buf_t *); /* buffer to pin */
314
315extern void pagebuf_unpin( /* unpin buffered data */
316 xfs_buf_t *); /* buffer to unpin */
317
318extern int pagebuf_ispin( /* check if buffer is pinned */
319 xfs_buf_t *); /* buffer to check */
320
321/* Delayed Write Buffer Routines */
322
323extern void pagebuf_delwri_dequeue(xfs_buf_t *);
324
325/* Buffer Daemon Setup Routines */
326
327extern int pagebuf_init(void);
328extern void pagebuf_terminate(void);
329
330
331#ifdef PAGEBUF_TRACE
332extern ktrace_t *pagebuf_trace_buf;
333extern void pagebuf_trace(
334 xfs_buf_t *, /* buffer being traced */
335 char *, /* description of operation */
336 void *, /* arbitrary diagnostic value */
337 void *); /* return address */
338#else
339# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)
340#endif
341
342#define pagebuf_target_name(target) \
343 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
344
345
346
347
348
349/* These are just for xfs_syncsub... it sets an internal variable
350 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
351 */
352#define XFS_B_ASYNC PBF_ASYNC
353#define XFS_B_DELWRI PBF_DELWRI
354#define XFS_B_READ PBF_READ
355#define XFS_B_WRITE PBF_WRITE
356#define XFS_B_STALE PBF_STALE
357
358#define XFS_BUF_TRYLOCK PBF_TRYLOCK
359#define XFS_INCORE_TRYLOCK PBF_TRYLOCK
360#define XFS_BUF_LOCK PBF_LOCK
361#define XFS_BUF_MAPPED PBF_MAPPED
362
363#define BUF_BUSY PBF_DONT_BLOCK
364
365#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)
366#define XFS_BUF_ZEROFLAGS(x) \
367 ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
368
/* Stale buffers carry no valid data and must not be written back. */
#define XFS_BUF_STALE(x)	 ((x)->pb_flags |= XFS_B_STALE)
#define XFS_BUF_UNSTALE(x)	 ((x)->pb_flags &= ~XFS_B_STALE)
#define XFS_BUF_ISSTALE(x)	 ((x)->pb_flags & XFS_B_STALE)
/* Mark a buffer stale, pull it off the delayed-write queue, and mark
 * the (now irrelevant) I/O as done so that waiters are not left hanging. */
#define XFS_BUF_SUPER_STALE(x)	do {				\
					XFS_BUF_STALE(x);	\
					pagebuf_delwri_dequeue(x);	\
					XFS_BUF_DONE(x);	\
				} while (0)
377
378#define XFS_BUF_MANAGE PBF_FS_MANAGED
379#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED)
380
381#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI)
382#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x)
383#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI)
384
385#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no)
386#define XFS_BUF_GETERROR(x) pagebuf_geterror(x)
387#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0)
388
389#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE))
390#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE)
391#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x)))
392
393#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO)
394#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO)
395#define XFS_BUF_ISBUSY(x) (1)
396
397#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)
398#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
399#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
400
401#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH)
402#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH)
403#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH)
404
405#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
406#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
407#define XFS_BUF_ISSHUT(x) (0)
408
409#define XFS_BUF_HOLD(x) pagebuf_hold(x)
410#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)
411#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)
412#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)
413
414#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)
415#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)
416#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)
417
418#define XFS_BUF_ISUNINITIAL(x) (0)
419#define XFS_BUF_UNUNINITIAL(x) (0)
420
421#define XFS_BUF_BP_ISMAPPED(bp) 1
422
423#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD)
424#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD)
425
426#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone
427#define XFS_BUF_SET_IODONE_FUNC(buf, func) \
428 (buf)->pb_iodone = (func)
429#define XFS_BUF_CLR_IODONE_FUNC(buf) \
430 (buf)->pb_iodone = NULL
431#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \
432 (buf)->pb_strat = (func)
433#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \
434 (buf)->pb_strat = NULL
435
436#define XFS_BUF_FSPRIVATE(buf, type) \
437 ((type)(buf)->pb_fspriv)
438#define XFS_BUF_SET_FSPRIVATE(buf, value) \
439 (buf)->pb_fspriv = (void *)(value)
440#define XFS_BUF_FSPRIVATE2(buf, type) \
441 ((type)(buf)->pb_fspriv2)
442#define XFS_BUF_SET_FSPRIVATE2(buf, value) \
443 (buf)->pb_fspriv2 = (void *)(value)
444#define XFS_BUF_FSPRIVATE3(buf, type) \
445 ((type)(buf)->pb_fspriv3)
446#define XFS_BUF_SET_FSPRIVATE3(buf, value) \
447 (buf)->pb_fspriv3 = (void *)(value)
448#define XFS_BUF_SET_START(buf)
449
450#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
451 (buf)->pb_relse = (value)
452
453#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
454
/*
 * Return a kernel pointer to byte 'offset' within the buffer's data.
 * Fast path: when the buffer's pages are virtually mapped (PBF_MAPPED)
 * this is plain pointer arithmetic; otherwise fall back to
 * pagebuf_offset(), which locates the byte within the page array.
 */
extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
{
	if (bp->pb_flags & PBF_MAPPED)
		return XFS_BUF_PTR(bp) + offset;
	return (xfs_caddr_t) pagebuf_offset(bp, offset);
}
461
462#define XFS_BUF_SET_PTR(bp, val, count) \
463 pagebuf_associate_memory(bp, val, count)
464#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)
465#define XFS_BUF_SET_ADDR(bp, blk) \
466 ((bp)->pb_bn = (xfs_daddr_t)(blk))
467#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)
468#define XFS_BUF_SET_OFFSET(bp, off) \
469 ((bp)->pb_file_offset = (off))
470#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)
471#define XFS_BUF_SET_COUNT(bp, cnt) \
472 ((bp)->pb_count_desired = (cnt))
473#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)
474#define XFS_BUF_SET_SIZE(bp, cnt) \
475 ((bp)->pb_buffer_length = (cnt))
476#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
477#define XFS_BUF_SET_VTYPE(bp, type)
478#define XFS_BUF_SET_REF(bp, ref)
479
#define XFS_BUF_ISPINNED(bp)	 pagebuf_ispin(bp)

/* The buffer lock is modelled as a semaphore: V = unlock, P = lock,
 * CP = conditional (try) lock, VALUSEMA = current lock value. */
#define XFS_BUF_VALUSEMA(bp)	 pagebuf_lock_value(bp)
#define XFS_BUF_CPSEMA(bp)	 (pagebuf_cond_lock(bp) == 0)
#define XFS_BUF_VSEMA(bp)	 pagebuf_unlock(bp)
#define XFS_BUF_PSEMA(bp,x)	 pagebuf_lock(bp)
/* Wake waiters sleeping on this buffer's iodone semaphore.  The macro
 * argument is parenthesized and the trailing semicolon removed so the
 * macro expands like a normal expression statement (a trailing ';'
 * inside the definition breaks use in unbraced if/else bodies). */
#define XFS_BUF_V_IODONESEMA(bp) up(&(bp)->pb_iodonesema)
487
488/* setup the buffer target from a buftarg structure */
489#define XFS_BUF_SET_TARGET(bp, target) \
490 (bp)->pb_target = (target)
491#define XFS_BUF_TARGET(bp) ((bp)->pb_target)
492#define XFS_BUFTARG_NAME(target) \
493 pagebuf_target_name(target)
494
/* XFS_BUF_SET_VTYPE_REF, XFS_BUF_SET_VTYPE and XFS_BUF_SET_REF are
 * already defined (as no-ops) earlier in this header; the identical
 * duplicate definitions that used to sit here have been removed. */
498
/*
 * Issue an asynchronous write of a buffer right now.  The mount point
 * is stashed in pb_fspriv3 and xfs_bdstrat_cb installed as the strategy
 * routine so that filesystem shutdown state is checked at I/O time.
 */
static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
{
	bp->pb_fspriv3 = mp;
	bp->pb_strat = xfs_bdstrat_cb;
	pagebuf_delwri_dequeue(bp);	/* writing now, not via the delwri daemon */
	return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
}
506
/*
 * Release a buffer: drop the buffer lock (unless a private release
 * routine is installed, which then owns the unlock) and drop our
 * reference.
 */
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
	if (!bp->pb_relse)
		pagebuf_unlock(bp);
	pagebuf_rele(bp);
}
513
514#define xfs_bpin(bp) pagebuf_pin(bp)
515#define xfs_bunpin(bp) pagebuf_unpin(bp)
516
517#define xfs_buftrace(id, bp) \
518 pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
519
520#define xfs_biodone(pb) \
521 pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0)
522
523#define xfs_biomove(pb, off, len, data, rw) \
524 pagebuf_iomove((pb), (off), (len), (data), \
525 ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
526
527#define xfs_biozero(pb, off, len) \
528 pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
529
530
/*
 * Write a buffer, synchronously unless PBF_ASYNC is already set.
 * A synchronous caller waits for I/O completion and then releases the
 * buffer; an asynchronous caller just queues the I/O (_PBF_RUN_QUEUES
 * kicks the I/O daemons) and returns immediately.
 */
static inline int XFS_bwrite(xfs_buf_t *pb)
{
	int iowait = (pb->pb_flags & PBF_ASYNC) == 0;
	int error = 0;

	if (!iowait)
		pb->pb_flags |= _PBF_RUN_QUEUES;

	pagebuf_delwri_dequeue(pb);	/* we are writing it now, not later */
	pagebuf_iostrategy(pb);
	if (iowait) {
		error = pagebuf_iowait(pb);
		xfs_buf_relse(pb);
	}
	return error;
}
547
548#define XFS_bdwrite(pb) \
549 pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
550
/*
 * Queue a buffer for delayed write.  As in xfs_bawrite(), the mount
 * point and the shutdown-aware strategy routine are attached first.
 */
static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
{
	bp->pb_strat = xfs_bdstrat_cb;
	bp->pb_fspriv3 = mp;

	return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
}
558
559#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
560
561#define xfs_iowait(pb) pagebuf_iowait(pb)
562
563#define xfs_baread(target, rablkno, ralen) \
564 pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
565
566#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
567#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
568#define xfs_buf_free(bp) pagebuf_free(bp)
569
570
571/*
572 * Handling of buftargs.
573 */
574
575extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
576extern void xfs_free_buftarg(xfs_buftarg_t *, int);
577extern void xfs_wait_buftarg(xfs_buftarg_t *);
578extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
579extern void xfs_incore_relse(xfs_buftarg_t *, int, int);
580extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
581
582#define xfs_getsize_buftarg(buftarg) \
583 block_size((buftarg)->pbr_bdev)
584#define xfs_readonly_buftarg(buftarg) \
585 bdev_read_only((buftarg)->pbr_bdev)
586#define xfs_binval(buftarg) \
587 xfs_flush_buftarg(buftarg, 1)
588#define XFS_bflush(buftarg) \
589 xfs_flush_buftarg(buftarg, 1)
590
591#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
new file mode 100644
index 000000000000..00c45849d41a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -0,0 +1,50 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_CRED_H__
33#define __XFS_CRED_H__
34
/*
 * Credentials.
 *
 * XFS carries no credential state of its own on Linux; cred_t is an
 * empty placeholder kept so IRIX-derived interfaces still compile.
 */
typedef struct cred {
	/* EMPTY */
} cred_t;

extern struct cred *sys_cred;
43
/* This is a hack: assumes sys_cred is the only cred_t in the system. */
45static __inline int capable_cred(cred_t *cr, int cid)
46{
47 return (cr == sys_cred) ? 1 : capable(cid);
48}
49
50#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
new file mode 100644
index 000000000000..f372a1a5e168
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -0,0 +1,205 @@
1/*
2 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_types.h"
35#include "xfs_dmapi.h"
36#include "xfs_log.h"
37#include "xfs_trans.h"
38#include "xfs_sb.h"
39#include "xfs_dir.h"
40#include "xfs_mount.h"
41#include "xfs_export.h"
42
43/*
44 * XFS encode and decodes the fileid portion of NFS filehandles
45 * itself instead of letting the generic NFS code do it. This
46 * allows filesystems with 64 bit inode numbers to be exported.
47 *
48 * Note that a side effect is that xfs_vget() won't be passed a
49 * zero inode/generation pair under normal circumstances. As
50 * however a malicious client could send us such data, the check
51 * remains in that code.
52 */
53
54
/*
 * Turn the fileid portion of an NFS filehandle into a dentry.
 * fileid_type 1 encodes inode + generation; type 2 appends the
 * parent's inode + generation; the XFS_FILEID_TYPE_64FLAG bit marks
 * 64-bit inode numbers (two words for each inode).  We decode into
 * xfs_fid2_t structures and hand off to the generic exportfs helper.
 */
STATIC struct dentry *
linvfs_decode_fh(
	struct super_block	*sb,
	__u32			*fh,
	int			fh_len,
	int			fileid_type,
	int (*acceptable)(
		void		*context,
		struct dentry	*de),
	void			*context)
{
	xfs_fid2_t		ifid;
	xfs_fid2_t		pfid;
	void			*parent = NULL;
	int			is64 = 0;
	__u32			*p = fh;

#if XFS_BIG_INUMS
	is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG);
	fileid_type &= ~XFS_FILEID_TYPE_64FLAG;
#endif

	/*
	 * Note that we only accept fileids which are long enough
	 * rather than allow the parent generation number to default
	 * to zero.  XFS considers zero a valid generation number not
	 * an invalid/wildcard value.  There's little point printk'ing
	 * a warning here as we don't have the client information
	 * which would make such a warning useful.
	 */
	if (fileid_type > 2 ||
	    fh_len < xfs_fileid_length((fileid_type == 2), is64))
		return NULL;

	p = xfs_fileid_decode_fid2(p, &ifid, is64);

	if (fileid_type == 2) {
		p = xfs_fileid_decode_fid2(p, &pfid, is64);
		parent = &pfid;
	}

	/* reuse fh to pass the decoded fid to the generic lookup */
	fh = (__u32 *)&ifid;
	return find_exported_dentry(sb, fh, parent, acceptable, context);
}
99
100
/*
 * Build the fileid portion of an NFS filehandle for a dentry.
 * Returns the fileid_type (1 or 2, OR'd with XFS_FILEID_TYPE_64FLAG
 * when 64-bit inode numbers may be present), or 255 when the supplied
 * buffer is too small for the chosen encoding.
 */
STATIC int
linvfs_encode_fh(
	struct dentry		*dentry,
	__u32			*fh,
	int			*max_len,
	int			connectable)
{
	struct inode		*inode = dentry->d_inode;
	int			type = 1;
	__u32			*p = fh;
	int			len;
	int			is64 = 0;
#if XFS_BIG_INUMS
	vfs_t			*vfs = LINVFS_GET_VFS(inode->i_sb);
	xfs_mount_t		*mp = XFS_VFSTOM(vfs);

	if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) {
		/* filesystem may contain 64bit inode numbers */
		is64 = XFS_FILEID_TYPE_64FLAG;
	}
#endif

	/* Directories don't need their parent encoded, they have ".." */
	if (S_ISDIR(inode->i_mode))
		connectable = 0;

	/*
	 * Only encode if there is enough space given.  In practice
	 * this means we can't export a filesystem with 64bit inodes
	 * over NFSv2 with the subtree_check export option; the other
	 * seven combinations work.  The real answer is "don't use v2".
	 */
	len = xfs_fileid_length(connectable, is64);
	if (*max_len < len)
		return 255;
	*max_len = len;

	p = xfs_fileid_encode_inode(p, inode, is64);
	if (connectable) {
		/* hold d_lock so d_parent cannot change under us */
		spin_lock(&dentry->d_lock);
		p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64);
		spin_unlock(&dentry->d_lock);
		type = 2;
	}
	BUG_ON((p - fh) != len);
	return type | is64;
}
148
/*
 * exportfs get_dentry: look up the inode named by a decoded fid via
 * VFS_VGET and wrap it in an anonymous dentry.  Returns -ESTALE when
 * the inode cannot be found, -ENOMEM when no dentry can be allocated
 * (in which case the inode reference is dropped here).
 */
STATIC struct dentry *
linvfs_get_dentry(
	struct super_block	*sb,
	void			*data)
{
	vnode_t			*vp;
	struct inode		*inode;
	struct dentry		*result;
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_VGET(vfsp, &vp, (fid_t *)data, error);
	if (error || vp == NULL)
		return ERR_PTR(-ESTALE) ;

	inode = LINVFS_GET_IP(vp);
	result = d_alloc_anon(inode);
	if (!result) {
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}
	return result;
}
172
/*
 * Find the parent of a directory dentry by performing a ".." lookup on
 * the underlying vnode and wrapping the result in an anonymous dentry.
 * A throwaway struct dentry is built on the stack purely to carry the
 * ".." name into VOP_LOOKUP.
 */
STATIC struct dentry *
linvfs_get_parent(
	struct dentry		*child)
{
	int			error;
	vnode_t			*vp, *cvp;
	struct dentry		*parent;
	struct dentry		dotdot;

	dotdot.d_name.name = "..";
	dotdot.d_name.len = 2;
	dotdot.d_inode = NULL;

	cvp = NULL;
	vp = LINVFS_GET_VP(child->d_inode);
	VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
	if (unlikely(error))
		return ERR_PTR(-error);	/* XFS errors are positive */

	parent = d_alloc_anon(LINVFS_GET_IP(cvp));
	if (unlikely(!parent)) {
		VN_RELE(cvp);	/* drop the reference VOP_LOOKUP took */
		return ERR_PTR(-ENOMEM);
	}
	return parent;
}
199
/* Export operations handed to the generic NFS export code. */
struct export_operations linvfs_export_ops = {
	.decode_fh		= linvfs_decode_fh,
	.encode_fh		= linvfs_encode_fh,
	.get_parent		= linvfs_get_parent,
	.get_dentry		= linvfs_get_dentry,
};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
new file mode 100644
index 000000000000..60b2abac1c18
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_EXPORT_H__
33#define __XFS_EXPORT_H__
34
35/*
36 * Common defines for code related to exporting XFS filesystems over NFS.
37 *
38 * The NFS fileid goes out on the wire as an array of
39 * 32bit unsigned ints in host order. There are 5 possible
40 * formats.
41 *
42 * (1) fileid_type=0x00
43 * (no fileid data; handled by the generic code)
44 *
45 * (2) fileid_type=0x01
46 * inode-num
47 * generation
48 *
49 * (3) fileid_type=0x02
50 * inode-num
51 * generation
52 * parent-inode-num
53 * parent-generation
54 *
55 * (4) fileid_type=0x81
56 * inode-num-lo32
57 * inode-num-hi32
58 * generation
59 *
60 * (5) fileid_type=0x82
61 * inode-num-lo32
62 * inode-num-hi32
63 * generation
64 * parent-inode-num-lo32
65 * parent-inode-num-hi32
66 * parent-generation
67 *
68 * Note, the NFS filehandle also includes an fsid portion which
69 * may have an inode number in it. That number is hardcoded to
70 * 32bits and there is no way for XFS to intercept it. In
71 * practice this means when exporting an XFS filesytem with 64bit
72 * inodes you should either export the mountpoint (rather than
73 * a subdirectory) or use the "fsid" export option.
74 */
75
76/* This flag goes on the wire. Don't play with it. */
77#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */
78
/*
 * Calculate the length in u32 units of the fileid data: one or two
 * words for the inode number (two with 64-bit inodes) plus one for the
 * generation, doubled when the parent is also encoded.
 */
static inline int
xfs_fileid_length(int hasparent, int is64)
{
	int words = is64 ? 3 : 2;

	if (hasparent)
		words *= 2;
	return words;
}
85
/*
 * Decode encoded inode information (either for the inode itself
 * or the parent) into an xfs_fid2_t structure.  Advances and
 * returns the new data pointer.
 */
static inline __u32 *
xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64)
{
	/* fid_len counts the bytes following the length field itself */
	fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len);
	fid->fid_pad = 0;
	fid->fid_ino = *p++;
#if XFS_BIG_INUMS
	if (is64)
		fid->fid_ino |= (((__u64)(*p++)) << 32);	/* high word follows low */
#endif
	fid->fid_gen = *p++;
	return p;
}
104
/*
 * Encode inode information (either for the inode itself or the
 * parent) into a fileid buffer.  Advances and returns the new
 * data pointer.  Word order matches xfs_fileid_decode_fid2():
 * low 32 bits of the inode number first, then (for 64-bit inodes)
 * the high 32 bits, then the generation.
 */
static inline __u32 *
xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64)
{
	*p++ = (__u32)inode->i_ino;
#if XFS_BIG_INUMS
	if (is64)
		*p++ = (__u32)(inode->i_ino >> 32);
#endif
	*p++ = inode->i_generation;
	return p;
}
121
122#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
new file mode 100644
index 000000000000..9f057a4a5b06
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -0,0 +1,573 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_inum.h"
35#include "xfs_log.h"
36#include "xfs_sb.h"
37#include "xfs_dir.h"
38#include "xfs_dir2.h"
39#include "xfs_trans.h"
40#include "xfs_dmapi.h"
41#include "xfs_mount.h"
42#include "xfs_bmap_btree.h"
43#include "xfs_alloc_btree.h"
44#include "xfs_ialloc_btree.h"
45#include "xfs_alloc.h"
46#include "xfs_btree.h"
47#include "xfs_attr_sf.h"
48#include "xfs_dir_sf.h"
49#include "xfs_dir2_sf.h"
50#include "xfs_dinode.h"
51#include "xfs_inode.h"
52#include "xfs_error.h"
53#include "xfs_rw.h"
54#include "xfs_ioctl32.h"
55
56#include <linux/dcache.h>
57#include <linux/smp_lock.h>
58
59static struct vm_operations_struct linvfs_file_vm_ops;
60
61
/*
 * Common aio read path: package the user buffer into a single-segment
 * iovec and hand it to the vnode layer via VOP_READ.  O_DIRECT opens
 * are flagged with IO_ISDIRECT on every call.
 */
STATIC inline ssize_t
__linvfs_read(
	struct kiocb		*iocb,
	char			__user *buf,
	int			ioflags,
	size_t			count,
	loff_t			pos)
{
	struct iovec		iov = {buf, count};
	struct file		*file = iocb->ki_filp;
	vnode_t			*vp = LINVFS_GET_VP(file->f_dentry->d_inode);
	ssize_t			rval;

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
	return rval;
}
82
83
/* aio_read entry point: flag the I/O as AIO for the vnode layer. */
STATIC ssize_t
linvfs_aio_read(
	struct kiocb		*iocb,
	char			__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_read(iocb, buf, IO_ISAIO, count, pos);
}

/* "Invisible" variant (IO_INVIS): does not update timestamps; used by
 * the DMAPI/handle interfaces. */
STATIC ssize_t
linvfs_aio_read_invis(
	struct kiocb		*iocb,
	char			__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}
103
104
/*
 * Common aio write path: mirror of __linvfs_read(), dispatching a
 * single-segment iovec through VOP_WRITE with IO_ISDIRECT added for
 * O_DIRECT opens.
 */
STATIC inline ssize_t
__linvfs_write(
	struct kiocb		*iocb,
	const char		__user *buf,
	int			ioflags,
	size_t			count,
	loff_t			pos)
{
	struct iovec	iov = {(void __user *)buf, count};
	struct file	*file = iocb->ki_filp;
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	ssize_t		rval;

	BUG_ON(iocb->ki_pos != pos);
	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;

	VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
	return rval;
}
126
127
/* aio_write entry point: flag the I/O as AIO for the vnode layer. */
STATIC ssize_t
linvfs_aio_write(
	struct kiocb		*iocb,
	const char		__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_write(iocb, buf, IO_ISAIO, count, pos);
}

/* "Invisible" variant (IO_INVIS): no timestamp updates. */
STATIC ssize_t
linvfs_aio_write_invis(
	struct kiocb		*iocb,
	const char		__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}
147
148
/*
 * Vectored read: build a synchronous kiocb around the file position,
 * dispatch through VOP_READ, and propagate the advanced position back
 * to *ppos.
 */
STATIC inline ssize_t
__linvfs_readv(
	struct file		*file,
	const struct iovec 	*iov,
	int			ioflags,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	struct kiocb	kiocb;
	ssize_t		rval;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);

	*ppos = kiocb.ki_pos;
	return rval;
}
172
/* readv entry point: plain vectored read, no extra I/O flags. */
STATIC ssize_t
linvfs_readv(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_readv(file, iov, 0, nr_segs, ppos);
}

/* "Invisible" variant (IO_INVIS): no timestamp updates. */
STATIC ssize_t
linvfs_readv_invis(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos);
}
192
193
/*
 * Vectored write: mirror of __linvfs_readv(), dispatching through
 * VOP_WRITE and propagating the advanced position back to *ppos.
 */
STATIC inline ssize_t
__linvfs_writev(
	struct file		*file,
	const struct iovec 	*iov,
	int			ioflags,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	struct kiocb	kiocb;
	ssize_t		rval;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;
	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;

	VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);

	*ppos = kiocb.ki_pos;
	return rval;
}
217
218
/* writev entry point: plain vectored write, no extra I/O flags. */
STATIC ssize_t
linvfs_writev(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_writev(file, iov, 0, nr_segs, ppos);
}

/* "Invisible" variant (IO_INVIS): no timestamp updates. */
STATIC ssize_t
linvfs_writev_invis(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos);
}
238
/* sendfile entry point: hand straight off to the vnode layer. */
STATIC ssize_t
linvfs_sendfile(
	struct file		*filp,
	loff_t			*ppos,
	size_t			count,
	read_actor_t		actor,
	void			*target)
{
	vnode_t			*vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
	ssize_t			rval;

	VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval);
	return rval;
}
253
254
/*
 * File open: reject non-O_LARGEFILE opens of files already past the
 * 32-bit size limit, then notify the vnode layer.  XFS errors are
 * positive, hence the negation on return.
 */
STATIC int
linvfs_open(
	struct inode	*inode,
	struct file	*filp)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error;

	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;

	ASSERT(vp);
	VOP_OPEN(vp, NULL, error);
	return -error;
}
270
271
/*
 * Last close of a file: notify the vnode layer (if the vnode still
 * exists) so it can flush/truncate speculative allocations.
 */
STATIC int
linvfs_release(
	struct inode	*inode,
	struct file	*filp)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error = 0;

	if (vp)
		VOP_RELEASE(vp, error);
	return -error;
}
284
285
/*
 * fsync/fdatasync: wait for I/O completion (FSYNC_WAIT); datasync adds
 * FSYNC_DATA so non-essential metadata updates can be skipped.  The
 * (0, -1) offset pair means "the whole file".
 */
STATIC int
linvfs_fsync(
	struct file	*filp,
	struct dentry	*dentry,
	int		datasync)
{
	struct inode	*inode = dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error;
	int		flags = FSYNC_WAIT;

	if (datasync)
		flags |= FSYNC_DATA;

	ASSERT(vp);
	VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
	return -error;
}
304
/*
 * linvfs_readdir maps to VOP_READDIR().
 *
 * We read batches of directory entries from the vnode layer into a
 * locally-allocated buffer and replay them through the VFS filldir
 * callback.  Offsets handed to the VFS are masked to 31 bits; the
 * sentinel f_pos value 0x7fffffff marks end-of-directory and is mapped
 * to the vnode layer's 0xffffffff EOF cookie on re-entry.
 */

/* step to the next variable-length dirent in the batch buffer */
#define nextdp(dp)		((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))

STATIC int
linvfs_readdir(
	struct file	*filp,
	void		*dirent,
	filldir_t	filldir)
{
	int		error = 0;
	vnode_t		*vp;
	uio_t		uio;
	iovec_t		iov;
	int		eof = 0;
	caddr_t		read_buf;
	int		namelen, size = 0;
	size_t		rlen = PAGE_CACHE_SIZE;
	xfs_off_t	start_offset, curr_offset;
	xfs_dirent_t	*dbp = NULL;

	vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
	ASSERT(vp);

	/* Try fairly hard to get memory: halve the request on failure,
	 * down to a 1K floor */
	do {
		if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
			break;
		rlen >>= 1;
	} while (rlen >= 1024);

	if (read_buf == NULL)
		return -ENOMEM;

	uio.uio_iov = &iov;
	uio.uio_segflg = UIO_SYSSPACE;
	curr_offset = filp->f_pos;
	if (filp->f_pos != 0x7fffffff)
		uio.uio_offset = filp->f_pos;
	else
		uio.uio_offset = 0xffffffff;	/* EOF sentinel for VOP_READDIR */

	while (!eof) {
		uio.uio_resid = iov.iov_len = rlen;
		iov.iov_base = read_buf;
		uio.uio_iovcnt = 1;

		start_offset = uio.uio_offset;

		VOP_READDIR(vp, &uio, NULL, &eof, error);
		/* no progress (or error) terminates the outer loop */
		if ((uio.uio_offset == start_offset) || error) {
			size = 0;
			break;
		}

		size = rlen - uio.uio_resid;	/* bytes of dirents returned */
		dbp = (xfs_dirent_t *)read_buf;
		while (size > 0) {
			namelen = strlen(dbp->d_name);

			if (filldir(dirent, dbp->d_name, namelen,
					(loff_t) curr_offset & 0x7fffffff,
					(ino_t) dbp->d_ino,
					DT_UNKNOWN)) {
				/* the VFS buffer is full; stop here */
				goto done;
			}
			size -= dbp->d_reclen;
			curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
			dbp = nextdp(dbp);
		}
	}
done:
	if (!error) {
		if (size == 0)
			filp->f_pos = uio.uio_offset & 0x7fffffff;
		else if (dbp)
			filp->f_pos = curr_offset;
	}

	kfree(read_buf);
	return -error;
}
390
391
/*
 * mmap of a file: on DMAPI-managed filesystems a mmap event is sent
 * first (and can veto the mapping); otherwise install our vm_ops and
 * bump the access time.
 */
STATIC int
linvfs_file_mmap(
	struct file	*filp,
	struct vm_area_struct *vma)
{
	struct inode	*ip = filp->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(ip);
	vattr_t		va = { .va_mask = XFS_AT_UPDATIME };
	int		error;

	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);

		error = -XFS_SEND_MMAP(mp, vma, 0);
		if (error)
			return error;
	}

	vma->vm_ops = &linvfs_file_vm_ops;

	/* NOTE(review): a VOP_SETATTR failure here is deliberately
	 * ignored — the mmap itself succeeds regardless. */
	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
	if (!error)
		vn_revalidate(vp);	/* update Linux inode flags */
	return 0;
}
417
418
/*
 * ioctl dispatch: everything is handled by VOP_IOCTL; the vnode is
 * marked modified afterwards.
 */
STATIC long
linvfs_ioctl(
	struct file	*filp,
	unsigned int	cmd,
	unsigned long	arg)
{
	int		error;
	struct inode	*inode = filp->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);

	VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
	VMODIFY(vp);

	/* NOTE: some of the ioctl's return positive #'s as a
	 * byte count indicating success, such as
	 * readlink_by_handle.  So we don't "sign flip"
	 * like most other routines.  This means true
	 * errors need to be returned as a negative value.
	 */
	return error;
}
440
/*
 * ioctl dispatch for the "invisible" file operations (IO_INVIS: no
 * timestamp updates); otherwise identical to linvfs_ioctl().
 */
STATIC long
linvfs_ioctl_invis(
	struct file	*filp,
	unsigned int	cmd,
	unsigned long	arg)
{
	int		error;
	struct inode	*inode = filp->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);

	ASSERT(vp);
	VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
	VMODIFY(vp);

	/* NOTE: some of the ioctl's return positive #'s as a
	 * byte count indicating success, such as
	 * readlink_by_handle.  So we don't "sign flip"
	 * like most other routines.  This means true
	 * errors need to be returned as a negative value.
	 */
	return error;
}
463
464#ifdef HAVE_VMOP_MPROTECT
/*
 * mprotect hook: on DMAPI filesystems, notify the DMAPI layer when a
 * shared mapping is being upgraded from read-only to writable.
 */
STATIC int
linvfs_mprotect(
	struct vm_area_struct *vma,
	unsigned int	newflags)
{
	vnode_t		*vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode);
	int		error = 0;

	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		if ((vma->vm_flags & VM_MAYSHARE) &&
		    (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) {
			xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);

			error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
		}
	}
	return error;
}
483#endif /* HAVE_VMOP_MPROTECT */
484
485#ifdef HAVE_FOP_OPEN_EXEC
486/* If the user is attempting to execute a file that is offline then
487 * we have to trigger a DMAPI READ event before the file is marked as busy
488 * otherwise the invisible I/O will not be able to write to the file to bring
489 * it back online.
490 */
/*
 * exec-time open hook (DMAPI filesystems only): trigger a DMAPI READ
 * event before the file is marked busy, so an offline file can be
 * migrated back online before execution begins.
 */
STATIC int
linvfs_open_exec(
	struct inode	*inode)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
	int		error = 0;
	bhv_desc_t	*bdp;
	xfs_inode_t	*ip;

	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
		if (!bdp) {
			error = -EINVAL;
			goto open_exec_out;
		}
		ip = XFS_BHVTOI(bdp);
		if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
			error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
					       0, 0, 0, NULL);
		}
	}
open_exec_out:
	return error;
}
516#endif /* HAVE_FOP_OPEN_EXEC */
517
/* Regular-file operations. */
struct file_operations linvfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.readv		= linvfs_readv,
	.writev		= linvfs_writev,
	.aio_read	= linvfs_aio_read,
	.aio_write	= linvfs_aio_write,
	.sendfile	= linvfs_sendfile,
	.unlocked_ioctl	= linvfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_compat_ioctl,
#endif
	.mmap		= linvfs_file_mmap,
	.open		= linvfs_open,
	.release	= linvfs_release,
	.fsync		= linvfs_fsync,
#ifdef HAVE_FOP_OPEN_EXEC
	.open_exec	= linvfs_open_exec,
#endif
};

/* File operations for files opened via the XFS handle interface:
 * identical to the regular set but using the IO_INVIS variants so
 * access does not perturb timestamps (DMAPI "invisible" I/O). */
struct file_operations linvfs_invis_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.readv		= linvfs_readv_invis,
	.writev		= linvfs_writev_invis,
	.aio_read	= linvfs_aio_read_invis,
	.aio_write	= linvfs_aio_write_invis,
	.sendfile	= linvfs_sendfile,
	.unlocked_ioctl	= linvfs_ioctl_invis,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_compat_invis_ioctl,
#endif
	.mmap		= linvfs_file_mmap,
	.open		= linvfs_open,
	.release	= linvfs_release,
	.fsync		= linvfs_fsync,
};


/* Directory operations. */
struct file_operations linvfs_dir_operations = {
	.read		= generic_read_dir,
	.readdir	= linvfs_readdir,
	.unlocked_ioctl	= linvfs_ioctl,
	.fsync		= linvfs_fsync,
};

/* VM operations installed by linvfs_file_mmap(). */
static struct vm_operations_struct linvfs_file_vm_ops = {
	.nopage		= filemap_nopage,
	.populate	= filemap_populate,
#ifdef HAVE_VMOP_MPROTECT
	.mprotect	= linvfs_mprotect,
#endif
};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
new file mode 100644
index 000000000000..05ebd30ec96f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -0,0 +1,124 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35/*
36 * Stub for no-op vnode operations that return error status.
37 */
38int
39fs_noerr(void)
40{
41 return 0;
42}
43
44/*
45 * Operation unsupported under this file system.
46 */
47int
48fs_nosys(void)
49{
50 return ENOSYS;
51}
52
53/*
54 * Stub for inactive, strategy, and read/write lock/unlock. Does nothing.
55 */
56/* ARGSUSED */
57void
58fs_noval(void)
59{
60}
61
62/*
63 * vnode pcache layer for vnode_tosspages.
64 * 'last' parameter unused but left in for IRIX compatibility
65 */
66void
67fs_tosspages(
68 bhv_desc_t *bdp,
69 xfs_off_t first,
70 xfs_off_t last,
71 int fiopt)
72{
73 vnode_t *vp = BHV_TO_VNODE(bdp);
74 struct inode *ip = LINVFS_GET_IP(vp);
75
76 if (VN_CACHED(vp))
77 truncate_inode_pages(ip->i_mapping, first);
78}
79
80
81/*
82 * vnode pcache layer for vnode_flushinval_pages.
83 * 'last' parameter unused but left in for IRIX compatibility
84 */
85void
86fs_flushinval_pages(
87 bhv_desc_t *bdp,
88 xfs_off_t first,
89 xfs_off_t last,
90 int fiopt)
91{
92 vnode_t *vp = BHV_TO_VNODE(bdp);
93 struct inode *ip = LINVFS_GET_IP(vp);
94
95 if (VN_CACHED(vp)) {
96 filemap_fdatawrite(ip->i_mapping);
97 filemap_fdatawait(ip->i_mapping);
98
99 truncate_inode_pages(ip->i_mapping, first);
100 }
101}
102
103/*
104 * vnode pcache layer for vnode_flush_pages.
105 * 'last' parameter unused but left in for IRIX compatibility
106 */
107int
108fs_flush_pages(
109 bhv_desc_t *bdp,
110 xfs_off_t first,
111 xfs_off_t last,
112 uint64_t flags,
113 int fiopt)
114{
115 vnode_t *vp = BHV_TO_VNODE(bdp);
116 struct inode *ip = LINVFS_GET_IP(vp);
117
118 if (VN_CACHED(vp)) {
119 filemap_fdatawrite(ip->i_mapping);
120 filemap_fdatawait(ip->i_mapping);
121 }
122
123 return 0;
124}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
new file mode 100644
index 000000000000..2db9ddbd4567
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.h
@@ -0,0 +1,48 @@
1/*
2 * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUBR_H__
33#define __XFS_SUBR_H__
34
35/*
36 * Utilities shared among file system implementations.
37 */
38
39struct cred;
40
41extern int fs_noerr(void);
42extern int fs_nosys(void);
43extern void fs_noval(void);
44extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
45extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
46extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
47
48#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
new file mode 100644
index 000000000000..a6da5b4fd240
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -0,0 +1,74 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * This file contains globals needed by XFS that were normally defined
35 * somewhere else in IRIX.
36 */
37
38#include "xfs.h"
39#include "xfs_cred.h"
40#include "xfs_sysctl.h"
41
42/*
43 * System memory size - used to scale certain data structures in XFS.
44 */
45unsigned long xfs_physmem;
46
47/*
48 * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
49 * other XFS code uses these values. Times are measured in centisecs (i.e.
50 * 100ths of a second).
51 */
52xfs_param_t xfs_params = {
53 /* MIN DFLT MAX */
54 .restrict_chown = { 0, 1, 1 },
55 .sgid_inherit = { 0, 0, 1 },
56 .symlink_mode = { 0, 0, 1 },
57 .panic_mask = { 0, 0, 127 },
58 .error_level = { 0, 3, 11 },
59 .syncd_timer = { 1*100, 30*100, 7200*100},
60 .stats_clear = { 0, 0, 1 },
61 .inherit_sync = { 0, 1, 1 },
62 .inherit_nodump = { 0, 1, 1 },
63 .inherit_noatim = { 0, 1, 1 },
64 .xfs_buf_timer = { 100/2, 1*100, 30*100 },
65 .xfs_buf_age = { 1*100, 15*100, 7200*100},
66 .inherit_nosym = { 0, 0, 1 },
67 .rotorstep = { 1, 1, 255 },
68};
69
70/*
71 * Global system credential structure.
72 */
73cred_t sys_cred_val, *sys_cred = &sys_cred_val;
74
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
new file mode 100644
index 000000000000..e81e2f38a853
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_GLOBALS_H__
33#define __XFS_GLOBALS_H__
34
35/*
36 * This file declares globals needed by XFS that were normally defined
37 * somewhere else in IRIX.
38 */
39
40extern uint64_t xfs_panic_mask; /* set to cause more panics */
41extern unsigned long xfs_physmem;
42extern struct cred *sys_cred;
43
44#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
new file mode 100644
index 000000000000..69809eef8a54
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -0,0 +1,1336 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35#include "xfs_fs.h"
36#include "xfs_inum.h"
37#include "xfs_log.h"
38#include "xfs_trans.h"
39#include "xfs_sb.h"
40#include "xfs_dir.h"
41#include "xfs_dir2.h"
42#include "xfs_alloc.h"
43#include "xfs_dmapi.h"
44#include "xfs_mount.h"
45#include "xfs_alloc_btree.h"
46#include "xfs_bmap_btree.h"
47#include "xfs_ialloc_btree.h"
48#include "xfs_btree.h"
49#include "xfs_ialloc.h"
50#include "xfs_attr_sf.h"
51#include "xfs_dir_sf.h"
52#include "xfs_dir2_sf.h"
53#include "xfs_dinode.h"
54#include "xfs_inode.h"
55#include "xfs_bmap.h"
56#include "xfs_bit.h"
57#include "xfs_rtalloc.h"
58#include "xfs_error.h"
59#include "xfs_itable.h"
60#include "xfs_rw.h"
61#include "xfs_acl.h"
62#include "xfs_cap.h"
63#include "xfs_mac.h"
64#include "xfs_attr.h"
65#include "xfs_buf_item.h"
66#include "xfs_utils.h"
67#include "xfs_dfrag.h"
68#include "xfs_fsops.h"
69
70#include <linux/dcache.h>
71#include <linux/mount.h>
72#include <linux/namei.h>
73#include <linux/pagemap.h>
74
75/*
76 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
77 * a file or fs handle.
78 *
79 * XFS_IOC_PATH_TO_FSHANDLE
80 * returns fs handle for a mount point or path within that mount point
81 * XFS_IOC_FD_TO_HANDLE
82 * returns full handle for a FD opened in user space
83 * XFS_IOC_PATH_TO_HANDLE
84 * returns full handle for a path
85 */
86STATIC int
87xfs_find_handle(
88 unsigned int cmd,
89 void __user *arg)
90{
91 int hsize;
92 xfs_handle_t handle;
93 xfs_fsop_handlereq_t hreq;
94 struct inode *inode;
95 struct vnode *vp;
96
97 if (copy_from_user(&hreq, arg, sizeof(hreq)))
98 return -XFS_ERROR(EFAULT);
99
100 memset((char *)&handle, 0, sizeof(handle));
101
102 switch (cmd) {
103 case XFS_IOC_PATH_TO_FSHANDLE:
104 case XFS_IOC_PATH_TO_HANDLE: {
105 struct nameidata nd;
106 int error;
107
108 error = user_path_walk_link((const char __user *)hreq.path, &nd);
109 if (error)
110 return error;
111
112 ASSERT(nd.dentry);
113 ASSERT(nd.dentry->d_inode);
114 inode = igrab(nd.dentry->d_inode);
115 path_release(&nd);
116 break;
117 }
118
119 case XFS_IOC_FD_TO_HANDLE: {
120 struct file *file;
121
122 file = fget(hreq.fd);
123 if (!file)
124 return -EBADF;
125
126 ASSERT(file->f_dentry);
127 ASSERT(file->f_dentry->d_inode);
128 inode = igrab(file->f_dentry->d_inode);
129 fput(file);
130 break;
131 }
132
133 default:
134 ASSERT(0);
135 return -XFS_ERROR(EINVAL);
136 }
137
138 if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
139 /* we're not in XFS anymore, Toto */
140 iput(inode);
141 return -XFS_ERROR(EINVAL);
142 }
143
144 /* we need the vnode */
145 vp = LINVFS_GET_VP(inode);
146 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
147 iput(inode);
148 return -XFS_ERROR(EBADF);
149 }
150
151 /* now we can grab the fsid */
152 memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
153 hsize = sizeof(xfs_fsid_t);
154
155 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
156 xfs_inode_t *ip;
157 bhv_desc_t *bhv;
158 int lock_mode;
159
160 /* need to get access to the xfs_inode to read the generation */
161 bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
162 ASSERT(bhv);
163 ip = XFS_BHVTOI(bhv);
164 ASSERT(ip);
165 lock_mode = xfs_ilock_map_shared(ip);
166
167 /* fill in fid section of handle from inode */
168 handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
169 sizeof(handle.ha_fid.xfs_fid_len);
170 handle.ha_fid.xfs_fid_pad = 0;
171 handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
172 handle.ha_fid.xfs_fid_ino = ip->i_ino;
173
174 xfs_iunlock_map_shared(ip, lock_mode);
175
176 hsize = XFS_HSIZE(handle);
177 }
178
179 /* now copy our handle into the user buffer & write out the size */
180 if (copy_to_user(hreq.ohandle, &handle, hsize) ||
181 copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
182 iput(inode);
183 return -XFS_ERROR(EFAULT);
184 }
185
186 iput(inode);
187 return 0;
188}
189
190
191/*
192 * Convert userspace handle data into vnode (and inode).
193 * We [ab]use the fact that all the fsop_handlereq ioctl calls
194 * have a data structure argument whose first component is always
195 * a xfs_fsop_handlereq_t, so we can cast to and from this type.
196 * This allows us to optimise the copy_from_user calls and gives
197 * a handy, shared routine.
198 *
199 * If no error, caller must always VN_RELE the returned vp.
200 */
201STATIC int
202xfs_vget_fsop_handlereq(
203 xfs_mount_t *mp,
204 struct inode *parinode, /* parent inode pointer */
205 xfs_fsop_handlereq_t *hreq,
206 vnode_t **vp,
207 struct inode **inode)
208{
209 void __user *hanp;
210 size_t hlen;
211 xfs_fid_t *xfid;
212 xfs_handle_t *handlep;
213 xfs_handle_t handle;
214 xfs_inode_t *ip;
215 struct inode *inodep;
216 vnode_t *vpp;
217 xfs_ino_t ino;
218 __u32 igen;
219 int error;
220
221 /*
222 * Only allow handle opens under a directory.
223 */
224 if (!S_ISDIR(parinode->i_mode))
225 return XFS_ERROR(ENOTDIR);
226
227 hanp = hreq->ihandle;
228 hlen = hreq->ihandlen;
229 handlep = &handle;
230
231 if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
232 return XFS_ERROR(EINVAL);
233 if (copy_from_user(handlep, hanp, hlen))
234 return XFS_ERROR(EFAULT);
235 if (hlen < sizeof(*handlep))
236 memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
237 if (hlen > sizeof(handlep->ha_fsid)) {
238 if (handlep->ha_fid.xfs_fid_len !=
239 (hlen - sizeof(handlep->ha_fsid)
240 - sizeof(handlep->ha_fid.xfs_fid_len))
241 || handlep->ha_fid.xfs_fid_pad)
242 return XFS_ERROR(EINVAL);
243 }
244
245 /*
246 * Crack the handle, obtain the inode # & generation #
247 */
248 xfid = (struct xfs_fid *)&handlep->ha_fid;
249 if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
250 ino = xfid->xfs_fid_ino;
251 igen = xfid->xfs_fid_gen;
252 } else {
253 return XFS_ERROR(EINVAL);
254 }
255
256 /*
257 * Get the XFS inode, building a vnode to go with it.
258 */
259 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
260 if (error)
261 return error;
262 if (ip == NULL)
263 return XFS_ERROR(EIO);
264 if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
265 xfs_iput_new(ip, XFS_ILOCK_SHARED);
266 return XFS_ERROR(ENOENT);
267 }
268
269 vpp = XFS_ITOV(ip);
270 inodep = LINVFS_GET_IP(vpp);
271 xfs_iunlock(ip, XFS_ILOCK_SHARED);
272
273 *vp = vpp;
274 *inode = inodep;
275 return 0;
276}
277
278STATIC int
279xfs_open_by_handle(
280 xfs_mount_t *mp,
281 void __user *arg,
282 struct file *parfilp,
283 struct inode *parinode)
284{
285 int error;
286 int new_fd;
287 int permflag;
288 struct file *filp;
289 struct inode *inode;
290 struct dentry *dentry;
291 vnode_t *vp;
292 xfs_fsop_handlereq_t hreq;
293
294 if (!capable(CAP_SYS_ADMIN))
295 return -XFS_ERROR(EPERM);
296 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
297 return -XFS_ERROR(EFAULT);
298
299 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
300 if (error)
301 return -error;
302
303 /* Restrict xfs_open_by_handle to directories & regular files. */
304 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
305 iput(inode);
306 return -XFS_ERROR(EINVAL);
307 }
308
309#if BITS_PER_LONG != 32
310 hreq.oflags |= O_LARGEFILE;
311#endif
312 /* Put open permission in namei format. */
313 permflag = hreq.oflags;
314 if ((permflag+1) & O_ACCMODE)
315 permflag++;
316 if (permflag & O_TRUNC)
317 permflag |= 2;
318
319 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
320 (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
321 iput(inode);
322 return -XFS_ERROR(EPERM);
323 }
324
325 if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
326 iput(inode);
327 return -XFS_ERROR(EACCES);
328 }
329
330 /* Can't write directories. */
331 if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
332 iput(inode);
333 return -XFS_ERROR(EISDIR);
334 }
335
336 if ((new_fd = get_unused_fd()) < 0) {
337 iput(inode);
338 return new_fd;
339 }
340
341 dentry = d_alloc_anon(inode);
342 if (dentry == NULL) {
343 iput(inode);
344 put_unused_fd(new_fd);
345 return -XFS_ERROR(ENOMEM);
346 }
347
348 /* Ensure umount returns EBUSY on umounts while this file is open. */
349 mntget(parfilp->f_vfsmnt);
350
351 /* Create file pointer. */
352 filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags);
353 if (IS_ERR(filp)) {
354 put_unused_fd(new_fd);
355 return -XFS_ERROR(-PTR_ERR(filp));
356 }
357 if (inode->i_mode & S_IFREG)
358 filp->f_op = &linvfs_invis_file_operations;
359
360 fd_install(new_fd, filp);
361 return new_fd;
362}
363
364STATIC int
365xfs_readlink_by_handle(
366 xfs_mount_t *mp,
367 void __user *arg,
368 struct file *parfilp,
369 struct inode *parinode)
370{
371 int error;
372 struct iovec aiov;
373 struct uio auio;
374 struct inode *inode;
375 xfs_fsop_handlereq_t hreq;
376 vnode_t *vp;
377 __u32 olen;
378
379 if (!capable(CAP_SYS_ADMIN))
380 return -XFS_ERROR(EPERM);
381 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
382 return -XFS_ERROR(EFAULT);
383
384 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
385 if (error)
386 return -error;
387
388 /* Restrict this handle operation to symlinks only. */
389 if (vp->v_type != VLNK) {
390 VN_RELE(vp);
391 return -XFS_ERROR(EINVAL);
392 }
393
394 if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
395 VN_RELE(vp);
396 return -XFS_ERROR(EFAULT);
397 }
398 aiov.iov_len = olen;
399 aiov.iov_base = hreq.ohandle;
400
401 auio.uio_iov = &aiov;
402 auio.uio_iovcnt = 1;
403 auio.uio_offset = 0;
404 auio.uio_segflg = UIO_USERSPACE;
405 auio.uio_resid = olen;
406
407 VOP_READLINK(vp, &auio, IO_INVIS, NULL, error);
408
409 VN_RELE(vp);
410 return (olen - auio.uio_resid);
411}
412
413STATIC int
414xfs_fssetdm_by_handle(
415 xfs_mount_t *mp,
416 void __user *arg,
417 struct file *parfilp,
418 struct inode *parinode)
419{
420 int error;
421 struct fsdmidata fsd;
422 xfs_fsop_setdm_handlereq_t dmhreq;
423 struct inode *inode;
424 bhv_desc_t *bdp;
425 vnode_t *vp;
426
427 if (!capable(CAP_MKNOD))
428 return -XFS_ERROR(EPERM);
429 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
430 return -XFS_ERROR(EFAULT);
431
432 error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode);
433 if (error)
434 return -error;
435
436 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
437 VN_RELE(vp);
438 return -XFS_ERROR(EPERM);
439 }
440
441 if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
442 VN_RELE(vp);
443 return -XFS_ERROR(EFAULT);
444 }
445
446 bdp = bhv_base_unlocked(VN_BHV_HEAD(vp));
447 error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL);
448
449 VN_RELE(vp);
450 if (error)
451 return -error;
452 return 0;
453}
454
455STATIC int
456xfs_attrlist_by_handle(
457 xfs_mount_t *mp,
458 void __user *arg,
459 struct file *parfilp,
460 struct inode *parinode)
461{
462 int error;
463 attrlist_cursor_kern_t *cursor;
464 xfs_fsop_attrlist_handlereq_t al_hreq;
465 struct inode *inode;
466 vnode_t *vp;
467 char *kbuf;
468
469 if (!capable(CAP_SYS_ADMIN))
470 return -XFS_ERROR(EPERM);
471 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
472 return -XFS_ERROR(EFAULT);
473 if (al_hreq.buflen > XATTR_LIST_MAX)
474 return -XFS_ERROR(EINVAL);
475
476 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq,
477 &vp, &inode);
478 if (error)
479 goto out;
480
481 kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
482 if (!kbuf)
483 goto out_vn_rele;
484
485 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
486 VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags,
487 cursor, NULL, error);
488 if (error)
489 goto out_kfree;
490
491 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
492 error = -EFAULT;
493
494 out_kfree:
495 kfree(kbuf);
496 out_vn_rele:
497 VN_RELE(vp);
498 out:
499 return -error;
500}
501
502STATIC int
503xfs_attrmulti_attr_get(
504 struct vnode *vp,
505 char *name,
506 char __user *ubuf,
507 __uint32_t *len,
508 __uint32_t flags)
509{
510 char *kbuf;
511 int error = EFAULT;
512
513 if (*len > XATTR_SIZE_MAX)
514 return EINVAL;
515 kbuf = kmalloc(*len, GFP_KERNEL);
516 if (!kbuf)
517 return ENOMEM;
518
519 VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error);
520 if (error)
521 goto out_kfree;
522
523 if (copy_to_user(ubuf, kbuf, *len))
524 error = EFAULT;
525
526 out_kfree:
527 kfree(kbuf);
528 return error;
529}
530
531STATIC int
532xfs_attrmulti_attr_set(
533 struct vnode *vp,
534 char *name,
535 const char __user *ubuf,
536 __uint32_t len,
537 __uint32_t flags)
538{
539 char *kbuf;
540 int error = EFAULT;
541
542 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
543 return EPERM;
544 if (len > XATTR_SIZE_MAX)
545 return EINVAL;
546
547 kbuf = kmalloc(len, GFP_KERNEL);
548 if (!kbuf)
549 return ENOMEM;
550
551 if (copy_from_user(kbuf, ubuf, len))
552 goto out_kfree;
553
554 VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error);
555
556 out_kfree:
557 kfree(kbuf);
558 return error;
559}
560
561STATIC int
562xfs_attrmulti_attr_remove(
563 struct vnode *vp,
564 char *name,
565 __uint32_t flags)
566{
567 int error;
568
569 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
570 return EPERM;
571
572 VOP_ATTR_REMOVE(vp, name, flags, NULL, error);
573 return error;
574}
575
576STATIC int
577xfs_attrmulti_by_handle(
578 xfs_mount_t *mp,
579 void __user *arg,
580 struct file *parfilp,
581 struct inode *parinode)
582{
583 int error;
584 xfs_attr_multiop_t *ops;
585 xfs_fsop_attrmulti_handlereq_t am_hreq;
586 struct inode *inode;
587 vnode_t *vp;
588 unsigned int i, size;
589 char *attr_name;
590
591 if (!capable(CAP_SYS_ADMIN))
592 return -XFS_ERROR(EPERM);
593 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
594 return -XFS_ERROR(EFAULT);
595
596 error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode);
597 if (error)
598 goto out;
599
600 error = E2BIG;
601 size = am_hreq.opcount * sizeof(attr_multiop_t);
602 if (!size || size > 16 * PAGE_SIZE)
603 goto out_vn_rele;
604
605 error = ENOMEM;
606 ops = kmalloc(size, GFP_KERNEL);
607 if (!ops)
608 goto out_vn_rele;
609
610 error = EFAULT;
611 if (copy_from_user(ops, am_hreq.ops, size))
612 goto out_kfree_ops;
613
614 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
615 if (!attr_name)
616 goto out_kfree_ops;
617
618
619 error = 0;
620 for (i = 0; i < am_hreq.opcount; i++) {
621 ops[i].am_error = strncpy_from_user(attr_name,
622 ops[i].am_attrname, MAXNAMELEN);
623 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
624 error = -ERANGE;
625 if (ops[i].am_error < 0)
626 break;
627
628 switch (ops[i].am_opcode) {
629 case ATTR_OP_GET:
630 ops[i].am_error = xfs_attrmulti_attr_get(vp,
631 attr_name, ops[i].am_attrvalue,
632 &ops[i].am_length, ops[i].am_flags);
633 break;
634 case ATTR_OP_SET:
635 ops[i].am_error = xfs_attrmulti_attr_set(vp,
636 attr_name, ops[i].am_attrvalue,
637 ops[i].am_length, ops[i].am_flags);
638 break;
639 case ATTR_OP_REMOVE:
640 ops[i].am_error = xfs_attrmulti_attr_remove(vp,
641 attr_name, ops[i].am_flags);
642 break;
643 default:
644 ops[i].am_error = EINVAL;
645 }
646 }
647
648 if (copy_to_user(am_hreq.ops, ops, size))
649 error = XFS_ERROR(EFAULT);
650
651 kfree(attr_name);
652 out_kfree_ops:
653 kfree(ops);
654 out_vn_rele:
655 VN_RELE(vp);
656 out:
657 return -error;
658}
659
660/* prototypes for a few of the stack-hungry cases that have
661 * their own functions. Functions are defined after their use
662 * so gcc doesn't get fancy and inline them with -03 */
663
664STATIC int
665xfs_ioc_space(
666 bhv_desc_t *bdp,
667 vnode_t *vp,
668 struct file *filp,
669 int flags,
670 unsigned int cmd,
671 void __user *arg);
672
673STATIC int
674xfs_ioc_bulkstat(
675 xfs_mount_t *mp,
676 unsigned int cmd,
677 void __user *arg);
678
679STATIC int
680xfs_ioc_fsgeometry_v1(
681 xfs_mount_t *mp,
682 void __user *arg);
683
684STATIC int
685xfs_ioc_fsgeometry(
686 xfs_mount_t *mp,
687 void __user *arg);
688
689STATIC int
690xfs_ioc_xattr(
691 vnode_t *vp,
692 xfs_inode_t *ip,
693 struct file *filp,
694 unsigned int cmd,
695 void __user *arg);
696
697STATIC int
698xfs_ioc_getbmap(
699 bhv_desc_t *bdp,
700 struct file *filp,
701 int flags,
702 unsigned int cmd,
703 void __user *arg);
704
705STATIC int
706xfs_ioc_getbmapx(
707 bhv_desc_t *bdp,
708 void __user *arg);
709
710int
711xfs_ioctl(
712 bhv_desc_t *bdp,
713 struct inode *inode,
714 struct file *filp,
715 int ioflags,
716 unsigned int cmd,
717 void __user *arg)
718{
719 int error;
720 vnode_t *vp;
721 xfs_inode_t *ip;
722 xfs_mount_t *mp;
723
724 vp = LINVFS_GET_VP(inode);
725
726 vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);
727
728 ip = XFS_BHVTOI(bdp);
729 mp = ip->i_mount;
730
731 switch (cmd) {
732
733 case XFS_IOC_ALLOCSP:
734 case XFS_IOC_FREESP:
735 case XFS_IOC_RESVSP:
736 case XFS_IOC_UNRESVSP:
737 case XFS_IOC_ALLOCSP64:
738 case XFS_IOC_FREESP64:
739 case XFS_IOC_RESVSP64:
740 case XFS_IOC_UNRESVSP64:
741 /*
742 * Only allow the sys admin to reserve space unless
743 * unwritten extents are enabled.
744 */
745 if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
746 !capable(CAP_SYS_ADMIN))
747 return -EPERM;
748
749 return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);
750
751 case XFS_IOC_DIOINFO: {
752 struct dioattr da;
753 xfs_buftarg_t *target =
754 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
755 mp->m_rtdev_targp : mp->m_ddev_targp;
756
757 da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
758 /* The size dio will do in one go */
759 da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
760
761 if (copy_to_user(arg, &da, sizeof(da)))
762 return -XFS_ERROR(EFAULT);
763 return 0;
764 }
765
766 case XFS_IOC_FSBULKSTAT_SINGLE:
767 case XFS_IOC_FSBULKSTAT:
768 case XFS_IOC_FSINUMBERS:
769 return xfs_ioc_bulkstat(mp, cmd, arg);
770
771 case XFS_IOC_FSGEOMETRY_V1:
772 return xfs_ioc_fsgeometry_v1(mp, arg);
773
774 case XFS_IOC_FSGEOMETRY:
775 return xfs_ioc_fsgeometry(mp, arg);
776
777 case XFS_IOC_GETVERSION:
778 case XFS_IOC_GETXFLAGS:
779 case XFS_IOC_SETXFLAGS:
780 case XFS_IOC_FSGETXATTR:
781 case XFS_IOC_FSSETXATTR:
782 case XFS_IOC_FSGETXATTRA:
783 return xfs_ioc_xattr(vp, ip, filp, cmd, arg);
784
785 case XFS_IOC_FSSETDM: {
786 struct fsdmidata dmi;
787
788 if (copy_from_user(&dmi, arg, sizeof(dmi)))
789 return -XFS_ERROR(EFAULT);
790
791 error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate,
792 NULL);
793 return -error;
794 }
795
796 case XFS_IOC_GETBMAP:
797 case XFS_IOC_GETBMAPA:
798 return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);
799
800 case XFS_IOC_GETBMAPX:
801 return xfs_ioc_getbmapx(bdp, arg);
802
803 case XFS_IOC_FD_TO_HANDLE:
804 case XFS_IOC_PATH_TO_HANDLE:
805 case XFS_IOC_PATH_TO_FSHANDLE:
806 return xfs_find_handle(cmd, arg);
807
808 case XFS_IOC_OPEN_BY_HANDLE:
809 return xfs_open_by_handle(mp, arg, filp, inode);
810
811 case XFS_IOC_FSSETDM_BY_HANDLE:
812 return xfs_fssetdm_by_handle(mp, arg, filp, inode);
813
814 case XFS_IOC_READLINK_BY_HANDLE:
815 return xfs_readlink_by_handle(mp, arg, filp, inode);
816
817 case XFS_IOC_ATTRLIST_BY_HANDLE:
818 return xfs_attrlist_by_handle(mp, arg, filp, inode);
819
820 case XFS_IOC_ATTRMULTI_BY_HANDLE:
821 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
822
823 case XFS_IOC_SWAPEXT: {
824 error = xfs_swapext((struct xfs_swapext __user *)arg);
825 return -error;
826 }
827
828 case XFS_IOC_FSCOUNTS: {
829 xfs_fsop_counts_t out;
830
831 error = xfs_fs_counts(mp, &out);
832 if (error)
833 return -error;
834
835 if (copy_to_user(arg, &out, sizeof(out)))
836 return -XFS_ERROR(EFAULT);
837 return 0;
838 }
839
840 case XFS_IOC_SET_RESBLKS: {
841 xfs_fsop_resblks_t inout;
842 __uint64_t in;
843
844 if (!capable(CAP_SYS_ADMIN))
845 return -EPERM;
846
847 if (copy_from_user(&inout, arg, sizeof(inout)))
848 return -XFS_ERROR(EFAULT);
849
850 /* input parameter is passed in resblks field of structure */
851 in = inout.resblks;
852 error = xfs_reserve_blocks(mp, &in, &inout);
853 if (error)
854 return -error;
855
856 if (copy_to_user(arg, &inout, sizeof(inout)))
857 return -XFS_ERROR(EFAULT);
858 return 0;
859 }
860
861 case XFS_IOC_GET_RESBLKS: {
862 xfs_fsop_resblks_t out;
863
864 if (!capable(CAP_SYS_ADMIN))
865 return -EPERM;
866
867 error = xfs_reserve_blocks(mp, NULL, &out);
868 if (error)
869 return -error;
870
871 if (copy_to_user(arg, &out, sizeof(out)))
872 return -XFS_ERROR(EFAULT);
873
874 return 0;
875 }
876
877 case XFS_IOC_FSGROWFSDATA: {
878 xfs_growfs_data_t in;
879
880 if (!capable(CAP_SYS_ADMIN))
881 return -EPERM;
882
883 if (copy_from_user(&in, arg, sizeof(in)))
884 return -XFS_ERROR(EFAULT);
885
886 error = xfs_growfs_data(mp, &in);
887 return -error;
888 }
889
890 case XFS_IOC_FSGROWFSLOG: {
891 xfs_growfs_log_t in;
892
893 if (!capable(CAP_SYS_ADMIN))
894 return -EPERM;
895
896 if (copy_from_user(&in, arg, sizeof(in)))
897 return -XFS_ERROR(EFAULT);
898
899 error = xfs_growfs_log(mp, &in);
900 return -error;
901 }
902
903 case XFS_IOC_FSGROWFSRT: {
904 xfs_growfs_rt_t in;
905
906 if (!capable(CAP_SYS_ADMIN))
907 return -EPERM;
908
909 if (copy_from_user(&in, arg, sizeof(in)))
910 return -XFS_ERROR(EFAULT);
911
912 error = xfs_growfs_rt(mp, &in);
913 return -error;
914 }
915
916 case XFS_IOC_FREEZE:
917 if (!capable(CAP_SYS_ADMIN))
918 return -EPERM;
919
920 if (inode->i_sb->s_frozen == SB_UNFROZEN)
921 freeze_bdev(inode->i_sb->s_bdev);
922 return 0;
923
924 case XFS_IOC_THAW:
925 if (!capable(CAP_SYS_ADMIN))
926 return -EPERM;
927 if (inode->i_sb->s_frozen != SB_UNFROZEN)
928 thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
929 return 0;
930
931 case XFS_IOC_GOINGDOWN: {
932 __uint32_t in;
933
934 if (!capable(CAP_SYS_ADMIN))
935 return -EPERM;
936
937 if (get_user(in, (__uint32_t __user *)arg))
938 return -XFS_ERROR(EFAULT);
939
940 error = xfs_fs_goingdown(mp, in);
941 return -error;
942 }
943
944 case XFS_IOC_ERROR_INJECTION: {
945 xfs_error_injection_t in;
946
947 if (!capable(CAP_SYS_ADMIN))
948 return -EPERM;
949
950 if (copy_from_user(&in, arg, sizeof(in)))
951 return -XFS_ERROR(EFAULT);
952
953 error = xfs_errortag_add(in.errtag, mp);
954 return -error;
955 }
956
957 case XFS_IOC_ERROR_CLEARALL:
958 if (!capable(CAP_SYS_ADMIN))
959 return -EPERM;
960
961 error = xfs_errortag_clearall(mp);
962 return -error;
963
964 default:
965 return -ENOTTY;
966 }
967}
968
969STATIC int
970xfs_ioc_space(
971 bhv_desc_t *bdp,
972 vnode_t *vp,
973 struct file *filp,
974 int ioflags,
975 unsigned int cmd,
976 void __user *arg)
977{
978 xfs_flock64_t bf;
979 int attr_flags = 0;
980 int error;
981
982 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
983 return -XFS_ERROR(EPERM);
984
985 if (!(filp->f_flags & FMODE_WRITE))
986 return -XFS_ERROR(EBADF);
987
988 if (vp->v_type != VREG)
989 return -XFS_ERROR(EINVAL);
990
991 if (copy_from_user(&bf, arg, sizeof(bf)))
992 return -XFS_ERROR(EFAULT);
993
994 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
995 attr_flags |= ATTR_NONBLOCK;
996 if (ioflags & IO_INVIS)
997 attr_flags |= ATTR_DMI;
998
999 error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos,
1000 NULL, attr_flags);
1001 return -error;
1002}
1003
/*
 * Service XFS_IOC_FSBULKSTAT, XFS_IOC_FSBULKSTAT_SINGLE and
 * XFS_IOC_FSINUMBERS: batched inode stat / inode-number enumeration.
 * Copies the request header in, dispatches on cmd, and writes the
 * updated cursor and output count back to user space.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_bulkstat(
	xfs_mount_t		*mp,
	unsigned int		cmd,
	void __user		*arg)
{
	xfs_fsop_bulkreq_t	bulkreq;
	int			count;	/* # of records returned */
	xfs_ino_t		inlast;	/* last inode number */
	int			done;
	int			error;

	/* done = 1 if there are more stats to get and if bulkstat */
	/* should be called again (unused here, but used in dmapi) */

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
		return -XFS_ERROR(EFAULT);

	/* the cursor itself lives behind a user pointer in the request */
	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
		return -XFS_ERROR(EFAULT);

	if ((count = bulkreq.icount) <= 0)
		return -XFS_ERROR(EINVAL);

	if (cmd == XFS_IOC_FSINUMBERS)
		error = xfs_inumbers(mp, &inlast, &count,
						bulkreq.ubuffer);
	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
		error = xfs_bulkstat_single(mp, &inlast,
						bulkreq.ubuffer, &done);
	else {	/* XFS_IOC_FSBULKSTAT */
		/*
		 * A one-record request with a nonzero cursor means
		 * "stat the inode following inlast", so advance the
		 * cursor and use the cheaper single-inode path.
		 */
		if (count == 1 && inlast != 0) {
			inlast++;
			error = xfs_bulkstat_single(mp, &inlast,
					bulkreq.ubuffer, &done);
		} else {
			error = xfs_bulkstat(mp, &inlast, &count,
				(bulkstat_one_pf)xfs_bulkstat_one, NULL,
				sizeof(xfs_bstat_t), bulkreq.ubuffer,
				BULKSTAT_FG_QUICK, &done);
		}
	}

	if (error)
		return -error;

	/*
	 * NOTE(review): the lastip write-back is guarded by ocount being
	 * non-NULL, so a caller passing ocount == NULL never sees the
	 * advanced cursor — confirm this asymmetry is intended.
	 */
	if (bulkreq.ocount != NULL) {
		if (copy_to_user(bulkreq.lastip, &inlast,
						sizeof(xfs_ino_t)))
			return -XFS_ERROR(EFAULT);

		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
			return -XFS_ERROR(EFAULT);
	}

	return 0;
}
1067
/*
 * XFS_IOC_FSGEOMETRY_V1: report filesystem geometry in the original
 * (smaller) v1 structure layout.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_fsgeometry_v1(
	xfs_mount_t		*mp,
	void __user		*arg)
{
	xfs_fsop_geom_v1_t	fsgeo;
	int			error;

	/*
	 * Version 3 fills only the v1-sized portion of the full
	 * xfs_fsop_geom_t, so handing xfs_fs_geometry() a pointer to
	 * the shorter v1 struct is safe — assumes the v1 layout is an
	 * exact prefix of the full struct; confirm against xfs_fs.h.
	 */
	error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
	if (error)
		return -error;

	if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
		return -XFS_ERROR(EFAULT);
	return 0;
}
1084
1085STATIC int
1086xfs_ioc_fsgeometry(
1087 xfs_mount_t *mp,
1088 void __user *arg)
1089{
1090 xfs_fsop_geom_t fsgeo;
1091 int error;
1092
1093 error = xfs_fs_geometry(mp, &fsgeo, 4);
1094 if (error)
1095 return -error;
1096
1097 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
1098 return -XFS_ERROR(EFAULT);
1099 return 0;
1100}
1101
1102/*
1103 * Linux extended inode flags interface.
1104 */
1105#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */
1106#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */
1107#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
1108#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
1109#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
1110
1111STATIC unsigned int
1112xfs_merge_ioc_xflags(
1113 unsigned int flags,
1114 unsigned int start)
1115{
1116 unsigned int xflags = start;
1117
1118 if (flags & LINUX_XFLAG_IMMUTABLE)
1119 xflags |= XFS_XFLAG_IMMUTABLE;
1120 else
1121 xflags &= ~XFS_XFLAG_IMMUTABLE;
1122 if (flags & LINUX_XFLAG_APPEND)
1123 xflags |= XFS_XFLAG_APPEND;
1124 else
1125 xflags &= ~XFS_XFLAG_APPEND;
1126 if (flags & LINUX_XFLAG_SYNC)
1127 xflags |= XFS_XFLAG_SYNC;
1128 else
1129 xflags &= ~XFS_XFLAG_SYNC;
1130 if (flags & LINUX_XFLAG_NOATIME)
1131 xflags |= XFS_XFLAG_NOATIME;
1132 else
1133 xflags &= ~XFS_XFLAG_NOATIME;
1134 if (flags & LINUX_XFLAG_NODUMP)
1135 xflags |= XFS_XFLAG_NODUMP;
1136 else
1137 xflags &= ~XFS_XFLAG_NODUMP;
1138
1139 return xflags;
1140}
1141
1142STATIC unsigned int
1143xfs_di2lxflags(
1144 __uint16_t di_flags)
1145{
1146 unsigned int flags = 0;
1147
1148 if (di_flags & XFS_DIFLAG_IMMUTABLE)
1149 flags |= LINUX_XFLAG_IMMUTABLE;
1150 if (di_flags & XFS_DIFLAG_APPEND)
1151 flags |= LINUX_XFLAG_APPEND;
1152 if (di_flags & XFS_DIFLAG_SYNC)
1153 flags |= LINUX_XFLAG_SYNC;
1154 if (di_flags & XFS_DIFLAG_NOATIME)
1155 flags |= LINUX_XFLAG_NOATIME;
1156 if (di_flags & XFS_DIFLAG_NODUMP)
1157 flags |= LINUX_XFLAG_NODUMP;
1158 return flags;
1159}
1160
/*
 * Service the extended-attribute/flags family of ioctls
 * (FSGETXATTR/FSSETXATTR/FSGETXATTRA, GETXFLAGS/SETXFLAGS,
 * GETVERSION) by translating between the user-visible structures and
 * vattr_t attribute get/set operations on the vnode.
 *
 * Note: VOP_GETATTR/VOP_SETATTR are statement macros that assign to
 * the trailing 'error' argument rather than returning a value.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_xattr(
	vnode_t			*vp,
	xfs_inode_t		*ip,
	struct file		*filp,
	unsigned int		cmd,
	void __user		*arg)
{
	struct fsxattr		fa;
	vattr_t			va;
	int			error;
	int			attr_flags;
	unsigned int		flags;

	switch (cmd) {
	case XFS_IOC_FSGETXATTR: {
		/* report xflags, extent size hint and data-fork extent count */
		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
		VOP_GETATTR(vp, &va, 0, NULL, error);
		if (error)
			return -error;

		fa.fsx_xflags	= va.va_xflags;
		fa.fsx_extsize	= va.va_extsize;
		fa.fsx_nextents	= va.va_nextents;

		if (copy_to_user(arg, &fa, sizeof(fa)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_FSSETXATTR: {
		if (copy_from_user(&fa, arg, sizeof(fa)))
			return -XFS_ERROR(EFAULT);

		attr_flags = 0;
		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
			attr_flags |= ATTR_NONBLOCK;

		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
		va.va_xflags  = fa.fsx_xflags;
		va.va_extsize = fa.fsx_extsize;

		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
		if (!error)
			vn_revalidate(vp);	/* update Linux inode flags */
		return -error;
	}

	case XFS_IOC_FSGETXATTRA: {
		/* same as FSGETXATTR but reports the attr-fork extent count */
		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
		VOP_GETATTR(vp, &va, 0, NULL, error);
		if (error)
			return -error;

		fa.fsx_xflags	= va.va_xflags;
		fa.fsx_extsize	= va.va_extsize;
		fa.fsx_nextents	= va.va_anextents;

		if (copy_to_user(arg, &fa, sizeof(fa)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_GETXFLAGS: {
		flags = xfs_di2lxflags(ip->i_d.di_flags);
		if (copy_to_user(arg, &flags, sizeof(flags)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_SETXFLAGS: {
		if (copy_from_user(&flags, arg, sizeof(flags)))
			return -XFS_ERROR(EFAULT);

		/* reject any Linux flag bits XFS cannot represent */
		if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
			      LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
			      LINUX_XFLAG_SYNC))
			return -XFS_ERROR(EOPNOTSUPP);

		attr_flags = 0;
		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
			attr_flags |= ATTR_NONBLOCK;

		/* merge onto the current xflags so unrelated bits survive */
		va.va_mask = XFS_AT_XFLAGS;
		va.va_xflags = xfs_merge_ioc_xflags(flags,
				xfs_ip2xflags(ip));

		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
		if (!error)
			vn_revalidate(vp);	/* update Linux inode flags */
		return -error;
	}

	case XFS_IOC_GETVERSION: {
		flags = LINVFS_GET_IP(vp)->i_generation;
		if (copy_to_user(arg, &flags, sizeof(flags)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	default:
		return -ENOTTY;
	}
}
1265
/*
 * XFS_IOC_GETBMAP/GETBMAPA: return the file's block mapping.  The user
 * buffer starts with one struct getbmap header followed by bmv_count-1
 * result records, which is why the result pointer passed down is arg+1.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_getbmap(
	bhv_desc_t		*bdp,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	void __user		*arg)
{
	struct getbmap		bm;
	int			iflags;
	int			error;

	if (copy_from_user(&bm, arg, sizeof(bm)))
		return -XFS_ERROR(EFAULT);

	/* need room for the header plus at least one result record */
	if (bm.bmv_count < 2)
		return -XFS_ERROR(EINVAL);

	iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
	if (ioflags & IO_INVIS)
		iflags |= BMV_IF_NO_DMAPI_READ;

	error = xfs_getbmap(bdp, &bm, (struct getbmap __user *)arg+1, iflags);
	if (error)
		return -error;

	/* write the updated header (cursor/entry count) back */
	if (copy_to_user(arg, &bm, sizeof(bm)))
		return -XFS_ERROR(EFAULT);
	return 0;
}
1296
/*
 * XFS_IOC_GETBMAPX: extended block-map query.  Converts the getbmapx
 * header to the plain getbmap form xfs_getbmap() works with, then
 * converts the updated header back before copying it out.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_getbmapx(
	bhv_desc_t		*bdp,
	void __user		*arg)
{
	struct getbmapx		bmx;
	struct getbmap		bm;
	int			iflags;
	int			error;

	if (copy_from_user(&bmx, arg, sizeof(bmx)))
		return -XFS_ERROR(EFAULT);

	/* need room for the header plus at least one result record */
	if (bmx.bmv_count < 2)
		return -XFS_ERROR(EINVAL);

	/*
	 * Map input getbmapx structure to a getbmap
	 * structure for xfs_getbmap.
	 */
	GETBMAP_CONVERT(bmx, bm);

	iflags = bmx.bmv_iflags;

	if (iflags & (~BMV_IF_VALID))
		return -XFS_ERROR(EINVAL);

	iflags |= BMV_IF_EXTENDED;

	/* results land after the getbmapx-sized header in the user buffer */
	error = xfs_getbmap(bdp, &bm, (struct getbmapx __user *)arg+1, iflags);
	if (error)
		return -error;

	GETBMAP_CONVERT(bm, bmx);

	if (copy_to_user(arg, &bmx, sizeof(bmx)))
		return -XFS_ERROR(EFAULT);

	return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
new file mode 100644
index 000000000000..7a12c83184f5
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -0,0 +1,163 @@
1/*
2 * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include <linux/config.h>
34#include <linux/compat.h>
35#include <linux/init.h>
36#include <linux/ioctl.h>
37#include <linux/ioctl32.h>
38#include <linux/syscalls.h>
39#include <linux/types.h>
40#include <linux/fs.h>
41#include <asm/uaccess.h>
42
43#include "xfs.h"
44#include "xfs_types.h"
45#include "xfs_fs.h"
46#include "xfs_vfs.h"
47#include "xfs_vnode.h"
48#include "xfs_dfrag.h"
49
50#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
51#define BROKEN_X86_ALIGNMENT
52#else
53
/*
 * 32-bit layout of xfs_fsop_bulkreq_t: the user pointers shrink to
 * compat_uptr_t, so the structure must be widened before handing it
 * to the native bulkstat ioctl path.
 */
typedef struct xfs_fsop_bulkreq32 {
	compat_uptr_t	lastip;		/* last inode # pointer */
	__s32		icount;		/* count of entries in buffer */
	compat_uptr_t	ubuffer;	/* user buffer for inode desc. */
	__s32		ocount;		/* output count pointer */
} xfs_fsop_bulkreq32_t;
60
/*
 * Widen a 32-bit xfs_fsop_bulkreq32 into a native xfs_fsop_bulkreq on
 * the compat user stack area and return the address of the new copy,
 * which replaces 'arg' for the native ioctl handler.
 *
 * NOTE(review): on a faulting copy this returns -EFAULT folded into an
 * unsigned long; the caller in __xfs_compat_ioctl() does not check for
 * that, so the bogus value is passed on as a user pointer (the native
 * handler's own copy_from_user then fails with EFAULT) — confirm this
 * indirection is acceptable.
 */
static unsigned long
xfs_ioctl32_bulkstat(unsigned long arg)
{
	xfs_fsop_bulkreq32_t	__user *p32 = (void __user *)arg;
	xfs_fsop_bulkreq_t	__user *p = compat_alloc_user_space(sizeof(*p));
	u32			addr;

	if (get_user(addr, &p32->lastip) ||
	    put_user(compat_ptr(addr), &p->lastip) ||
	    copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
	    get_user(addr, &p32->ubuffer) ||
	    put_user(compat_ptr(addr), &p->ubuffer) ||
	    get_user(addr, &p32->ocount) ||
	    put_user(compat_ptr(addr), &p->ocount))
		return -EFAULT;

	return (unsigned long)p;
}
79#endif
80
/*
 * Common 32-bit-compat ioctl dispatcher.  Commands whose structures
 * have identical layout on 32- and 64-bit kernels fall straight
 * through to the native VOP_IOCTL; on architectures without the x86
 * alignment mismatch a few more commands are passed through, with the
 * bulkstat family repacked first.  Unknown commands get -ENOIOCTLCMD
 * so the generic compat code can try its own tables.
 */
static long
__xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
{
	int		error;
	struct inode	*inode = f->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);

	switch (cmd) {
	/* layout-compatible on all architectures: pass straight through */
	case XFS_IOC_DIOINFO:
	case XFS_IOC_FSGEOMETRY_V1:
	case XFS_IOC_FSGEOMETRY:
	case XFS_IOC_GETVERSION:
	case XFS_IOC_GETXFLAGS:
	case XFS_IOC_SETXFLAGS:
	case XFS_IOC_FSGETXATTR:
	case XFS_IOC_FSSETXATTR:
	case XFS_IOC_FSGETXATTRA:
	case XFS_IOC_FSSETDM:
	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
	case XFS_IOC_GETBMAPX:
/* not handled
	case XFS_IOC_FD_TO_HANDLE:
	case XFS_IOC_PATH_TO_HANDLE:
	case XFS_IOC_PATH_TO_FSHANDLE:
	case XFS_IOC_OPEN_BY_HANDLE:
	case XFS_IOC_FSSETDM_BY_HANDLE:
	case XFS_IOC_READLINK_BY_HANDLE:
	case XFS_IOC_ATTRLIST_BY_HANDLE:
	case XFS_IOC_ATTRMULTI_BY_HANDLE:
*/
	case XFS_IOC_FSCOUNTS:
	case XFS_IOC_SET_RESBLKS:
	case XFS_IOC_GET_RESBLKS:
	case XFS_IOC_FSGROWFSDATA:
	case XFS_IOC_FSGROWFSLOG:
	case XFS_IOC_FSGROWFSRT:
	case XFS_IOC_FREEZE:
	case XFS_IOC_THAW:
	case XFS_IOC_GOINGDOWN:
	case XFS_IOC_ERROR_INJECTION:
	case XFS_IOC_ERROR_CLEARALL:
		break;

#ifndef BROKEN_X86_ALIGNMENT
	/* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64:
	case XFS_IOC_SWAPEXT:
		break;

	case XFS_IOC_FSBULKSTAT_SINGLE:
	case XFS_IOC_FSBULKSTAT:
	case XFS_IOC_FSINUMBERS:
		/* repack the 32-bit request; see note on the helper about
		 * its unchecked -EFAULT return */
		arg = xfs_ioctl32_bulkstat(arg);
		break;
#endif
	default:
		return -ENOIOCTLCMD;
	}

	VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
	/* NOTE(review): the vnode is marked modified even when the ioctl
	 * failed — confirm unconditional VMODIFY is intended here */
	VMODIFY(vp);

	return error;
}
154
/* 32-bit compat ioctl entry point for regular (visible) file access. */
long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
{
	return __xfs_compat_ioctl(0, f, cmd, arg);
}
159
/*
 * 32-bit compat ioctl entry point for "invisible" (DMAPI) access:
 * same dispatch, but with IO_INVIS so timestamps are not updated.
 */
long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg)
{
	return __xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
new file mode 100644
index 000000000000..779f69a48116
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg);
34long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
new file mode 100644
index 000000000000..407e99359391
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -0,0 +1,680 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_fs.h"
35#include "xfs_inum.h"
36#include "xfs_log.h"
37#include "xfs_trans.h"
38#include "xfs_sb.h"
39#include "xfs_ag.h"
40#include "xfs_dir.h"
41#include "xfs_dir2.h"
42#include "xfs_alloc.h"
43#include "xfs_dmapi.h"
44#include "xfs_quota.h"
45#include "xfs_mount.h"
46#include "xfs_alloc_btree.h"
47#include "xfs_bmap_btree.h"
48#include "xfs_ialloc_btree.h"
49#include "xfs_btree.h"
50#include "xfs_ialloc.h"
51#include "xfs_attr_sf.h"
52#include "xfs_dir_sf.h"
53#include "xfs_dir2_sf.h"
54#include "xfs_dinode.h"
55#include "xfs_inode.h"
56#include "xfs_bmap.h"
57#include "xfs_bit.h"
58#include "xfs_rtalloc.h"
59#include "xfs_error.h"
60#include "xfs_itable.h"
61#include "xfs_rw.h"
62#include "xfs_acl.h"
63#include "xfs_cap.h"
64#include "xfs_mac.h"
65#include "xfs_attr.h"
66#include "xfs_buf_item.h"
67#include "xfs_utils.h"
68
69#include <linux/xattr.h>
70#include <linux/namei.h>
71
72
/*
 * Pull the link count, block count and size up from the xfs inode to
 * the linux inode after a namespace operation may have changed them.
 * Errors from the getattr are deliberately ignored (best effort).
 */
STATIC void
validate_fields(
	struct inode	*ip)
{
	vnode_t		*vp = LINVFS_GET_VP(ip);
	vattr_t		va;
	int		error;

	va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
	VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error);
	if (likely(!error)) {
		ip->i_nlink = va.va_nlink;
		ip->i_blocks = va.va_nblocks;

		/* we're under i_sem so i_size can't change under us */
		if (i_size_read(ip) != va.va_size)
			i_size_write(ip, va.va_size);
	}
}
95
96/*
97 * Determine whether a process has a valid fs_struct (kernel daemons
98 * like knfsd don't have an fs_struct).
99 *
100 * XXX(hch): nfsd is broken, better fix it instead.
101 */
102STATIC inline int
103has_fs_struct(struct task_struct *task)
104{
105 return (task->fs != init_task.fs);
106}
107
/*
 * ->mknod (also backs ->create and ->mkdir): create a file, directory,
 * or special node, inheriting a default ACL from the parent when one
 * exists.  If attaching the inherited ACL fails, the freshly created
 * object is torn down again so no half-initialized inode survives.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	dev_t		rdev)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*vp = NULL, *dvp = LINVFS_GET_VP(dir);
	xfs_acl_t	*default_acl = NULL;
	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
	if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
		return -EINVAL;

	/* snapshot the parent's default ACL, if any, before creating */
	if (test_default_acl && test_default_acl(dvp)) {
		if (!_ACL_ALLOC(default_acl))
			return -ENOMEM;
		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
			_ACL_FREE(default_acl);
			default_acl = NULL;
		}
	}

	/* no default ACL: apply the process umask ourselves */
	if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
		mode &= ~current->fs->umask;

	memset(&va, 0, sizeof(va));
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
	va.va_type = IFTOVT(mode);
	va.va_mode = mode;

	switch (mode & S_IFMT) {
	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
		va.va_rdev = sysv_encode_dev(rdev);
		va.va_mask |= XFS_AT_RDEV;
		/*FALLTHROUGH*/
	case S_IFREG:
		VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
		break;
	case S_IFDIR:
		VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (default_acl) {
		if (!error) {
			error = _ACL_INHERIT(vp, &va, default_acl);
			if (!error) {
				VMODIFY(vp);
			} else {
				struct dentry	teardown = {};
				int		err2;

				/* Oh, the horror.
				 * If we can't add the ACL we must back out.
				 * ENOSPC can hit here, among other things.
				 */
				teardown.d_inode = ip = LINVFS_GET_IP(vp);
				teardown.d_name = dentry->d_name;

				vn_mark_bad(vp);

				if (S_ISDIR(mode))
					VOP_RMDIR(dvp, &teardown, NULL, err2);
				else
					VOP_REMOVE(dvp, &teardown, NULL, err2);
				VN_RELE(vp);
			}
		}
		_ACL_FREE(default_acl);
	}

	if (!error) {
		ASSERT(vp);
		ip = LINVFS_GET_IP(vp);

		if (S_ISCHR(mode) || S_ISBLK(mode))
			ip->i_rdev = rdev;
		else if (S_ISDIR(mode))
			validate_fields(ip);
		d_instantiate(dentry, ip);
		validate_fields(dir);
	}
	return -error;
}
203
/* ->create: a regular-file create is just mknod with no device. */
STATIC int
linvfs_create(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	struct nameidata *nd)
{
	return linvfs_mknod(dir, dentry, mode, 0);
}
213
/* ->mkdir: delegate to mknod with the directory type bit forced on. */
STATIC int
linvfs_mkdir(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode)
{
	return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0);
}
222
/*
 * ->lookup: resolve a name in a directory.  A plain ENOENT becomes a
 * cached negative dentry; any other error is propagated.
 */
STATIC struct dentry *
linvfs_lookup(
	struct inode	*dir,
	struct dentry	*dentry,
	struct nameidata *nd)
{
	struct vnode	*vp = LINVFS_GET_VP(dir), *cvp;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

	VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
	if (error) {
		if (unlikely(error != ENOENT))
			return ERR_PTR(-error);
		d_add(dentry, NULL);	/* negative dentry */
		return NULL;
	}

	/* reuse an existing alias for the inode if the dcache has one */
	return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
}
245
/*
 * ->link: create a hard link.  On success take an extra vnode hold for
 * the new dentry reference and refresh the inode's link count.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_link(
	struct dentry	*old_dentry,
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*ip;	/* inode of guy being linked to */
	vnode_t		*tdvp;	/* target directory for new name/link */
	vnode_t		*vp;	/* vp of name being linked */
	int		error;

	ip = old_dentry->d_inode;	/* inode being linked to */
	if (S_ISDIR(ip->i_mode))
		return -EPERM;		/* no hard links to directories */

	tdvp = LINVFS_GET_VP(dir);
	vp = LINVFS_GET_VP(ip);

	VOP_LINK(tdvp, vp, dentry, NULL, error);
	if (!error) {
		VMODIFY(tdvp);
		VN_HOLD(vp);
		validate_fields(ip);
		d_instantiate(dentry, ip);
	}
	return -error;
}
273
/*
 * ->unlink: remove a name; on success refresh the parent's size and
 * the victim's link count.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_unlink(
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*inode;
	vnode_t		*dvp;	/* directory containing name to remove */
	int		error;

	inode = dentry->d_inode;
	dvp = LINVFS_GET_VP(dir);

	VOP_REMOVE(dvp, dentry, NULL, error);
	if (!error) {
		validate_fields(dir);	/* For size only */
		validate_fields(inode);
	}

	return -error;
}
294
/*
 * ->symlink: create a symbolic link.  The mode honours the process
 * umask when the irix_symlink_mode sysctl is set, matching IRIX
 * behaviour; otherwise links are created 0777.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_symlink(
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*dvp;	/* directory containing name of symlink */
	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
	int		error;

	dvp = LINVFS_GET_VP(dir);
	cvp = NULL;

	memset(&va, 0, sizeof(va));
	va.va_type = VLNK;
	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;

	error = 0;
	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
	if (!error && cvp) {
		ASSERT(cvp->v_type == VLNK);
		ip = LINVFS_GET_IP(cvp);
		d_instantiate(dentry, ip);
		validate_fields(dir);
		validate_fields(ip);	/* size needs update */
	}
	return -error;
}
326
/*
 * ->rmdir: remove a directory; on success refresh both the parent and
 * the removed directory's cached link/size fields.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_rmdir(
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*inode = dentry->d_inode;
	vnode_t		*dvp = LINVFS_GET_VP(dir);
	int		error;

	VOP_RMDIR(dvp, dentry, NULL, error);
	if (!error) {
		validate_fields(inode);
		validate_fields(dir);
	}
	return -error;
}
343
/*
 * ->rename: move a name between directories; on success refresh the
 * affected inodes' cached fields (the displaced target, if any, may
 * have lost a link).
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_rename(
	struct inode	*odir,
	struct dentry	*odentry,
	struct inode	*ndir,
	struct dentry	*ndentry)
{
	struct inode	*new_inode = ndentry->d_inode;
	vnode_t		*fvp;	/* from directory */
	vnode_t		*tvp;	/* target directory */
	int		error;

	fvp = LINVFS_GET_VP(odir);
	tvp = LINVFS_GET_VP(ndir);

	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
	if (error)
		return -error;

	if (new_inode)
		validate_fields(new_inode);

	validate_fields(odir);
	if (ndir != odir)
		validate_fields(ndir);
	return 0;
}
371
372/*
373 * careful here - this function can get called recursively, so
374 * we need to be very careful about how much stack we use.
375 * uio is kmalloced for this reason...
376 */
377STATIC int
378linvfs_follow_link(
379 struct dentry *dentry,
380 struct nameidata *nd)
381{
382 vnode_t *vp;
383 uio_t *uio;
384 iovec_t iov;
385 int error;
386 char *link;
387
388 ASSERT(dentry);
389 ASSERT(nd);
390
391 link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
392 if (!link) {
393 nd_set_link(nd, ERR_PTR(-ENOMEM));
394 return 0;
395 }
396
397 uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
398 if (!uio) {
399 kfree(link);
400 nd_set_link(nd, ERR_PTR(-ENOMEM));
401 return 0;
402 }
403
404 vp = LINVFS_GET_VP(dentry->d_inode);
405
406 iov.iov_base = link;
407 iov.iov_len = MAXNAMELEN;
408
409 uio->uio_iov = &iov;
410 uio->uio_offset = 0;
411 uio->uio_segflg = UIO_SYSSPACE;
412 uio->uio_resid = MAXNAMELEN;
413 uio->uio_iovcnt = 1;
414
415 VOP_READLINK(vp, uio, 0, NULL, error);
416 if (error) {
417 kfree(link);
418 link = ERR_PTR(-error);
419 } else {
420 link[MAXNAMELEN - uio->uio_resid] = '\0';
421 }
422 kfree(uio);
423
424 nd_set_link(nd, link);
425 return 0;
426}
427
/*
 * ->put_link: release the buffer handed to the VFS by
 * linvfs_follow_link(); error sentinels were never allocated.
 */
static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd)
{
	char *link = nd_get_link(nd);

	if (IS_ERR(link))
		return;
	kfree(link);
}
434
#ifdef CONFIG_XFS_POSIX_ACL
/*
 * ->permission: have XFS evaluate access (including POSIX ACLs).
 * Only wired up when ACL support is configured; otherwise the table
 * entry is NULL and the VFS performs its generic mode-bit check.
 *
 * Returns 0 if access is allowed or a negative errno.
 */
STATIC int
linvfs_permission(
	struct inode	*inode,
	int		mode,
	struct nameidata *nd)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error;

	mode <<= 6;		/* convert from linux to vnode access bits */
	VOP_ACCESS(vp, mode, NULL, error);
	return -error;
}
#else
#define linvfs_permission NULL
#endif
452
/*
 * ->getattr: revalidate the Linux inode from XFS if it has been marked
 * modified, then fill the kstat from the inode.
 *
 * NOTE(review): a failing vn_revalidate() is silently discarded and
 * stat still succeeds with possibly stale fields — confirm whether the
 * error should be propagated instead of returning 0 unconditionally.
 */
STATIC int
linvfs_getattr(
	struct vfsmount	*mnt,
	struct dentry	*dentry,
	struct kstat	*stat)
{
	struct inode	*inode = dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error = 0;

	if (unlikely(vp->v_flag & VMODIFIED))
		error = vn_revalidate(vp);
	if (!error)
		generic_fillattr(inode, stat);
	return 0;
}
469
/*
 * ->setattr: translate a Linux iattr change-set into a vattr_t and
 * apply it through VOP_SETATTR, then revalidate the Linux inode so its
 * cached fields match the new XFS state.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_setattr(
	struct dentry	*dentry,
	struct iattr	*attr)
{
	struct inode	*inode = dentry->d_inode;
	unsigned int	ia_valid = attr->ia_valid;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	vattr_t		vattr;
	int		flags = 0;
	int		error;

	memset(&vattr, 0, sizeof(vattr_t));
	if (ia_valid & ATTR_UID) {
		vattr.va_mask |= XFS_AT_UID;
		vattr.va_uid = attr->ia_uid;
	}
	if (ia_valid & ATTR_GID) {
		vattr.va_mask |= XFS_AT_GID;
		vattr.va_gid = attr->ia_gid;
	}
	if (ia_valid & ATTR_SIZE) {
		vattr.va_mask |= XFS_AT_SIZE;
		vattr.va_size = attr->ia_size;
	}
	if (ia_valid & ATTR_ATIME) {
		vattr.va_mask |= XFS_AT_ATIME;
		vattr.va_atime = attr->ia_atime;
	}
	if (ia_valid & ATTR_MTIME) {
		vattr.va_mask |= XFS_AT_MTIME;
		vattr.va_mtime = attr->ia_mtime;
	}
	if (ia_valid & ATTR_CTIME) {
		vattr.va_mask |= XFS_AT_CTIME;
		vattr.va_ctime = attr->ia_ctime;
	}
	if (ia_valid & ATTR_MODE) {
		vattr.va_mask |= XFS_AT_MODE;
		vattr.va_mode = attr->ia_mode;
		/* chmod by a non-owner, non-privileged caller drops setgid */
		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
			inode->i_mode &= ~S_ISGID;
	}

	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
		flags |= ATTR_UTIME;
#ifdef ATTR_NO_BLOCK
	if ((ia_valid & ATTR_NO_BLOCK))
		flags |= ATTR_NONBLOCK;
#endif

	VOP_SETATTR(vp, &vattr, flags, NULL, error);
	if (error)
		return -error;
	vn_revalidate(vp);
	/* error is known to be 0 here */
	return error;
}
527
/*
 * ->truncate: zero the tail of the (now) last page after a size change
 * so mmap readers past EOF see zeroes.
 */
STATIC void
linvfs_truncate(
	struct inode	*inode)
{
	block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block);
}
534
/*
 * ->setxattr: resolve the attribute namespace from the name prefix,
 * check the caller is allowed to touch it, translate the Linux XATTR_*
 * flags into XFS ATTR_* flags, and store the attribute.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_setxattr(
	struct dentry	*dentry,
	const char	*name,
	const void	*data,
	size_t		size,
	int		flags)
{
	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
	char		*attr = (char *)name;
	attrnames_t	*namesp;
	int		xflags = 0;
	int		error;

	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
	if (!namesp)
		return -EOPNOTSUPP;
	attr += namesp->attr_namelen;	/* skip the "user."/"trusted." prefix */
	error = namesp->attr_capable(vp, NULL);
	if (error)
		return error;

	/* Convert Linux syscall to XFS internal ATTR flags */
	if (flags & XATTR_CREATE)
		xflags |= ATTR_CREATE;
	if (flags & XATTR_REPLACE)
		xflags |= ATTR_REPLACE;
	xflags |= namesp->attr_flag;
	return namesp->attr_set(vp, attr, (void *)data, size, xflags);
}
565
/*
 * ->getxattr: resolve the attribute namespace, check permission, and
 * fetch the attribute value.  A zero size means "query the value
 * length only" (ATTR_KERNOVAL).
 *
 * Returns the value length on success or a negative errno.
 */
STATIC ssize_t
linvfs_getxattr(
	struct dentry	*dentry,
	const char	*name,
	void		*data,
	size_t		size)
{
	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
	char		*attr = (char *)name;
	attrnames_t	*namesp;
	int		xflags = 0;
	ssize_t		error;

	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
	if (!namesp)
		return -EOPNOTSUPP;
	attr += namesp->attr_namelen;	/* skip the namespace prefix */
	error = namesp->attr_capable(vp, NULL);
	if (error)
		return error;

	/* Convert Linux syscall to XFS internal ATTR flags */
	if (!size) {
		xflags |= ATTR_KERNOVAL;
		data = NULL;
	}
	xflags |= namesp->attr_flag;
	return namesp->attr_get(vp, attr, (void *)data, size, xflags);
}
595
/*
 * ->listxattr: enumerate attribute names into the caller's buffer.
 * Privileged callers also see trusted/system namespaces
 * (ATTR_KERNFULLS); everyone else gets the user-visible set.
 *
 * Returns the number of bytes of names on success or a negative errno.
 */
STATIC ssize_t
linvfs_listxattr(
	struct dentry		*dentry,
	char			*data,
	size_t			size)
{
	vnode_t			*vp = LINVFS_GET_VP(dentry->d_inode);
	int			error, xflags = ATTR_KERNAMELS;
	ssize_t			result;

	if (!size)
		xflags |= ATTR_KERNOVAL;	/* length probe only */
	xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;

	error = attr_generic_list(vp, data, size, xflags, &result);
	if (error < 0)
		return error;
	return result;
}
615
/*
 * ->removexattr: resolve the attribute namespace, check permission,
 * and delete the named attribute.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_removexattr(
	struct dentry	*dentry,
	const char	*name)
{
	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
	char		*attr = (char *)name;
	attrnames_t	*namesp;
	int		xflags = 0;
	int		error;

	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
	if (!namesp)
		return -EOPNOTSUPP;
	attr += namesp->attr_namelen;	/* skip the namespace prefix */
	error = namesp->attr_capable(vp, NULL);
	if (error)
		return error;
	xflags |= namesp->attr_flag;
	return namesp->attr_remove(vp, attr, xflags);
}
637
638
/* Inode operations for regular files. */
struct inode_operations linvfs_file_inode_operations = {
	.permission		= linvfs_permission,
	.truncate		= linvfs_truncate,
	.getattr		= linvfs_getattr,
	.setattr		= linvfs_setattr,
	.setxattr		= linvfs_setxattr,
	.getxattr		= linvfs_getxattr,
	.listxattr		= linvfs_listxattr,
	.removexattr		= linvfs_removexattr,
};
649
/* Inode operations for directories (adds the namespace operations). */
struct inode_operations linvfs_dir_inode_operations = {
	.create			= linvfs_create,
	.lookup			= linvfs_lookup,
	.link			= linvfs_link,
	.unlink			= linvfs_unlink,
	.symlink		= linvfs_symlink,
	.mkdir			= linvfs_mkdir,
	.rmdir			= linvfs_rmdir,
	.mknod			= linvfs_mknod,
	.rename			= linvfs_rename,
	.permission		= linvfs_permission,
	.getattr		= linvfs_getattr,
	.setattr		= linvfs_setattr,
	.setxattr		= linvfs_setxattr,
	.getxattr		= linvfs_getxattr,
	.listxattr		= linvfs_listxattr,
	.removexattr		= linvfs_removexattr,
};
668
/* Inode operations for symbolic links. */
struct inode_operations linvfs_symlink_inode_operations = {
	.readlink		= generic_readlink,
	.follow_link		= linvfs_follow_link,
	.put_link		= linvfs_put_link,
	.permission		= linvfs_permission,
	.getattr		= linvfs_getattr,
	.setattr		= linvfs_setattr,
	.setxattr		= linvfs_setxattr,
	.getxattr		= linvfs_getxattr,
	.listxattr		= linvfs_listxattr,
	.removexattr		= linvfs_removexattr,
};
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
new file mode 100644
index 000000000000..6a69a62c36b0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
#ifndef __XFS_IOPS_H__
#define __XFS_IOPS_H__

/* inode_operations tables (defined in xfs_iops.c) */
extern struct inode_operations linvfs_file_inode_operations;
extern struct inode_operations linvfs_dir_inode_operations;
extern struct inode_operations linvfs_symlink_inode_operations;

/* file_operations tables; "invis" is the invisible-I/O (DMAPI) variant */
extern struct file_operations linvfs_file_operations;
extern struct file_operations linvfs_invis_file_operations;
extern struct file_operations linvfs_dir_operations;

/* address-space operations (defined in xfs_aops.c) */
extern struct address_space_operations linvfs_aops;

/* block mapping and unwritten-extent I/O completion callback */
extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void linvfs_unwritten_done(struct buffer_head *, int);

/* ioctl dispatcher (defined in xfs_ioctl.c) */
extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
			int, unsigned int, void __user *);

#endif	/* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
new file mode 100644
index 000000000000..71bb41019a12
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -0,0 +1,374 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_LINUX__
33#define __XFS_LINUX__
34
35#include <linux/types.h>
36#include <linux/config.h>
37
38/*
39 * Some types are conditional depending on the target system.
40 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
41 * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
42 * as requiring XFS_BIG_BLKNOS to be set.
43 */
44#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
45# define XFS_BIG_BLKNOS 1
46# if BITS_PER_LONG == 64
47# define XFS_BIG_INUMS 1
48# else
49# define XFS_BIG_INUMS 0
50# endif
51#else
52# define XFS_BIG_BLKNOS 0
53# define XFS_BIG_INUMS 0
54#endif
55
56#include <xfs_types.h>
57#include <xfs_arch.h>
58
59#include <kmem.h>
60#include <mrlock.h>
61#include <spin.h>
62#include <sv.h>
63#include <mutex.h>
64#include <sema.h>
65#include <time.h>
66
67#include <support/qsort.h>
68#include <support/ktrace.h>
69#include <support/debug.h>
70#include <support/move.h>
71#include <support/uuid.h>
72
73#include <linux/mm.h>
74#include <linux/kernel.h>
75#include <linux/blkdev.h>
76#include <linux/slab.h>
77#include <linux/module.h>
78#include <linux/file.h>
79#include <linux/swap.h>
80#include <linux/errno.h>
81#include <linux/sched.h>
82#include <linux/bitops.h>
83#include <linux/major.h>
84#include <linux/pagemap.h>
85#include <linux/vfs.h>
86#include <linux/seq_file.h>
87#include <linux/init.h>
88#include <linux/list.h>
89#include <linux/proc_fs.h>
90#include <linux/version.h>
91#include <linux/sort.h>
92
93#include <asm/page.h>
94#include <asm/div64.h>
95#include <asm/param.h>
96#include <asm/uaccess.h>
97#include <asm/byteorder.h>
98#include <asm/unaligned.h>
99
100#include <xfs_behavior.h>
101#include <xfs_vfs.h>
102#include <xfs_cred.h>
103#include <xfs_vnode.h>
104#include <xfs_stats.h>
105#include <xfs_sysctl.h>
106#include <xfs_iops.h>
107#include <xfs_super.h>
108#include <xfs_globals.h>
109#include <xfs_fs_subr.h>
110#include <xfs_lrw.h>
111#include <xfs_buf.h>
112
113/*
114 * Feature macros (disable/enable)
115 */
116#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
117#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
118
119/*
120 * State flag for unwritten extent buffers.
121 *
122 * We need to be able to distinguish between these and delayed
123 * allocate buffers within XFS. The generic IO path code does
124 * not need to distinguish - we use the BH_Delay flag for both
125 * delalloc and these ondisk-uninitialised buffers.
126 */
127BUFFER_FNS(PrivateStart, unwritten);
/*
 * Route this buffer's I/O completion to linvfs_unwritten_done, the
 * handler for writes over unwritten (ondisk-uninitialised) extents --
 * see the BH_PrivateStart/unwritten state flag comment above.
 */
static inline void set_buffer_unwritten_io(struct buffer_head *bh)
{
	bh->b_end_io = linvfs_unwritten_done;
}
132
133#define restricted_chown xfs_params.restrict_chown.val
134#define irix_sgid_inherit xfs_params.sgid_inherit.val
135#define irix_symlink_mode xfs_params.symlink_mode.val
136#define xfs_panic_mask xfs_params.panic_mask.val
137#define xfs_error_level xfs_params.error_level.val
138#define xfs_syncd_centisecs xfs_params.syncd_timer.val
139#define xfs_stats_clear xfs_params.stats_clear.val
140#define xfs_inherit_sync xfs_params.inherit_sync.val
141#define xfs_inherit_nodump xfs_params.inherit_nodump.val
142#define xfs_inherit_noatime xfs_params.inherit_noatim.val
143#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val
144#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val
145#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
146#define xfs_rotorstep xfs_params.rotorstep.val
147
148#ifndef __smp_processor_id
149#define __smp_processor_id() smp_processor_id()
150#endif
151#define current_cpu() __smp_processor_id()
152#define current_pid() (current->pid)
153#define current_fsuid(cred) (current->fsuid)
154#define current_fsgid(cred) (current->fsgid)
155
156#define NBPP PAGE_SIZE
157#define DPPSHFT (PAGE_SHIFT - 9)
158#define NDPP (1 << (PAGE_SHIFT - 9))
159#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT)
160#define dtopt(DD) ((DD) >> DPPSHFT)
161#define dpoff(DD) ((DD) & (NDPP-1))
162
163#define NBBY 8 /* number of bits per byte */
164#define NBPC PAGE_SIZE /* Number of bytes per click */
165#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */
166
167/*
168 * Size of block device i/o is parameterized here.
169 * Currently the system supports page-sized i/o.
170 */
171#define BLKDEV_IOSHIFT BPCSHIFT
172#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT)
173/* number of BB's per block device block */
174#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE)
175
176/* bytes to clicks */
177#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
178#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
179#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
180#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT)
181#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
182#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT)
183
184/* off_t bytes to clicks */
185#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
186#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT)
187
188/* clicks to off_t bytes */
189#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT)
190
191/* clicks to bytes */
192#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT)
193#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
194#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT)
195#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT)
196
197/* bytes to clicks */
198#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
199
200#ifndef CELL_CAPABLE
201#define FSC_NOTIFY_NAME_CHANGED(vp)
202#endif
203
204#ifndef ENOATTR
205#define ENOATTR ENODATA /* Attribute not found */
206#endif
207
208/* Note: EWRONGFS never visible outside the kernel */
209#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
210
211/*
212 * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
213 * return codes out of its known range in errno.
214 * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
215 * conflict with any code we use already or any code a driver may use)
216 * XXX Some options (currently we do #2):
217 * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
218 * 2/ 990 ["Unknown error 990"]
219 * 3/ EUCLEAN ["Structure needs cleaning"]
220 * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
221 */
222#define EFSCORRUPTED 990 /* Filesystem is corrupted */
223
224#define SYNCHRONIZE() barrier()
225#define __return_address __builtin_return_address(0)
226
227/*
228 * IRIX (BSD) quotactl makes use of separate commands for user/group,
229 * whereas on Linux the syscall encodes this information into the cmd
230 * field (see the QCMD macro in quota.h). These macros help keep the
231 * code portable - they are not visible from the syscall interface.
232 */
233#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */
234#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */
235
236/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
237/* we may well need to fine-tune this if it ever becomes an issue. */
238#define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */
239#define ndquot DQUOT_MAX_HEURISTIC
240
241/* IRIX uses the current size of the name cache to guess a good value */
242/* - this isn't the same but is a good enough starting point for now. */
243#define DQUOT_HASH_HEURISTIC files_stat.nr_files
244
245/* IRIX inodes maintain the project ID also, zero this field on Linux */
246#define DEFAULT_PROJID 0
247#define dfltprid DEFAULT_PROJID
248
249#define MAXPATHLEN 1024
250
251#define MIN(a,b) (min(a,b))
252#define MAX(a,b) (max(a,b))
253#define howmany(x, y) (((x)+((y)-1))/(y))
254#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
255
256#define xfs_stack_trace() dump_stack()
257
258#define xfs_itruncate_data(ip, off) \
259 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
260
261
262/* Move the kernel do_div definition off to one side */
263
#if defined __i386__
/* For ia32 we need to pull some tricks to get past various versions
 * of the compiler which do not like us using do_div in the middle
 * of large functions.
 */
/*
 * In-place division: divide the 4- or 8-byte integer at *a (width
 * selected by n == sizeof the caller's operand) by b, store the
 * quotient back through a, and return the remainder.  The 64-bit case
 * open-codes a 64/32 division as two chained 32-bit "divl" steps
 * (high word first, then remainder:low) instead of calling do_div.
 */
static inline __u32 xfs_do_div(void *a, __u32 b, int n)
{
	__u32	mod;

	switch (n) {
		case 4:
			mod = *(__u32 *)a % b;
			*(__u32 *)a = *(__u32 *)a / b;
			return mod;
		case 8:
			{
			unsigned long	__upper, __low, __high, __mod;
			__u64	c = *(__u64 *)a;
			__upper = __high = c >> 32;
			__low = c;
			if (__high) {
				/* split: quotient/remainder of the high word */
				__upper = __high % (b);
				__high = __high / (b);
			}
			/* divide (remainder:low) by b: quotient -> __low (eax),
			 * remainder -> __mod (edx) */
			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
			/* reassemble the 64-bit quotient from __high:__low via
			 * the edx:eax "A" constraint (no code emitted) */
			asm("":"=A" (c):"a" (__low),"d" (__high));
			*(__u64 *)a = c;
			return __mod;
			}
	}

	/* NOTREACHED */
	return 0;
}

/* Side effect free 64 bit mod operation: same divl trick as above but
 * *a is left unmodified; only the remainder is returned. */
static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
{
	switch (n) {
		case 4:
			return *(__u32 *)a % b;
		case 8:
			{
			unsigned long	__upper, __low, __high, __mod;
			__u64	c = *(__u64 *)a;
			__upper = __high = c >> 32;
			__low = c;
			if (__high) {
				__upper = __high % (b);
				__high = __high / (b);
			}
			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
			asm("":"=A" (c):"a" (__low),"d" (__high));
			return __mod;
			}
	}

	/* NOTREACHED */
	return 0;
}
#else
/*
 * Non-ia32 variant: same contract as above (divide *a in place by b,
 * return the remainder) but the 64-bit case can simply use the
 * kernel's do_div.
 */
static inline __u32 xfs_do_div(void *a, __u32 b, int n)
{
	__u32	mod;

	switch (n) {
		case 4:
			mod = *(__u32 *)a % b;
			*(__u32 *)a = *(__u32 *)a / b;
			return mod;
		case 8:
			mod = do_div(*(__u64 *)a, b);
			return mod;
	}

	/* NOTREACHED */
	return 0;
}

/* Side effect free 64 bit mod operation */
static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
{
	switch (n) {
		case 4:
			return *(__u32 *)a % b;
		case 8:
			{
			/* operate on a copy so *a is untouched */
			__u64	c = *(__u64 *)a;
			return do_div(c, b);
			}
	}

	/* NOTREACHED */
	return 0;
}
#endif
360
361#undef do_div
362#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a))
363#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
364
365static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
366{
367 x += y - 1;
368 do_div(x, y);
369 return(x * y);
370}
371
372#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
373
374#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
new file mode 100644
index 000000000000..ff145fd0d1a4
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -0,0 +1,1082 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32/*
33 * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff)
34 *
35 */
36
37#include "xfs.h"
38
39#include "xfs_fs.h"
40#include "xfs_inum.h"
41#include "xfs_log.h"
42#include "xfs_trans.h"
43#include "xfs_sb.h"
44#include "xfs_ag.h"
45#include "xfs_dir.h"
46#include "xfs_dir2.h"
47#include "xfs_alloc.h"
48#include "xfs_dmapi.h"
49#include "xfs_quota.h"
50#include "xfs_mount.h"
51#include "xfs_alloc_btree.h"
52#include "xfs_bmap_btree.h"
53#include "xfs_ialloc_btree.h"
54#include "xfs_btree.h"
55#include "xfs_ialloc.h"
56#include "xfs_attr_sf.h"
57#include "xfs_dir_sf.h"
58#include "xfs_dir2_sf.h"
59#include "xfs_dinode.h"
60#include "xfs_inode.h"
61#include "xfs_bmap.h"
62#include "xfs_bit.h"
63#include "xfs_rtalloc.h"
64#include "xfs_error.h"
65#include "xfs_itable.h"
66#include "xfs_rw.h"
67#include "xfs_acl.h"
68#include "xfs_cap.h"
69#include "xfs_mac.h"
70#include "xfs_attr.h"
71#include "xfs_inode_item.h"
72#include "xfs_buf_item.h"
73#include "xfs_utils.h"
74#include "xfs_iomap.h"
75
76#include <linux/capability.h>
77#include <linux/writeback.h>
78
79
#if defined(XFS_RW_TRACE)
/*
 * Log a read/write entry point into the per-inode ktrace ring buffer.
 * Silently returns when tracing was never initialised for this inode
 * (i_rwtrace == NULL).  64-bit values (di_size, offset, io_new_size)
 * are split into two 32-bit halves so each fits a void * trace slot
 * on 32-bit platforms.
 */
void
xfs_rw_enter_trace(
	int			tag,
	xfs_iocore_t		*io,
	void			*data,
	size_t			segs,
	loff_t			offset,
	int			ioflags)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (ip->i_rwtrace == NULL)
		return;
	ktrace_enter(ip->i_rwtrace,
		(void *)(unsigned long)tag,
		(void *)ip,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)data,
		(void *)((unsigned long)segs),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)ioflags),
		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}

/*
 * Log a page-cache invalidation event (tag XFS_INVAL_CACHED) into the
 * per-inode ktrace ring buffer; same 64->2x32-bit splitting as above.
 */
void
xfs_inval_cached_trace(
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	xfs_off_t	len,
	xfs_off_t	first,
	xfs_off_t	last)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (ip->i_rwtrace == NULL)
		return;
	ktrace_enter(ip->i_rwtrace,
		(void *)(__psint_t)XFS_INVAL_CACHED,
		(void *)ip,
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)((len >> 32) & 0xffffffff)),
		(void *)((unsigned long)(len & 0xffffffff)),
		(void *)((unsigned long)((first >> 32) & 0xffffffff)),
		(void *)((unsigned long)(first & 0xffffffff)),
		(void *)((unsigned long)((last >> 32) & 0xffffffff)),
		(void *)((unsigned long)(last & 0xffffffff)),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}
#endif
144
/*
 * xfs_iozero
 *
 * xfs_iozero clears the specified range of buffer supplied,
 * and marks all the affected blocks as valid and modified.  If
 * an affected block is not allocated, it will be allocated.  If
 * an affected block is not completely overwritten, and is not
 * valid before the operation, it will be read from disk before
 * being partially zeroed.
 *
 * Walks the range one page-cache page at a time through the
 * address_space prepare_write/commit_write hooks.  Also advances
 * i_size (capped at end_size) as pages are committed.
 *
 * Returns 0 on success, a *positive* errno on failure (the internal
 * status codes are negative; the final return negates them).
 */
STATIC int
xfs_iozero(
	struct inode		*ip,	/* inode			*/
	loff_t			pos,	/* offset in file		*/
	size_t			count,	/* size of data to zero		*/
	loff_t			end_size)	/* max file size to set	*/
{
	unsigned		bytes;
	struct page		*page;
	struct address_space	*mapping;
	char			*kaddr;
	int			status;

	mapping = ip->i_mapping;
	do {
		unsigned long index, offset;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		/* clamp to the end of this page, then to the remaining count */
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		status = -ENOMEM;
		page = grab_cache_page(mapping, index);	/* returns page locked */
		if (!page)
			break;

		kaddr = kmap(page);
		/* let the fs read in / allocate the affected blocks first */
		status = mapping->a_ops->prepare_write(NULL, page, offset,
							offset + bytes);
		if (status) {
			goto unlock;
		}

		memset((void *) (kaddr + offset), 0, bytes);
		flush_dcache_page(page);
		status = mapping->a_ops->commit_write(NULL, page, offset,
							offset + bytes);
		if (!status) {
			pos += bytes;
			count -= bytes;
			/* grow i_size to cover the zeroed range, but never
			 * past the caller's end_size limit */
			if (pos > i_size_read(ip))
				i_size_write(ip, pos < end_size ? pos : end_size);
		}

unlock:
		kunmap(page);
		unlock_page(page);
		page_cache_release(page);
		if (status)
			break;
	} while (count);

	return (-status);	/* negate negative errno -> positive error */
}
211
/*
 * xfs_inval_cached_pages
 *
 * Keep direct I/O and buffered I/O somewhat coherent by flushing and
 * invalidating any cached pages from the page-aligned start of the
 * affected range to EOF.  If there are no cached pages (VN_CACHED()
 * is zero) this is a cheap no-op.
 *
 * NOTE(review): the FI_REMAPF_LOCKED flag implies the caller already
 * holds the relevant lock -- this function itself takes no locks;
 * confirm against callers.  The 'write' and 'relock' parameters are
 * currently unused.
 */
void
xfs_inval_cached_pages(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	int		write,		/* unused */
	int		relock)		/* unused */
{
	if (VN_CACHED(vp)) {
		/* ctooff(offtoct(offset)) rounds offset down to a page
		 * ("click") boundary; -1 length means "through EOF" */
		xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
	}

}
235
236ssize_t /* bytes read, or (-) error */
237xfs_read(
238 bhv_desc_t *bdp,
239 struct kiocb *iocb,
240 const struct iovec *iovp,
241 unsigned int segs,
242 loff_t *offset,
243 int ioflags,
244 cred_t *credp)
245{
246 struct file *file = iocb->ki_filp;
247 struct inode *inode = file->f_mapping->host;
248 size_t size = 0;
249 ssize_t ret;
250 xfs_fsize_t n;
251 xfs_inode_t *ip;
252 xfs_mount_t *mp;
253 vnode_t *vp;
254 unsigned long seg;
255
256 ip = XFS_BHVTOI(bdp);
257 vp = BHV_TO_VNODE(bdp);
258 mp = ip->i_mount;
259
260 XFS_STATS_INC(xs_read_calls);
261
262 /* START copy & waste from filemap.c */
263 for (seg = 0; seg < segs; seg++) {
264 const struct iovec *iv = &iovp[seg];
265
266 /*
267 * If any segment has a negative length, or the cumulative
268 * length ever wraps negative then return -EINVAL.
269 */
270 size += iv->iov_len;
271 if (unlikely((ssize_t)(size|iv->iov_len) < 0))
272 return XFS_ERROR(-EINVAL);
273 }
274 /* END copy & waste from filemap.c */
275
276 if (unlikely(ioflags & IO_ISDIRECT)) {
277 xfs_buftarg_t *target =
278 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
279 mp->m_rtdev_targp : mp->m_ddev_targp;
280 if ((*offset & target->pbr_smask) ||
281 (size & target->pbr_smask)) {
282 if (*offset == ip->i_d.di_size) {
283 return (0);
284 }
285 return -XFS_ERROR(EINVAL);
286 }
287 }
288
289 n = XFS_MAXIOFFSET(mp) - *offset;
290 if ((n <= 0) || (size == 0))
291 return 0;
292
293 if (n < size)
294 size = n;
295
296 if (XFS_FORCED_SHUTDOWN(mp)) {
297 return -EIO;
298 }
299
300 if (unlikely(ioflags & IO_ISDIRECT))
301 down(&inode->i_sem);
302 xfs_ilock(ip, XFS_IOLOCK_SHARED);
303
304 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
305 !(ioflags & IO_INVIS)) {
306 vrwlock_t locktype = VRWLOCK_READ;
307
308 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
309 BHV_TO_VNODE(bdp), *offset, size,
310 FILP_DELAY_FLAG(file), &locktype);
311 if (ret) {
312 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
313 goto unlock_isem;
314 }
315 }
316
317 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
318 (void *)iovp, segs, *offset, ioflags);
319 ret = __generic_file_aio_read(iocb, iovp, segs, offset);
320 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
321 ret = wait_on_sync_kiocb(iocb);
322 if (ret > 0)
323 XFS_STATS_ADD(xs_read_bytes, ret);
324
325 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
326
327 if (likely(!(ioflags & IO_INVIS)))
328 xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
329
330unlock_isem:
331 if (unlikely(ioflags & IO_ISDIRECT))
332 up(&inode->i_sem);
333 return ret;
334}
335
/*
 * xfs_sendfile
 *
 * Behaviour-layer sendfile(2) entry point.  Clamps the count to the
 * maximum file offset, sends the DMAPI read event if enabled, then
 * delegates to generic_file_sendfile() under the shared iolock.
 *
 * Returns bytes sent, or a negative errno.
 */
ssize_t
xfs_sendfile(
	bhv_desc_t		*bdp,
	struct file		*filp,
	loff_t			*offset,
	int			ioflags,
	size_t			count,
	read_actor_t		actor,
	void			*target,
	cred_t			*credp)
{
	ssize_t			ret;
	xfs_fsize_t		n;
	xfs_inode_t		*ip;
	xfs_mount_t		*mp;
	vnode_t			*vp;

	ip = XFS_BHVTOI(bdp);
	vp = BHV_TO_VNODE(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	/* clamp the request to the maximum supported file offset */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (count == 0))
		return 0;

	if (n < count)
		count = n;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
	    (!(ioflags & IO_INVIS))) {
		vrwlock_t locktype = VRWLOCK_READ;
		int error;

		/* DMAPI managed-region callout; may block or fail the read */
		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
				      FILP_DELAY_FLAG(filp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -error;	/* positive XFS errno -> negative */
		}
	}
	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
		   (void *)(unsigned long)target, count, *offset, ioflags);
	ret = generic_file_sendfile(filp, offset, count, actor, target);

	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	/* update atime unless this is an invisible (DMAPI) read */
	if (likely(!(ioflags & IO_INVIS)))
		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);

	return ret;
}
397
398/*
399 * This routine is called to handle zeroing any space in the last
400 * block of the file that is beyond the EOF. We do this since the
401 * size is being increased without writing anything to that block
402 * and we don't want anyone to read the garbage on the disk.
403 */
404STATIC int /* error (positive) */
405xfs_zero_last_block(
406 struct inode *ip,
407 xfs_iocore_t *io,
408 xfs_off_t offset,
409 xfs_fsize_t isize,
410 xfs_fsize_t end_size)
411{
412 xfs_fileoff_t last_fsb;
413 xfs_mount_t *mp;
414 int nimaps;
415 int zero_offset;
416 int zero_len;
417 int isize_fsb_offset;
418 int error = 0;
419 xfs_bmbt_irec_t imap;
420 loff_t loff;
421 size_t lsize;
422
423 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
424 ASSERT(offset > isize);
425
426 mp = io->io_mount;
427
428 isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
429 if (isize_fsb_offset == 0) {
430 /*
431 * There are no extra bytes in the last block on disk to
432 * zero, so return.
433 */
434 return 0;
435 }
436
437 last_fsb = XFS_B_TO_FSBT(mp, isize);
438 nimaps = 1;
439 error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
440 &nimaps, NULL);
441 if (error) {
442 return error;
443 }
444 ASSERT(nimaps > 0);
445 /*
446 * If the block underlying isize is just a hole, then there
447 * is nothing to zero.
448 */
449 if (imap.br_startblock == HOLESTARTBLOCK) {
450 return 0;
451 }
452 /*
453 * Zero the part of the last block beyond the EOF, and write it
454 * out sync. We need to drop the ilock while we do this so we
455 * don't deadlock when the buffer cache calls back to us.
456 */
457 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
458 loff = XFS_FSB_TO_B(mp, last_fsb);
459 lsize = XFS_FSB_TO_B(mp, 1);
460
461 zero_offset = isize_fsb_offset;
462 zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
463
464 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
465
466 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
467 ASSERT(error >= 0);
468 return error;
469}
470
471/*
472 * Zero any on disk space between the current EOF and the new,
473 * larger EOF. This handles the normal case of zeroing the remainder
474 * of the last block in the file and the unusual case of zeroing blocks
475 * out beyond the size of the file. This second case only happens
476 * with fixed size extents and when the system crashes before the inode
477 * size was updated but after blocks were allocated. If fill is set,
478 * then any holes in the range are filled and zeroed. If not, the holes
479 * are left alone as holes.
480 */
481
482int /* error (positive) */
483xfs_zero_eof(
484 vnode_t *vp,
485 xfs_iocore_t *io,
486 xfs_off_t offset, /* starting I/O offset */
487 xfs_fsize_t isize, /* current inode size */
488 xfs_fsize_t end_size) /* terminal inode size */
489{
490 struct inode *ip = LINVFS_GET_IP(vp);
491 xfs_fileoff_t start_zero_fsb;
492 xfs_fileoff_t end_zero_fsb;
493 xfs_fileoff_t prev_zero_fsb;
494 xfs_fileoff_t zero_count_fsb;
495 xfs_fileoff_t last_fsb;
496 xfs_extlen_t buf_len_fsb;
497 xfs_extlen_t prev_zero_count;
498 xfs_mount_t *mp;
499 int nimaps;
500 int error = 0;
501 xfs_bmbt_irec_t imap;
502 loff_t loff;
503 size_t lsize;
504
505 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
506 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
507
508 mp = io->io_mount;
509
510 /*
511 * First handle zeroing the block on which isize resides.
512 * We only zero a part of that block so it is handled specially.
513 */
514 error = xfs_zero_last_block(ip, io, offset, isize, end_size);
515 if (error) {
516 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
517 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
518 return error;
519 }
520
521 /*
522 * Calculate the range between the new size and the old
523 * where blocks needing to be zeroed may exist. To get the
524 * block where the last byte in the file currently resides,
525 * we need to subtract one from the size and truncate back
526 * to a block boundary. We subtract 1 in case the size is
527 * exactly on a block boundary.
528 */
529 last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
530 start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
531 end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
532 ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
533 if (last_fsb == end_zero_fsb) {
534 /*
535 * The size was only incremented on its last block.
536 * We took care of that above, so just return.
537 */
538 return 0;
539 }
540
541 ASSERT(start_zero_fsb <= end_zero_fsb);
542 prev_zero_fsb = NULLFILEOFF;
543 prev_zero_count = 0;
544 while (start_zero_fsb <= end_zero_fsb) {
545 nimaps = 1;
546 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
547 error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
548 0, NULL, 0, &imap, &nimaps, NULL);
549 if (error) {
550 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
551 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
552 return error;
553 }
554 ASSERT(nimaps > 0);
555
556 if (imap.br_state == XFS_EXT_UNWRITTEN ||
557 imap.br_startblock == HOLESTARTBLOCK) {
558 /*
559 * This loop handles initializing pages that were
560 * partially initialized by the code below this
561 * loop. It basically zeroes the part of the page
562 * that sits on a hole and sets the page as P_HOLE
563 * and calls remapf if it is a mapped file.
564 */
565 prev_zero_fsb = NULLFILEOFF;
566 prev_zero_count = 0;
567 start_zero_fsb = imap.br_startoff +
568 imap.br_blockcount;
569 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
570 continue;
571 }
572
573 /*
574 * There are blocks in the range requested.
575 * Zero them a single write at a time. We actually
576 * don't zero the entire range returned if it is
577 * too big and simply loop around to get the rest.
578 * That is not the most efficient thing to do, but it
579 * is simple and this path should not be exercised often.
580 */
581 buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
582 mp->m_writeio_blocks << 8);
583 /*
584 * Drop the inode lock while we're doing the I/O.
585 * We'll still have the iolock to protect us.
586 */
587 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
588
589 loff = XFS_FSB_TO_B(mp, start_zero_fsb);
590 lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
591
592 error = xfs_iozero(ip, loff, lsize, end_size);
593
594 if (error) {
595 goto out_lock;
596 }
597
598 prev_zero_fsb = start_zero_fsb;
599 prev_zero_count = buf_len_fsb;
600 start_zero_fsb = imap.br_startoff + buf_len_fsb;
601 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
602
603 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
604 }
605
606 return 0;
607
608out_lock:
609
610 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
611 ASSERT(error >= 0);
612 return error;
613}
614
/*
 * xfs_write - behavior-layer entry point for write(2)/aio writes on XFS
 * regular files, covering both buffered and O_DIRECT paths.
 *
 * Validates the iovec, takes the iolock (exclusive via i_sem for buffered
 * writes, shared for direct I/O), dispatches DMAPI events when enabled,
 * zeroes any gap between the old EOF and the write offset, strips
 * setuid/setgid, performs the write via the generic file paths, and
 * finally updates the on-disk size and handles O_SYNC semantics.
 *
 * Returns the number of bytes written, or a negative errno.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t		*bdp,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		nsegs,
	loff_t			*offset,
	int			ioflags,
	cred_t			*credp)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	unsigned long		segs = nsegs;
	xfs_inode_t		*xip;
	xfs_mount_t		*mp;
	ssize_t			ret = 0, error = 0;
	xfs_fsize_t		isize, new_size;
	xfs_iocore_t		*io;
	vnode_t			*vp;
	unsigned long		seg;
	int			iolock;
	int			eventsent = 0;
	vrwlock_t		locktype;
	size_t			ocount = 0, count;
	loff_t			pos;
	int			need_isem = 1, need_flush = 0;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	/*
	 * Validate the user iovec and accumulate the total byte count.
	 * A bad segment after the first truncates the request rather
	 * than failing it outright.
	 */
	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		ocount += iv->iov_len;
		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		segs = seg;
		ocount -= iv->iov_len;	/* This segment is no good */
		break;
	}

	count = ocount;
	pos = *offset;

	if (count == 0)
		return 0;

	io = &xip->i_iocore;
	mp = io->io_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Block here if the filesystem is frozen for a snapshot. */
	fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		/* Direct I/O must be sector aligned in offset and length. */
		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
			return XFS_ERROR(-EINVAL);

		/*
		 * Direct writes can use the shared iolock unless there are
		 * cached pages to flush first or we're extending the file.
		 */
		if (!VN_CACHED(vp) && pos < i_size_read(inode))
			need_isem = 0;

		if (VN_CACHED(vp))
			need_flush = 1;
	}

relock:
	if (need_isem) {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;

		down(&inode->i_sem);
	} else {
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = i_size_read(inode);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_isem;
	}

	new_size = pos + count;
	if (new_size > isize)
		io->io_new_size = new_size;

	/* Send the DMAPI write event, at most once per call. */
	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = pos;
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_isem)
			dmflags |= DM_FLAGS_ISEM;

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      pos, count,
				      dmflags, &locktype);
		if (error) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_isem;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reaquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && savedsize != isize) {
			pos = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/*
	 * On Linux, generic_file_write updates the times even if
	 * no data is copied in so long as the write had a size.
	 *
	 * We must update xfs' times since revalidate will overcopy xfs.
	 */
	if (!(ioflags & IO_INVIS)) {
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
		inode_update_time(inode, 1);
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */

	if (pos > isize) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
			isize, pos + count);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			goto out_unlock_isem;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */

	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -remove_suid(file->f_dentry);
		if (unlikely(error)) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_isem;
		}
	}

retry:
	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (need_flush) {
			xfs_inval_cached_trace(io, pos, -1,
					ctooff(offtoct(pos)), -1);
			VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
					-1, FI_REMAPF_LOCKED);
		}

		if (need_isem) {
			/* demote the lock now the cached pages are gone */
			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
			up(&inode->i_sem);

			iolock = XFS_IOLOCK_SHARED;
			locktype = VRWLOCK_WRITE_DIRECT;
			need_isem = 0;
		}

 		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&segs, pos, offset, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			need_isem = 1;
			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(xip, iolock);
			goto relock;
		}
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_buffered_write(iocb, iovp, segs,
				pos, offset, count, ret);
	}

	current->backing_dev_info = NULL;

	/* For non-AIO callers, wait for a queued async kiocb to finish. */
	if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
		ret = wait_on_sync_kiocb(iocb);

	/*
	 * ENOSPC with DMAPI enabled: give the DMAPI application a chance
	 * to free up space, then retry the whole write once.
	 */
	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {

		xfs_rwunlock(bdp, locktype);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally  unused */
		if (error)
			goto out_unlock_isem;
		xfs_rwlock(bdp, locktype);
		pos = xip->i_d.di_size;
		ret = 0;
		goto retry;
	}

	/*
	 * If the write extended the file, push the new size into the
	 * on-disk inode; re-check under the ilock to avoid racing with
	 * a concurrent extender.
	 */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated the
		 * size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			xfs_inode_log_item_t	*iip = xip->i_itemp;

			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			if (iip && iip->ili_last_lsn) {
				xfs_log_force(mp, iip->ili_last_lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}

		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */

			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, NULL);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
			}
			if (error)
				goto out_unlock_internal;
		}

		xfs_rwunlock(bdp, locktype);
		if (need_isem)
			up(&inode->i_sem);

		/* Flush and wait on the dirtied range for O_SYNC. */
		error = sync_page_range(inode, mapping, pos, ret);
		if (!error)
			error = ret;
		return error;
	}

 out_unlock_internal:
	xfs_rwunlock(bdp, locktype);
 out_unlock_isem:
	if (need_isem)
		up(&inode->i_sem);
	return -error;
}
988
989/*
990 * All xfs metadata buffers except log state machine buffers
991 * get this attached as their b_bdstrat callback function.
992 * This is so that we can catch a buffer
993 * after prematurely unpinning it to forcibly shutdown the filesystem.
994 */
995int
996xfs_bdstrat_cb(struct xfs_buf *bp)
997{
998 xfs_mount_t *mp;
999
1000 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
1001 if (!XFS_FORCED_SHUTDOWN(mp)) {
1002 pagebuf_iorequest(bp);
1003 return 0;
1004 } else {
1005 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
1006 /*
1007 * Metadata write that didn't get logged but
1008 * written delayed anyway. These aren't associated
1009 * with a transaction, and can be ignored.
1010 */
1011 if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
1012 (XFS_BUF_ISREAD(bp)) == 0)
1013 return (xfs_bioerror_relse(bp));
1014 else
1015 return (xfs_bioerror(bp));
1016 }
1017}
1018
1019
1020int
1021xfs_bmap(bhv_desc_t *bdp,
1022 xfs_off_t offset,
1023 ssize_t count,
1024 int flags,
1025 xfs_iomap_t *iomapp,
1026 int *niomaps)
1027{
1028 xfs_inode_t *ip = XFS_BHVTOI(bdp);
1029 xfs_iocore_t *io = &ip->i_iocore;
1030
1031 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
1032 ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
1033 ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
1034
1035 return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
1036}
1037
/*
 * Wrapper around bdstrat so that we can stop data
 * from going to disk in case we are shutting down the filesystem.
 * Typically user data goes thru this path; one of the exceptions
 * is the superblock.
 */
int
xfsbdstrat(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	ASSERT(mp);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		/* Shutdown in progress: error the buffer out instead. */
		xfs_buftrace("XFSBDSTRAT IOERROR", bp);
		return (xfs_bioerror_relse(bp));
	}

	/* Grio redirection would go here
	 * if (XFS_BUF_IS_GRIO(bp)) {
	 */
	pagebuf_iorequest(bp);
	return 0;
}
1062
1063/*
1064 * If the underlying (data/log/rt) device is readonly, there are some
1065 * operations that cannot proceed.
1066 */
1067int
1068xfs_dev_is_read_only(
1069 xfs_mount_t *mp,
1070 char *message)
1071{
1072 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
1073 xfs_readonly_buftarg(mp->m_logdev_targp) ||
1074 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
1075 cmn_err(CE_NOTE,
1076 "XFS: %s required on read-only device.", message);
1077 cmn_err(CE_NOTE,
1078 "XFS: write access unavailable, cannot proceed.");
1079 return EROFS;
1080 }
1081 return 0;
1082}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
new file mode 100644
index 000000000000..d723e35254a0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -0,0 +1,116 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_LRW_H__
33#define __XFS_LRW_H__
34
35struct vnode;
36struct bhv_desc;
37struct xfs_mount;
38struct xfs_iocore;
39struct xfs_inode;
40struct xfs_bmbt_irec;
41struct xfs_buf;
42struct xfs_iomap;
43
44#if defined(XFS_RW_TRACE)
45/*
46 * Defines for the trace mechanisms in xfs_lrw.c.
47 */
48#define XFS_RW_KTRACE_SIZE 128
49
50#define XFS_READ_ENTER 1
51#define XFS_WRITE_ENTER 2
52#define XFS_IOMAP_READ_ENTER 3
53#define XFS_IOMAP_WRITE_ENTER 4
54#define XFS_IOMAP_READ_MAP 5
55#define XFS_IOMAP_WRITE_MAP 6
56#define XFS_IOMAP_WRITE_NOSPACE 7
57#define XFS_ITRUNC_START 8
58#define XFS_ITRUNC_FINISH1 9
59#define XFS_ITRUNC_FINISH2 10
60#define XFS_CTRUNC1 11
61#define XFS_CTRUNC2 12
62#define XFS_CTRUNC3 13
63#define XFS_CTRUNC4 14
64#define XFS_CTRUNC5 15
65#define XFS_CTRUNC6 16
66#define XFS_BUNMAPI 17
67#define XFS_INVAL_CACHED 18
68#define XFS_DIORD_ENTER 19
69#define XFS_DIOWR_ENTER 20
70#define XFS_SENDFILE_ENTER 21
71#define XFS_WRITEPAGE_ENTER 22
72#define XFS_RELEASEPAGE_ENTER 23
73#define XFS_IOMAP_ALLOC_ENTER 24
74#define XFS_IOMAP_ALLOC_MAP 25
75#define XFS_IOMAP_UNWRITTEN 26
76extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
77 void *, size_t, loff_t, int);
78extern void xfs_inval_cached_trace(struct xfs_iocore *,
79 xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
80#else
81#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags)
82#define xfs_inval_cached_trace(io, offset, len, first, last)
83#endif
84
85/*
86 * Maximum count of bmaps used by read and write paths.
87 */
88#define XFS_MAX_RW_NBMAPS 4
89
90extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int,
91 struct xfs_iomap *, int *);
92extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
93extern int xfs_bdstrat_cb(struct xfs_buf *);
94
95extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
96 xfs_fsize_t, xfs_fsize_t);
97extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *,
98 xfs_off_t, int, int);
99extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
100 const struct iovec *, unsigned int,
101 loff_t *, int, struct cred *);
102extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
103 const struct iovec *, unsigned int,
104 loff_t *, int, struct cred *);
105extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
106 loff_t *, int, size_t, read_actor_t,
107 void *, struct cred *);
108
109extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
110
/*
 * Convert a filesystem block number to an on-disk address for I/O:
 * realtime files map straight to basic blocks, data-device files go
 * through the AG-aware FSB-to-daddr conversion.
 */
#define XFS_FSB_TO_DB_IO(io,fsb) \
		(((io)->io_flags & XFS_IOCORE_RT) ? \
		 XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)))
115
116#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
new file mode 100644
index 000000000000..aaf5ddba47f3
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -0,0 +1,132 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include <linux/proc_fs.h>
35
36DEFINE_PER_CPU(struct xfsstats, xfsstats);
37
38STATIC int
39xfs_read_xfsstats(
40 char *buffer,
41 char **start,
42 off_t offset,
43 int count,
44 int *eof,
45 void *data)
46{
47 int c, i, j, len, val;
48 __uint64_t xs_xstrat_bytes = 0;
49 __uint64_t xs_write_bytes = 0;
50 __uint64_t xs_read_bytes = 0;
51
52 static struct xstats_entry {
53 char *desc;
54 int endpoint;
55 } xstats[] = {
56 { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC },
57 { "abt", XFSSTAT_END_ALLOC_BTREE },
58 { "blk_map", XFSSTAT_END_BLOCK_MAPPING },
59 { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE },
60 { "dir", XFSSTAT_END_DIRECTORY_OPS },
61 { "trans", XFSSTAT_END_TRANSACTIONS },
62 { "ig", XFSSTAT_END_INODE_OPS },
63 { "log", XFSSTAT_END_LOG_OPS },
64 { "push_ail", XFSSTAT_END_TAIL_PUSHING },
65 { "xstrat", XFSSTAT_END_WRITE_CONVERT },
66 { "rw", XFSSTAT_END_READ_WRITE_OPS },
67 { "attr", XFSSTAT_END_ATTRIBUTE_OPS },
68 { "icluster", XFSSTAT_END_INODE_CLUSTER },
69 { "vnodes", XFSSTAT_END_VNODE_OPS },
70 { "buf", XFSSTAT_END_BUF },
71 };
72
73 /* Loop over all stats groups */
74 for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) {
75 len += sprintf(buffer + len, xstats[i].desc);
76 /* inner loop does each group */
77 while (j < xstats[i].endpoint) {
78 val = 0;
79 /* sum over all cpus */
80 for (c = 0; c < NR_CPUS; c++) {
81 if (!cpu_possible(c)) continue;
82 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
83 }
84 len += sprintf(buffer + len, " %u", val);
85 j++;
86 }
87 buffer[len++] = '\n';
88 }
89 /* extra precision counters */
90 for (i = 0; i < NR_CPUS; i++) {
91 if (!cpu_possible(i)) continue;
92 xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
93 xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
94 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
95 }
96
97 len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n",
98 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
99 len += sprintf(buffer + len, "debug %u\n",
100#if defined(DEBUG)
101 1);
102#else
103 0);
104#endif
105
106 if (offset >= len) {
107 *start = buffer;
108 *eof = 1;
109 return 0;
110 }
111 *start = buffer + offset;
112 if ((len -= offset) > count)
113 return count;
114 *eof = 1;
115
116 return len;
117}
118
119void
120xfs_init_procfs(void)
121{
122 if (!proc_mkdir("fs/xfs", NULL))
123 return;
124 create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
125}
126
/*
 * Tear down the procfs entries created by xfs_init_procfs().
 * The "stat" file must be removed before its parent directory.
 */
void
xfs_cleanup_procfs(void)
{
	remove_proc_entry("fs/xfs/stat", NULL);
	remove_proc_entry("fs/xfs", NULL);
}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
new file mode 100644
index 000000000000..3f756a6c3eb0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -0,0 +1,166 @@
1/*
2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_STATS_H__
33#define __XFS_STATS_H__
34
35
36#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
37
38#include <linux/percpu.h>
39
40/*
41 * XFS global statistics
42 */
/*
 * Per-cpu XFS statistics, summed and exported via /proc/fs/xfs/stat.
 *
 * The __u32 counters are laid out in named groups; each
 * XFSSTAT_END_* value is the cumulative count of __u32 fields up to
 * and including that group, so the proc reader can walk the struct as
 * a flat __u32 array.  The 64-bit byte counters at the end are
 * reported separately (the "xpc" line).
 */
struct xfsstats {
/* extent allocation */
# define XFSSTAT_END_EXTENT_ALLOC	4
	__uint32_t		xs_allocx;
	__uint32_t		xs_allocb;
	__uint32_t		xs_freex;
	__uint32_t		xs_freeb;
/* allocation btree */
# define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
	__uint32_t		xs_abt_lookup;
	__uint32_t		xs_abt_compare;
	__uint32_t		xs_abt_insrec;
	__uint32_t		xs_abt_delrec;
/* block mapping */
# define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
	__uint32_t		xs_blk_mapr;
	__uint32_t		xs_blk_mapw;
	__uint32_t		xs_blk_unmap;
	__uint32_t		xs_add_exlist;
	__uint32_t		xs_del_exlist;
	__uint32_t		xs_look_exlist;
	__uint32_t		xs_cmp_exlist;
/* block map btree */
# define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
	__uint32_t		xs_bmbt_lookup;
	__uint32_t		xs_bmbt_compare;
	__uint32_t		xs_bmbt_insrec;
	__uint32_t		xs_bmbt_delrec;
/* directory operations */
# define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
	__uint32_t		xs_dir_lookup;
	__uint32_t		xs_dir_create;
	__uint32_t		xs_dir_remove;
	__uint32_t		xs_dir_getdents;
/* transactions */
# define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
	__uint32_t		xs_trans_sync;
	__uint32_t		xs_trans_async;
	__uint32_t		xs_trans_empty;
/* inode cache (inode get) operations */
# define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
	__uint32_t		xs_ig_attempts;
	__uint32_t		xs_ig_found;
	__uint32_t		xs_ig_frecycle;
	__uint32_t		xs_ig_missed;
	__uint32_t		xs_ig_dup;
	__uint32_t		xs_ig_reclaims;
	__uint32_t		xs_ig_attrchg;
/* log operations */
# define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
	__uint32_t		xs_log_writes;
	__uint32_t		xs_log_blocks;
	__uint32_t		xs_log_noiclogs;
	__uint32_t		xs_log_force;
	__uint32_t		xs_log_force_sleep;
/* AIL (active item list) tail pushing */
# define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
	__uint32_t		xs_try_logspace;
	__uint32_t		xs_sleep_logspace;
	__uint32_t		xs_push_ail;
	__uint32_t		xs_push_ail_success;
	__uint32_t		xs_push_ail_pushbuf;
	__uint32_t		xs_push_ail_pinned;
	__uint32_t		xs_push_ail_locked;
	__uint32_t		xs_push_ail_flushing;
	__uint32_t		xs_push_ail_restarts;
	__uint32_t		xs_push_ail_flush;
/* extent-to-real conversions on write */
# define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
	__uint32_t		xs_xstrat_quick;
	__uint32_t		xs_xstrat_split;
/* read/write syscall counts */
# define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
	__uint32_t		xs_write_calls;
	__uint32_t		xs_read_calls;
/* extended attribute operations */
# define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
	__uint32_t		xs_attr_get;
	__uint32_t		xs_attr_set;
	__uint32_t		xs_attr_remove;
	__uint32_t		xs_attr_list;
/* inode cluster flushing */
# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
	__uint32_t		xs_iflush_count;
	__uint32_t		xs_icluster_flushcnt;
	__uint32_t		xs_icluster_flushinode;
/* vnode lifecycle */
# define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
	__uint32_t		vn_active;	/* # vnodes not on free lists */
	__uint32_t		vn_alloc;	/* # times vn_alloc called */
	__uint32_t		vn_get;		/* # times vn_get called */
	__uint32_t		vn_hold;	/* # times vn_hold called */
	__uint32_t		vn_rele;	/* # times vn_rele called */
	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
	__uint32_t		vn_remove;	/* # times vn_remove called */
	__uint32_t		vn_free;	/* # times vn_free called */
/* pagebuf (buffer cache) operations */
#define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
	__uint32_t		pb_get;
	__uint32_t		pb_create;
	__uint32_t		pb_get_locked;
	__uint32_t		pb_get_locked_waited;
	__uint32_t		pb_busy_locked;
	__uint32_t		pb_miss_locked;
	__uint32_t		pb_page_retries;
	__uint32_t		pb_page_found;
	__uint32_t		pb_get_read;
/* Extra precision counters */
	__uint64_t		xs_xstrat_bytes;
	__uint64_t		xs_write_bytes;
	__uint64_t		xs_read_bytes;
};
140
141DECLARE_PER_CPU(struct xfsstats, xfsstats);
142
143/*
144 * We don't disable preempt, not too worried about poking the
145 * wrong CPU's stat for now (also aggregated before reporting).
146 */
147#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++)
148#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
149#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
150
151extern void xfs_init_procfs(void);
152extern void xfs_cleanup_procfs(void);
153
154
155#else /* !CONFIG_PROC_FS */
156
157# define XFS_STATS_INC(count)
158# define XFS_STATS_DEC(count)
159# define XFS_STATS_ADD(count, inc)
160
161static __inline void xfs_init_procfs(void) { };
162static __inline void xfs_cleanup_procfs(void) { };
163
164#endif /* !CONFIG_PROC_FS */
165
166#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
new file mode 100644
index 000000000000..53dc658cafa6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -0,0 +1,912 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35#include "xfs_inum.h"
36#include "xfs_log.h"
37#include "xfs_clnt.h"
38#include "xfs_trans.h"
39#include "xfs_sb.h"
40#include "xfs_dir.h"
41#include "xfs_dir2.h"
42#include "xfs_alloc.h"
43#include "xfs_dmapi.h"
44#include "xfs_quota.h"
45#include "xfs_mount.h"
46#include "xfs_alloc_btree.h"
47#include "xfs_bmap_btree.h"
48#include "xfs_ialloc_btree.h"
49#include "xfs_btree.h"
50#include "xfs_ialloc.h"
51#include "xfs_attr_sf.h"
52#include "xfs_dir_sf.h"
53#include "xfs_dir2_sf.h"
54#include "xfs_dinode.h"
55#include "xfs_inode.h"
56#include "xfs_bmap.h"
57#include "xfs_bit.h"
58#include "xfs_rtalloc.h"
59#include "xfs_error.h"
60#include "xfs_itable.h"
61#include "xfs_rw.h"
62#include "xfs_acl.h"
63#include "xfs_cap.h"
64#include "xfs_mac.h"
65#include "xfs_attr.h"
66#include "xfs_buf_item.h"
67#include "xfs_utils.h"
68#include "xfs_version.h"
69#include "xfs_ioctl32.h"
70
71#include <linux/namei.h>
72#include <linux/init.h>
73#include <linux/mount.h>
74#include <linux/writeback.h>
75
76STATIC struct quotactl_ops linvfs_qops;
77STATIC struct super_operations linvfs_sops;
78STATIC kmem_zone_t *linvfs_inode_zone;
79
80STATIC struct xfs_mount_args *
81xfs_args_allocate(
82 struct super_block *sb)
83{
84 struct xfs_mount_args *args;
85
86 args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
87 args->logbufs = args->logbufsize = -1;
88 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
89
90 /* Copy the already-parsed mount(2) flags we're interested in */
91 if (sb->s_flags & MS_NOATIME)
92 args->flags |= XFSMNT_NOATIME;
93 if (sb->s_flags & MS_DIRSYNC)
94 args->flags |= XFSMNT_DIRSYNC;
95 if (sb->s_flags & MS_SYNCHRONOUS)
96 args->flags |= XFSMNT_WSYNC;
97
98 /* Default to 32 bit inodes on Linux all the time */
99 args->flags |= XFSMNT_32BITINODES;
100
101 return args;
102}
103
104__uint64_t
105xfs_max_file_offset(
106 unsigned int blockshift)
107{
108 unsigned int pagefactor = 1;
109 unsigned int bitshift = BITS_PER_LONG - 1;
110
111 /* Figure out maximum filesize, on Linux this can depend on
112 * the filesystem blocksize (on 32 bit platforms).
113 * __block_prepare_write does this in an [unsigned] long...
114 * page->index << (PAGE_CACHE_SHIFT - bbits)
115 * So, for page sized blocks (4K on 32 bit platforms),
116 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
117 * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
118 * but for smaller blocksizes it is less (bbits = log2 bsize).
119 * Note1: get_block_t takes a long (implicit cast from above)
120 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
121 * can optionally convert the [unsigned] long from above into
122 * an [unsigned] long long.
123 */
124
125#if BITS_PER_LONG == 32
126# if defined(CONFIG_LBD)
127 ASSERT(sizeof(sector_t) == 8);
128 pagefactor = PAGE_CACHE_SIZE;
129 bitshift = BITS_PER_LONG;
130# else
131 pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
132# endif
133#endif
134
135 return (((__uint64_t)pagefactor) << bitshift) - 1;
136}
137
138STATIC __inline__ void
139xfs_set_inodeops(
140 struct inode *inode)
141{
142 vnode_t *vp = LINVFS_GET_VP(inode);
143
144 if (vp->v_type == VNON) {
145 vn_mark_bad(vp);
146 } else if (S_ISREG(inode->i_mode)) {
147 inode->i_op = &linvfs_file_inode_operations;
148 inode->i_fop = &linvfs_file_operations;
149 inode->i_mapping->a_ops = &linvfs_aops;
150 } else if (S_ISDIR(inode->i_mode)) {
151 inode->i_op = &linvfs_dir_inode_operations;
152 inode->i_fop = &linvfs_dir_operations;
153 } else if (S_ISLNK(inode->i_mode)) {
154 inode->i_op = &linvfs_symlink_inode_operations;
155 if (inode->i_blocks)
156 inode->i_mapping->a_ops = &linvfs_aops;
157 } else {
158 inode->i_op = &linvfs_file_inode_operations;
159 init_special_inode(inode, inode->i_mode, inode->i_rdev);
160 }
161}
162
/*
 * Copy the authoritative on-disk inode state (xfs_inode) into the Linux
 * inode, and clear VMODIFIED to record that the two are in sync again.
 */
STATIC __inline__ void
xfs_revalidate_inode(
	xfs_mount_t		*mp,
	vnode_t			*vp,
	xfs_inode_t		*ip)
{
	struct inode		*inode = LINVFS_GET_IP(vp);

	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;
	/* only block and character specials carry a device number */
	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
		inode->i_rdev = 0;
	} else {
		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
		inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
	}
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	/* i_blocks is in 512-byte basic blocks; include delalloc blocks */
	inode->i_blocks =
		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	/* translate XFS per-inode flags to the generic S_* inode flags,
	 * clearing any that are no longer set on disk */
	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;
	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;
	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
		inode->i_flags |= S_SYNC;
	else
		inode->i_flags &= ~S_SYNC;
	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
		inode->i_flags |= S_NOATIME;
	else
		inode->i_flags &= ~S_NOATIME;
	vp->v_flag &= ~VMODIFIED;
}
210
/*
 * Attach an xfs_inode behavior to a vnode and, once the inode core is
 * valid, populate the Linux inode and publish it.
 *
 * bdp:       behavior descriptor of the vfs the inode belongs to
 * vp:        vnode being initialized
 * inode_bhv: behavior descriptor to link the xfs_inode in with
 * unlock:    nonzero when the caller wants the new inode unlocked
 */
void
xfs_initialize_vnode(
	bhv_desc_t		*bdp,
	vnode_t			*vp,
	bhv_desc_t		*inode_bhv,
	int			unlock)
{
	xfs_inode_t		*ip = XFS_BHVTOI(inode_bhv);
	struct inode		*inode = LINVFS_GET_IP(vp);

	/* first call for this inode: link it into the behavior chain */
	if (!inode_bhv->bd_vobj) {
		vp->v_vfsp = bhvtovfs(bdp);
		bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
		bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
	}

	/*
	 * We need to set the ops vectors, and unlock the inode, but if
	 * we have been called during the new inode create process, it is
	 * too early to fill in the Linux inode.  We will get called a
	 * second time once the inode is properly set up, and then we can
	 * finish our work.
	 */
	if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
		vp->v_type = IFTOVT(ip->i_d.di_mode);
		xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
		xfs_set_inodeops(inode);

		/* barrier(): make sure XFS_INEW is cleared before other
		 * CPUs can observe the unlocked inode */
		ip->i_flags &= ~XFS_INEW;
		barrier();

		unlock_new_inode(inode);
	}
}
245
246int
247xfs_blkdev_get(
248 xfs_mount_t *mp,
249 const char *name,
250 struct block_device **bdevp)
251{
252 int error = 0;
253
254 *bdevp = open_bdev_excl(name, 0, mp);
255 if (IS_ERR(*bdevp)) {
256 error = PTR_ERR(*bdevp);
257 printk("XFS: Invalid device [%s], error=%d\n", name, error);
258 }
259
260 return -error;
261}
262
/*
 * Release a block device taken with xfs_blkdev_get().
 * Tolerates a NULL @bdev so callers can unwind unconditionally.
 */
void
xfs_blkdev_put(
	struct block_device	*bdev)
{
	if (!bdev)
		return;
	close_bdev_excl(bdev);
}
270
271
272STATIC struct inode *
273linvfs_alloc_inode(
274 struct super_block *sb)
275{
276 vnode_t *vp;
277
278 vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone,
279 kmem_flags_convert(KM_SLEEP));
280 if (!vp)
281 return NULL;
282 return LINVFS_GET_IP(vp);
283}
284
285STATIC void
286linvfs_destroy_inode(
287 struct inode *inode)
288{
289 kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
290}
291
/*
 * Slab constructor for the vnode cache: initialize the embedded Linux
 * inode exactly once per slab object.  The SLAB_CTOR flag test follows
 * the 2.6 slab protocol: run only for a real construction, not for a
 * debug verify pass.
 */
STATIC void
init_once(
	void			*data,
	kmem_cache_t		*cachep,
	unsigned long		flags)
{
	vnode_t			*vp = (vnode_t *)data;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(LINVFS_GET_IP(vp));
}
304
305STATIC int
306init_inodecache( void )
307{
308 linvfs_inode_zone = kmem_cache_create("linvfs_icache",
309 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
310 init_once, NULL);
311 if (linvfs_inode_zone == NULL)
312 return -ENOMEM;
313 return 0;
314}
315
/*
 * Tear down the vnode/inode slab cache at module unload.  A nonzero
 * return from kmem_cache_destroy means objects are still live, which
 * would indicate an inode leak -- warn loudly.
 */
STATIC void
destroy_inodecache( void )
{
	if (kmem_cache_destroy(linvfs_inode_zone))
		printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
}
322
323/*
324 * Attempt to flush the inode, this will actually fail
325 * if the inode is pinned, but we dirty the inode again
326 * at the point when it is unpinned after a log write,
327 * since this is when the inode itself becomes flushable.
328 */
329STATIC int
330linvfs_write_inode(
331 struct inode *inode,
332 int sync)
333{
334 vnode_t *vp = LINVFS_GET_VP(inode);
335 int error = 0, flags = FLUSH_INODE;
336
337 if (vp) {
338 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
339 if (sync)
340 flags |= FLUSH_SYNC;
341 VOP_IFLUSH(vp, flags, error);
342 if (error == EAGAIN) {
343 if (sync)
344 VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
345 else
346 error = 0;
347 }
348 }
349
350 return -error;
351}
352
/*
 * super_operations.clear_inode: drop the vnode reference held for the
 * Linux inode and dismantle the vnode/behavior state.
 */
STATIC void
linvfs_clear_inode(
	struct inode		*inode)
{
	vnode_t			*vp = LINVFS_GET_VP(inode);

	if (vp) {
		vn_rele(vp);
		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
		/*
		 * Do all our cleanup, and remove this vnode.
		 */
		vn_remove(vp);
	}
}
368
369
370/*
371 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
372 * Doing this has two advantages:
373 * - It saves on stack space, which is tight in certain situations
374 * - It can be used (with care) as a mechanism to avoid deadlocks.
375 * Flushing while allocating in a full filesystem requires both.
376 */
377STATIC void
378xfs_syncd_queue_work(
379 struct vfs *vfs,
380 void *data,
381 void (*syncer)(vfs_t *, void *))
382{
383 vfs_sync_work_t *work;
384
385 work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
386 INIT_LIST_HEAD(&work->w_list);
387 work->w_syncer = syncer;
388 work->w_data = data;
389 work->w_vfs = vfs;
390 spin_lock(&vfs->vfs_sync_lock);
391 list_add_tail(&work->w_list, &vfs->vfs_sync_list);
392 spin_unlock(&vfs->vfs_sync_lock);
393 wake_up_process(vfs->vfs_sync_task);
394}
395
396/*
397 * Flush delayed allocate data, attempting to free up reserved space
398 * from existing allocations. At this point a new allocation attempt
399 * has failed with ENOSPC and we are in the process of scratching our
400 * heads, looking about for more room...
401 */
402STATIC void
403xfs_flush_inode_work(
404 vfs_t *vfs,
405 void *inode)
406{
407 filemap_flush(((struct inode *)inode)->i_mapping);
408 iput((struct inode *)inode);
409}
410
/*
 * Queue a flush of one inode's delalloc data on xfssyncd (see comment
 * on xfs_flush_inode_work), then pause briefly to give the flush a
 * chance to make progress before the caller retries its allocation.
 */
void
xfs_flush_inode(
	xfs_inode_t	*ip)
{
	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);

	/* hold a reference across the asynchronous work item */
	igrab(inode);
	xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
	delay(HZ/2);
}
422
423/*
424 * This is the "bigger hammer" version of xfs_flush_inode_work...
425 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
426 */
427STATIC void
428xfs_flush_device_work(
429 vfs_t *vfs,
430 void *inode)
431{
432 sync_blockdev(vfs->vfs_super->s_bdev);
433 iput((struct inode *)inode);
434}
435
/*
 * Queue a whole-device flush on xfssyncd, wait briefly, then force the
 * log synchronously -- the last-ditch attempt to free reserved space
 * before giving up with ENOSPC.
 */
void
xfs_flush_device(
	xfs_inode_t	*ip)
{
	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);

	/* hold a reference across the asynchronous work item */
	igrab(inode);
	xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
	delay(HZ/2);
	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
448
/* flags for the periodic background sync performed by xfssyncd */
#define SYNCD_FLAGS	(SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)

/*
 * Periodic sync body run from xfssyncd.  Skips the actual sync on a
 * read-only mount, but always bumps vfs_sync_seq and wakes any waiter
 * (linvfs_sync_super in laptop mode) so it can observe completion.
 */
STATIC void
vfs_sync_worker(
	vfs_t		*vfsp,
	void		*unused)
{
	int		error;

	if (!(vfsp->vfs_flag & VFS_RDONLY))
		VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
	vfsp->vfs_sync_seq++;
	/* wmb: publish the new sequence before waking the waiter */
	wmb();
	wake_up(&vfsp->vfs_wait_single_sync_task);
}
463
/*
 * Per-mount sync daemon.  Sleeps for xfs_syncd_centisecs between
 * iterations, draining vfs_sync_list each time it wakes; a timer
 * expiry (or an empty list, e.g. a laptop-mode wakeup) queues the
 * built-in periodic vfs_sync_work.  Exits when VFS_UMOUNT is set by
 * linvfs_stop_syncd(), signalling completion via vfs_wait_sync_task.
 */
STATIC int
xfssyncd(
	void			*arg)
{
	long			timeleft;
	vfs_t			*vfsp = (vfs_t *) arg;
	struct list_head	tmp;
	struct vfs_sync_work	*work, *n;

	daemonize("xfssyncd");

	vfsp->vfs_sync_work.w_vfs = vfsp;
	vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
	vfsp->vfs_sync_task = current;
	/* wmb: vfs_sync_task must be visible before the starter wakes */
	wmb();
	wake_up(&vfsp->vfs_wait_sync_task);

	INIT_LIST_HEAD(&tmp);
	timeleft = (xfs_syncd_centisecs * HZ) / 100;
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		timeleft = schedule_timeout(timeleft);
		/* swsusp */
		try_to_freeze(PF_FREEZE);
		if (vfsp->vfs_flag & VFS_UMOUNT)
			break;

		spin_lock(&vfsp->vfs_sync_lock);
		/*
		 * We can get woken by laptop mode, to do a sync -
		 * that's the (only!) case where the list would be
		 * empty with time remaining.
		 */
		if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
			if (!timeleft)
				timeleft = (xfs_syncd_centisecs * HZ) / 100;
			INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
			list_add_tail(&vfsp->vfs_sync_work.w_list,
					&vfsp->vfs_sync_list);
		}
		/* move everything to a private list so the lock can drop
		 * before running the (possibly blocking) work items */
		list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
			list_move(&work->w_list, &tmp);
		spin_unlock(&vfsp->vfs_sync_lock);

		list_for_each_entry_safe(work, n, &tmp, w_list) {
			(*work->w_syncer)(vfsp, work->w_data);
			list_del(&work->w_list);
			/* the embedded periodic work item is not heap
			 * allocated -- never free it */
			if (work == &vfsp->vfs_sync_work)
				continue;
			kmem_free(work, sizeof(struct vfs_sync_work));
		}
	}

	vfsp->vfs_sync_task = NULL;
	/* wmb: publish NULL before waking linvfs_stop_syncd */
	wmb();
	wake_up(&vfsp->vfs_wait_sync_task);

	return 0;
}
523
/*
 * Spawn the xfssyncd kernel thread for a mount and wait until it has
 * announced itself via vfs_sync_task.  Returns 0 or a positive errno.
 */
STATIC int
linvfs_start_syncd(
	vfs_t			*vfsp)
{
	int			pid;

	pid = kernel_thread(xfssyncd, (void *) vfsp,
			CLONE_VM | CLONE_FS | CLONE_FILES);
	if (pid < 0)
		return -pid;
	wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
	return 0;
}
537
/*
 * Ask xfssyncd to exit and wait for it to clear vfs_sync_task.
 */
STATIC void
linvfs_stop_syncd(
	vfs_t			*vfsp)
{
	vfsp->vfs_flag |= VFS_UMOUNT;
	/* wmb: the flag must be visible before the thread is woken */
	wmb();

	wake_up_process(vfsp->vfs_sync_task);
	wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
}
548
/*
 * super_operations.put_super: stop the sync daemon, push remaining
 * dirty state, unmount the XFS vfs and free it.  If unmount fails the
 * vfs is deliberately leaked (with a warning) rather than freed while
 * possibly still referenced.
 */
STATIC void
linvfs_put_super(
	struct super_block	*sb)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	linvfs_stop_syncd(vfsp);
	VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
	if (!error)
		VFS_UNMOUNT(vfsp, 0, NULL, error);
	if (error) {
		printk("XFS unmount got error %d\n", error);
		printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
		return;
	}

	vfs_deallocate(vfsp);
}
568
/*
 * super_operations.write_super: asynchronously push log and superblock
 * for a dirty filesystem; read-only mounts only clear the dirty bit.
 */
STATIC void
linvfs_write_super(
	struct super_block	*sb)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	if (sb->s_flags & MS_RDONLY) {
		sb->s_dirt = 0;	/* paranoia */
		return;
	}
	/* Push the log and superblock a little */
	VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
	sb->s_dirt = 0;
}
584
/*
 * super_operations.sync_fs: sync filesystem data, optionally waiting.
 * Returns a negated errno.
 */
STATIC int
linvfs_sync_super(
	struct super_block	*sb,
	int			wait)
{
	vfs_t		*vfsp = LINVFS_GET_VFS(sb);
	int		error;
	int		flags = SYNC_FSDATA;

	if (wait)
		flags |= SYNC_WAIT;

	VFS_SYNC(vfsp, flags, NULL, error);
	sb->s_dirt = 0;

	if (unlikely(laptop_mode)) {
		int	prev_sync_seq = vfsp->vfs_sync_seq;

		/*
		 * The disk must be active because we're syncing.
		 * We schedule xfssyncd now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		wake_up_process(vfsp->vfs_sync_task);
		/*
		 * We have to wait for the sync iteration to complete.
		 * If we don't, the disk activity caused by the sync
		 * will come after the sync is completed, and that
		 * triggers another sync from laptop mode.
		 *
		 * vfs_sync_worker bumps vfs_sync_seq when it finishes.
		 */
		wait_event(vfsp->vfs_wait_single_sync_task,
				vfsp->vfs_sync_seq != prev_sync_seq);
	}

	return -error;
}
621
/*
 * super_operations.statfs: delegate to the XFS vfs layer; returns a
 * negated errno for the VFS.
 */
STATIC int
linvfs_statfs(
	struct super_block	*sb,
	struct kstatfs		*statp)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_STATVFS(vfsp, statp, NULL, error);
	return -error;
}
633
/*
 * super_operations.remount_fs: reparse the mount options (with
 * update=1) and apply them to the live mount.  Returns a negated errno.
 */
STATIC int
linvfs_remount(
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	struct xfs_mount_args	*args = xfs_args_allocate(sb);
	int			error;

	VFS_PARSEARGS(vfsp, options, args, 1, error);
	if (!error)
		VFS_MNTUPDATE(vfsp, flags, args, error);
	kmem_free(args, sizeof(*args));
	return -error;
}
650
/*
 * super_operations.write_super_lockfs: quiesce the filesystem for a
 * snapshot/suspend by freezing it at the XFS vfs layer.
 */
STATIC void
linvfs_freeze_fs(
	struct super_block	*sb)
{
	VFS_FREEZE(LINVFS_GET_VFS(sb));
}
657
/*
 * super_operations.show_options: emit mount options into /proc/mounts.
 *
 * NOTE(review): unlike its siblings this returns error unnegated --
 * presumably VFS_SHOWARGS already follows the Linux sign convention
 * here; confirm against the xfs_showargs implementation.
 */
STATIC int
linvfs_show_options(
	struct seq_file		*m,
	struct vfsmount		*mnt)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
	int			error;

	VFS_SHOWARGS(vfsp, m, error);
	return error;
}
669
/*
 * quotactl_ops.get_xstate: fetch overall quota state (Q_XGETQSTAT).
 * Returns a negated errno.
 */
STATIC int
linvfs_getxstate(
	struct super_block	*sb,
	struct fs_quota_stat	*fqs)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
	return -error;
}
681
/*
 * quotactl_ops.set_xstate: enable/disable quota enforcement; the
 * specific operation is selected by @op.  Returns a negated errno.
 */
STATIC int
linvfs_setxstate(
	struct super_block	*sb,
	unsigned int		flags,
	int			op)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
	return -error;
}
694
/*
 * quotactl_ops.get_xquota: fetch limits/usage for one user or group id,
 * selecting the group variant of the command when type == GRPQUOTA.
 * Returns a negated errno.
 */
STATIC int
linvfs_getxquota(
	struct super_block	*sb,
	int			type,
	qid_t			id,
	struct fs_disk_quota	*fdq)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error, getmode;

	getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
	VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
	return -error;
}
709
/*
 * quotactl_ops.set_xquota: set limits for one user or group id,
 * selecting the group variant of the command when type == GRPQUOTA.
 * Returns a negated errno.
 */
STATIC int
linvfs_setxquota(
	struct super_block	*sb,
	int			type,
	qid_t			id,
	struct fs_disk_quota	*fdq)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error, setmode;

	setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
	VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
	return -error;
}
724
/*
 * Fill a super_block for a new XFS mount: allocate/link the XFS vfs,
 * parse options, mount, populate sb fields from statvfs, obtain the
 * root inode and start the per-mount sync daemon.  Returns a negated
 * errno; on failure everything acquired so far is unwound in reverse
 * order via the fail_* labels.
 */
STATIC int
linvfs_fill_super(
	struct super_block	*sb,
	void			*data,
	int			silent)
{
	vnode_t			*rootvp;
	struct vfs		*vfsp = vfs_allocate();
	struct xfs_mount_args	*args = xfs_args_allocate(sb);
	struct kstatfs		statvfs;
	int			error, error2;

	vfsp->vfs_super = sb;
	LINVFS_SET_VFS(sb, vfsp);
	if (sb->s_flags & MS_RDONLY)
		vfsp->vfs_flag |= VFS_RDONLY;
	bhv_insert_all_vfsops(vfsp);

	VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
	if (error) {
		bhv_remove_all_vfsops(vfsp, 1);
		goto fail_vfsop;
	}

	sb_min_blocksize(sb, BBSIZE);
#ifdef CONFIG_XFS_EXPORT
	sb->s_export_op = &linvfs_export_ops;
#endif
	sb->s_qcop = &linvfs_qops;
	sb->s_op = &linvfs_sops;

	VFS_MOUNT(vfsp, args, NULL, error);
	if (error) {
		bhv_remove_all_vfsops(vfsp, 1);
		goto fail_vfsop;
	}

	/* use statvfs results to fill in the generic superblock fields */
	VFS_STATVFS(vfsp, &statvfs, NULL, error);
	if (error)
		goto fail_unmount;

	sb->s_dirt = 1;
	sb->s_magic = statvfs.f_type;
	sb->s_blocksize = statvfs.f_bsize;
	sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
	sb->s_time_gran = 1;
	set_posix_acl_flag(sb);

	VFS_ROOT(vfsp, &rootvp, error);
	if (error)
		goto fail_unmount;

	sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
	if (!sb->s_root) {
		error = ENOMEM;
		goto fail_vnrele;
	}
	if (is_bad_inode(sb->s_root->d_inode)) {
		error = EINVAL;
		goto fail_vnrele;
	}
	if ((error = linvfs_start_syncd(vfsp)))
		goto fail_vnrele;
	vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);

	kmem_free(args, sizeof(*args));
	return 0;

fail_vnrele:
	/* dput drops the root inode reference; otherwise release the
	 * vnode reference taken by VFS_ROOT directly */
	if (sb->s_root) {
		dput(sb->s_root);
		sb->s_root = NULL;
	} else {
		VN_RELE(rootvp);
	}

fail_unmount:
	/* error2: preserve the original error for the return value */
	VFS_UNMOUNT(vfsp, 0, NULL, error2);

fail_vfsop:
	vfs_deallocate(vfsp);
	kmem_free(args, sizeof(*args));
	return -error;
}
810
/*
 * file_system_type.get_sb: standard block-device mount entry point,
 * delegating superblock setup to linvfs_fill_super.
 */
STATIC struct super_block *
linvfs_get_sb(
	struct file_system_type	*fs_type,
	int			flags,
	const char		*dev_name,
	void			*data)
{
	return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
}
820
/* Superblock operations: the VFS entry points into XFS for this mount. */
STATIC struct super_operations linvfs_sops = {
	.alloc_inode		= linvfs_alloc_inode,
	.destroy_inode		= linvfs_destroy_inode,
	.write_inode		= linvfs_write_inode,
	.clear_inode		= linvfs_clear_inode,
	.put_super		= linvfs_put_super,
	.write_super		= linvfs_write_super,
	.sync_fs		= linvfs_sync_super,
	.write_super_lockfs	= linvfs_freeze_fs,
	.statfs			= linvfs_statfs,
	.remount_fs		= linvfs_remount,
	.show_options		= linvfs_show_options,
};
834
/* Quota-control operations exposed through the generic quotactl(2). */
STATIC struct quotactl_ops linvfs_qops = {
	.get_xstate		= linvfs_getxstate,
	.set_xstate		= linvfs_setxstate,
	.get_xquota		= linvfs_getxquota,
	.set_xquota		= linvfs_setxquota,
};
841
/* Registration record for the "xfs" filesystem type. */
STATIC struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.get_sb			= linvfs_get_sb,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
849
850
851STATIC int __init
852init_xfs_fs( void )
853{
854 int error;
855 struct sysinfo si;
856 static char message[] __initdata = KERN_INFO \
857 XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
858
859 printk(message);
860
861 si_meminfo(&si);
862 xfs_physmem = si.totalram;
863
864 ktrace_init(64);
865
866 error = init_inodecache();
867 if (error < 0)
868 goto undo_inodecache;
869
870 error = pagebuf_init();
871 if (error < 0)
872 goto undo_pagebuf;
873
874 vn_init();
875 xfs_init();
876 uuid_init();
877 vfs_initquota();
878
879 error = register_filesystem(&xfs_fs_type);
880 if (error)
881 goto undo_register;
882 XFS_DM_INIT(&xfs_fs_type);
883 return 0;
884
885undo_register:
886 pagebuf_terminate();
887
888undo_pagebuf:
889 destroy_inodecache();
890
891undo_inodecache:
892 return error;
893}
894
/*
 * Module exit: tear everything down in the reverse order of
 * init_xfs_fs.
 */
STATIC void __exit
exit_xfs_fs( void )
{
	vfs_exitquota();
	XFS_DM_EXIT(&xfs_fs_type);
	unregister_filesystem(&xfs_fs_type);
	xfs_cleanup();
	pagebuf_terminate();
	destroy_inodecache();
	ktrace_uninit();
}
906
907module_init(init_xfs_fs);
908module_exit(exit_xfs_fs);
909
910MODULE_AUTHOR("Silicon Graphics, Inc.");
911MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
912MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
new file mode 100644
index 000000000000..ec7e0035c731
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -0,0 +1,138 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
#ifndef __XFS_SUPER_H__
#define __XFS_SUPER_H__

/*
 * Config-dependent glue for the superblock layer: optional DMAPI and
 * quota hooks compile to no-ops when the corresponding feature is off.
 *
 * NOTE(review): vfs_insertdmapi/vfs_insertquota expand the hard-coded
 * identifier `vfsp`, not their macro parameter -- callers must have a
 * variable of exactly that name in scope; confirm this is intended.
 */
#ifdef CONFIG_XFS_DMAPI
# define vfs_insertdmapi(vfs)	vfs_insertops(vfsp, &xfs_dmops)
# define vfs_initdmapi()	dmapi_init()
# define vfs_exitdmapi()	dmapi_uninit()
#else
# define vfs_insertdmapi(vfs)	do { } while (0)
# define vfs_initdmapi()	do { } while (0)
# define vfs_exitdmapi()	do { } while (0)
#endif

#ifdef CONFIG_XFS_QUOTA
# define vfs_insertquota(vfs)	vfs_insertops(vfsp, &xfs_qmops)
extern void xfs_qm_init(void);
extern void xfs_qm_exit(void);
# define vfs_initquota()	xfs_qm_init()
# define vfs_exitquota()	xfs_qm_exit()
#else
# define vfs_insertquota(vfs)	do { } while (0)
# define vfs_initquota()	do { } while (0)
# define vfs_exitquota()	do { } while (0)
#endif

#ifdef CONFIG_XFS_POSIX_ACL
# define XFS_ACL_STRING		"ACLs, "
# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL)
#else
# define XFS_ACL_STRING
# define set_posix_acl_flag(sb)	do { } while (0)
#endif

#ifdef CONFIG_XFS_SECURITY
# define XFS_SECURITY_STRING	"security attributes, "
# define ENOSECURITY		0
#else
# define XFS_SECURITY_STRING
# define ENOSECURITY		EOPNOTSUPP
#endif

#ifdef CONFIG_XFS_RT
# define XFS_REALTIME_STRING	"realtime, "
#else
# define XFS_REALTIME_STRING
#endif

#if XFS_BIG_BLKNOS
# if XFS_BIG_INUMS
#  define XFS_BIGFS_STRING	"large block/inode numbers, "
# else
#  define XFS_BIGFS_STRING	"large block numbers, "
# endif
#else
# define XFS_BIGFS_STRING
#endif

#ifdef CONFIG_XFS_TRACE
# define XFS_TRACE_STRING	"tracing, "
#else
# define XFS_TRACE_STRING
#endif

#ifdef CONFIG_XFS_DMAPI
# define XFS_DMAPI_STRING	"dmapi support, "
#else
# define XFS_DMAPI_STRING
#endif

#ifdef DEBUG
# define XFS_DBG_STRING		"debug"
#else
# define XFS_DBG_STRING		"no debug"
#endif

/* Feature summary printed at module load (see init_xfs_fs). */
#define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
				XFS_SECURITY_STRING \
				XFS_REALTIME_STRING \
				XFS_BIGFS_STRING \
				XFS_TRACE_STRING \
				XFS_DMAPI_STRING \
				XFS_DBG_STRING /* DBG must be last */

/* The XFS vfs lives in super_block.s_fs_info. */
#define LINVFS_GET_VFS(s) \
	(vfs_t *)((s)->s_fs_info)
#define LINVFS_SET_VFS(s, vfsp) \
	((s)->s_fs_info = vfsp)

struct xfs_inode;
struct xfs_mount;
struct xfs_buftarg;
struct block_device;

extern __uint64_t xfs_max_file_offset(unsigned int);

extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int);

extern void xfs_flush_inode(struct xfs_inode *);
extern void xfs_flush_device(struct xfs_inode *);

extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
				struct block_device **);
extern void xfs_blkdev_put(struct block_device *);

extern struct export_operations linvfs_export_ops;

#endif	/* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
new file mode 100644
index 000000000000..0dc010356f4d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -0,0 +1,174 @@
1/*
2 * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_rw.h"
35#include <linux/sysctl.h>
36#include <linux/proc_fs.h>
37
38
39static struct ctl_table_header *xfs_table_header;
40
41
#ifdef CONFIG_PROC_FS
/*
 * sysctl handler for fs.xfs.stats_clear: writing a nonzero value zeros
 * the per-CPU xfsstats counters (except vn_active, which tracks live
 * vnodes and must survive), then resets the sysctl back to 0.
 */
STATIC int
xfs_stats_clear_proc_handler(
	ctl_table	*ctl,
	int		write,
	struct file	*filp,
	void __user	*buffer,
	size_t		*lenp,
	loff_t		*ppos)
{
	int		c, ret, *valp = ctl->data;
	__uint32_t	vn_active;

	ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);

	if (!ret && write && *valp) {
		printk("XFS Clearing xfsstats\n");
		for (c = 0; c < NR_CPUS; c++) {
			if (!cpu_possible(c)) continue;
			/* no cross-CPU migration while touching this
			 * CPU slot */
			preempt_disable();
			/* save vn_active, it's a universal truth! */
			vn_active = per_cpu(xfsstats, c).vn_active;
			memset(&per_cpu(xfsstats, c), 0,
			       sizeof(struct xfsstats));
			per_cpu(xfsstats, c).vn_active = vn_active;
			preempt_enable();
		}
		xfs_stats_clear = 0;
	}

	return ret;
}
#endif /* CONFIG_PROC_FS */
75
/*
 * The fs.xfs.* sysctl tunables.  Each entry is
 * {binary id, name, data, maxlen, mode, child, proc_handler, strategy,
 *  de, extra1 (min), extra2 (max)}; values are clamped to the
 * [min, max] range declared alongside each xfs_sysctl_val_t.
 */
STATIC ctl_table xfs_table[] = {
	{XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},

	{XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},

	{XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},

	{XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.panic_mask.min, &xfs_params.panic_mask.max},

	{XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.error_level.min, &xfs_params.error_level.max},

	{XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},

	{XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},

	{XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},

	{XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},

	{XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},

	{XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},

	{XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},

	{XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.rotorstep.min, &xfs_params.rotorstep.max},

	/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
	/* stats_clear uses a custom handler to zero the per-CPU stats */
	{XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
	sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
	&sysctl_intvec, NULL,
	&xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
#endif /* CONFIG_PROC_FS */

	{0}
};
152
/* Intermediate "xfs" directory under fs/ in the sysctl tree. */
STATIC ctl_table xfs_dir_table[] = {
	{FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
	{0}
};
157
/* Root "fs" entry anchoring the fs.xfs.* sysctl hierarchy. */
STATIC ctl_table xfs_root_table[] = {
	{CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table},
	{0}
};
162
/*
 * Register the fs.xfs.* sysctl tree; the returned header is kept for
 * unregistration.  A NULL result (registration failed) is tolerated --
 * the tunables simply will not appear.
 */
void
xfs_sysctl_register(void)
{
	xfs_table_header = register_sysctl_table(xfs_root_table, 1);
}
168
169void
170xfs_sysctl_unregister(void)
171{
172 if (xfs_table_header)
173 unregister_sysctl_table(xfs_table_header);
174}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
new file mode 100644
index 000000000000..a39a95020a58
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -0,0 +1,114 @@
1/*
2 * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#ifndef __XFS_SYSCTL_H__
34#define __XFS_SYSCTL_H__
35
36#include <linux/sysctl.h>
37
38/*
39 * Tunable xfs parameters
40 */
41
42typedef struct xfs_sysctl_val {
43 int min;
44 int val;
45 int max;
46} xfs_sysctl_val_t;
47
48typedef struct xfs_param {
49 xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
50 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
51 * not a member of parent dir GID. */
52 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
53 xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */
54 xfs_sysctl_val_t error_level; /* Degree of reporting for problems */
55 xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */
56 xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */
57 xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */
58 xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
59 xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
60 xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
61 xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */
62 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
63 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
64} xfs_param_t;
65
66/*
67 * xfs_error_level:
68 *
69 * How much error reporting will be done when internal problems are
70 * encountered. These problems normally return an EFSCORRUPTED to their
71 * caller, with no other information reported.
72 *
73 * 0 No error reports
74 * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown
75 * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any
76 * additional errors that are known to not cause shutdowns)
77 *
78 * xfs_panic_mask bit 0x8 turns the error reports into panics
79 */
80
81enum {
82 /* XFS_REFCACHE_SIZE = 1 */
83 /* XFS_REFCACHE_PURGE = 2 */
84 XFS_RESTRICT_CHOWN = 3,
85 XFS_SGID_INHERIT = 4,
86 XFS_SYMLINK_MODE = 5,
87 XFS_PANIC_MASK = 6,
88 XFS_ERRLEVEL = 7,
89 XFS_SYNCD_TIMER = 8,
90 /* XFS_PROBE_DMAPI = 9 */
91 /* XFS_PROBE_IOOPS = 10 */
92 /* XFS_PROBE_QUOTA = 11 */
93 XFS_STATS_CLEAR = 12,
94 XFS_INHERIT_SYNC = 13,
95 XFS_INHERIT_NODUMP = 14,
96 XFS_INHERIT_NOATIME = 15,
97 XFS_BUF_TIMER = 16,
98 XFS_BUF_AGE = 17,
99 /* XFS_IO_BYPASS = 18 */
100 XFS_INHERIT_NOSYM = 19,
101 XFS_ROTORSTEP = 20,
102};
103
104extern xfs_param_t xfs_params;
105
106#ifdef CONFIG_SYSCTL
107extern void xfs_sysctl_register(void);
108extern void xfs_sysctl_unregister(void);
109#else
110# define xfs_sysctl_register() do { } while (0)
111# define xfs_sysctl_unregister() do { } while (0)
112#endif /* CONFIG_SYSCTL */
113
114#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
new file mode 100644
index 000000000000..96f96394417e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_version.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * Dummy file that can contain a timestamp to put into the
35 * XFS init string, to help users keep track of what they're
36 * running
37 */
38
39#ifndef __XFS_VERSION_H__
40#define __XFS_VERSION_H__
41
42#define XFS_VERSION_STRING "SGI XFS"
43
44#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
new file mode 100644
index 000000000000..669c61644959
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -0,0 +1,330 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_fs.h"
35#include "xfs_macros.h"
36#include "xfs_inum.h"
37#include "xfs_log.h"
38#include "xfs_clnt.h"
39#include "xfs_trans.h"
40#include "xfs_sb.h"
41#include "xfs_ag.h"
42#include "xfs_dir.h"
43#include "xfs_dir2.h"
44#include "xfs_imap.h"
45#include "xfs_alloc.h"
46#include "xfs_dmapi.h"
47#include "xfs_mount.h"
48#include "xfs_quota.h"
49
50int
51vfs_mount(
52 struct bhv_desc *bdp,
53 struct xfs_mount_args *args,
54 struct cred *cr)
55{
56 struct bhv_desc *next = bdp;
57
58 ASSERT(next);
59 while (! (bhvtovfsops(next))->vfs_mount)
60 next = BHV_NEXT(next);
61 return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr));
62}
63
64int
65vfs_parseargs(
66 struct bhv_desc *bdp,
67 char *s,
68 struct xfs_mount_args *args,
69 int f)
70{
71 struct bhv_desc *next = bdp;
72
73 ASSERT(next);
74 while (! (bhvtovfsops(next))->vfs_parseargs)
75 next = BHV_NEXT(next);
76 return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f));
77}
78
79int
80vfs_showargs(
81 struct bhv_desc *bdp,
82 struct seq_file *m)
83{
84 struct bhv_desc *next = bdp;
85
86 ASSERT(next);
87 while (! (bhvtovfsops(next))->vfs_showargs)
88 next = BHV_NEXT(next);
89 return ((*bhvtovfsops(next)->vfs_showargs)(next, m));
90}
91
92int
93vfs_unmount(
94 struct bhv_desc *bdp,
95 int fl,
96 struct cred *cr)
97{
98 struct bhv_desc *next = bdp;
99
100 ASSERT(next);
101 while (! (bhvtovfsops(next))->vfs_unmount)
102 next = BHV_NEXT(next);
103 return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr));
104}
105
106int
107vfs_mntupdate(
108 struct bhv_desc *bdp,
109 int *fl,
110 struct xfs_mount_args *args)
111{
112 struct bhv_desc *next = bdp;
113
114 ASSERT(next);
115 while (! (bhvtovfsops(next))->vfs_mntupdate)
116 next = BHV_NEXT(next);
117 return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args));
118}
119
120int
121vfs_root(
122 struct bhv_desc *bdp,
123 struct vnode **vpp)
124{
125 struct bhv_desc *next = bdp;
126
127 ASSERT(next);
128 while (! (bhvtovfsops(next))->vfs_root)
129 next = BHV_NEXT(next);
130 return ((*bhvtovfsops(next)->vfs_root)(next, vpp));
131}
132
133int
134vfs_statvfs(
135 struct bhv_desc *bdp,
136 xfs_statfs_t *sp,
137 struct vnode *vp)
138{
139 struct bhv_desc *next = bdp;
140
141 ASSERT(next);
142 while (! (bhvtovfsops(next))->vfs_statvfs)
143 next = BHV_NEXT(next);
144 return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
145}
146
147int
148vfs_sync(
149 struct bhv_desc *bdp,
150 int fl,
151 struct cred *cr)
152{
153 struct bhv_desc *next = bdp;
154
155 ASSERT(next);
156 while (! (bhvtovfsops(next))->vfs_sync)
157 next = BHV_NEXT(next);
158 return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr));
159}
160
161int
162vfs_vget(
163 struct bhv_desc *bdp,
164 struct vnode **vpp,
165 struct fid *fidp)
166{
167 struct bhv_desc *next = bdp;
168
169 ASSERT(next);
170 while (! (bhvtovfsops(next))->vfs_vget)
171 next = BHV_NEXT(next);
172 return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp));
173}
174
175int
176vfs_dmapiops(
177 struct bhv_desc *bdp,
178 caddr_t addr)
179{
180 struct bhv_desc *next = bdp;
181
182 ASSERT(next);
183 while (! (bhvtovfsops(next))->vfs_dmapiops)
184 next = BHV_NEXT(next);
185 return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr));
186}
187
188int
189vfs_quotactl(
190 struct bhv_desc *bdp,
191 int cmd,
192 int id,
193 caddr_t addr)
194{
195 struct bhv_desc *next = bdp;
196
197 ASSERT(next);
198 while (! (bhvtovfsops(next))->vfs_quotactl)
199 next = BHV_NEXT(next);
200 return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr));
201}
202
203void
204vfs_init_vnode(
205 struct bhv_desc *bdp,
206 struct vnode *vp,
207 struct bhv_desc *bp,
208 int unlock)
209{
210 struct bhv_desc *next = bdp;
211
212 ASSERT(next);
213 while (! (bhvtovfsops(next))->vfs_init_vnode)
214 next = BHV_NEXT(next);
215 ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock));
216}
217
218void
219vfs_force_shutdown(
220 struct bhv_desc *bdp,
221 int fl,
222 char *file,
223 int line)
224{
225 struct bhv_desc *next = bdp;
226
227 ASSERT(next);
228 while (! (bhvtovfsops(next))->vfs_force_shutdown)
229 next = BHV_NEXT(next);
230 ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
231}
232
233void
234vfs_freeze(
235 struct bhv_desc *bdp)
236{
237 struct bhv_desc *next = bdp;
238
239 ASSERT(next);
240 while (! (bhvtovfsops(next))->vfs_freeze)
241 next = BHV_NEXT(next);
242 ((*bhvtovfsops(next)->vfs_freeze)(next));
243}
244
245vfs_t *
246vfs_allocate( void )
247{
248 struct vfs *vfsp;
249
250 vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
252 INIT_LIST_HEAD(&vfsp->vfs_sync_list);
253 spin_lock_init(&vfsp->vfs_sync_lock);
254 init_waitqueue_head(&vfsp->vfs_wait_sync_task);
255 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
256 return vfsp;
257}
258
259void
260vfs_deallocate(
261 struct vfs *vfsp)
262{
263 bhv_head_destroy(VFS_BHVHEAD(vfsp));
264 kmem_free(vfsp, sizeof(vfs_t));
265}
266
267void
268vfs_insertops(
269 struct vfs *vfsp,
270 struct bhv_vfsops *vfsops)
271{
272 struct bhv_desc *bdp;
273
274 bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
275 bhv_desc_init(bdp, NULL, vfsp, vfsops);
276 bhv_insert(&vfsp->vfs_bh, bdp);
277}
278
279void
280vfs_insertbhv(
281 struct vfs *vfsp,
282 struct bhv_desc *bdp,
283 struct vfsops *vfsops,
284 void *mount)
285{
286 bhv_desc_init(bdp, mount, vfsp, vfsops);
287 bhv_insert_initial(&vfsp->vfs_bh, bdp);
288}
289
290void
291bhv_remove_vfsops(
292 struct vfs *vfsp,
293 int pos)
294{
295 struct bhv_desc *bhv;
296
297 bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
298 if (!bhv)
299 return;
300 bhv_remove(&vfsp->vfs_bh, bhv);
301 kmem_free(bhv, sizeof(*bhv));
302}
303
304void
305bhv_remove_all_vfsops(
306 struct vfs *vfsp,
307 int freebase)
308{
309 struct xfs_mount *mp;
310
311 bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
312 bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
313 if (!freebase)
314 return;
315 mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
316 VFS_REMOVEBHV(vfsp, &mp->m_bhv);
317 xfs_mount_free(mp, 0);
318}
319
320void
321bhv_insert_all_vfsops(
322 struct vfs *vfsp)
323{
324 struct xfs_mount *mp;
325
326 mp = xfs_mount_init();
327 vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
328 vfs_insertdmapi(vfsp);
329 vfs_insertquota(vfsp);
330}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
new file mode 100644
index 000000000000..76493991578f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -0,0 +1,223 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_VFS_H__
33#define __XFS_VFS_H__
34
35#include <linux/vfs.h>
36#include "xfs_fs.h"
37
38struct fid;
39struct vfs;
40struct cred;
41struct vnode;
42struct kstatfs;
43struct seq_file;
44struct super_block;
45struct xfs_mount_args;
46
47typedef struct kstatfs xfs_statfs_t;
48
49typedef struct vfs_sync_work {
50 struct list_head w_list;
51 struct vfs *w_vfs;
52 void *w_data; /* syncer routine argument */
53 void (*w_syncer)(struct vfs *, void *);
54} vfs_sync_work_t;
55
56typedef struct vfs {
57 u_int vfs_flag; /* flags */
58 xfs_fsid_t vfs_fsid; /* file system ID */
59 xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */
60 bhv_head_t vfs_bh; /* head of vfs behavior chain */
61 struct super_block *vfs_super; /* generic superblock pointer */
62 struct task_struct *vfs_sync_task; /* generalised sync thread */
63 vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */
64 struct list_head vfs_sync_list; /* sync thread work item list */
65 spinlock_t vfs_sync_lock; /* work item list lock */
66 int vfs_sync_seq; /* sync thread generation no. */
67 wait_queue_head_t vfs_wait_single_sync_task;
68 wait_queue_head_t vfs_wait_sync_task;
69} vfs_t;
70
71#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
72
73#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) )
74#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) )
75#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh )
76#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
77
78#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
79#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */
80#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
81
82typedef enum {
83 VFS_BHV_UNKNOWN, /* not specified */
84 VFS_BHV_XFS, /* xfs */
85 VFS_BHV_DM, /* data migration */
86 VFS_BHV_QM, /* quota manager */
87 VFS_BHV_IO, /* IO path */
88 VFS_BHV_END /* housekeeping end-of-range */
89} vfs_bhv_t;
90
91#define VFS_POSITION_XFS (BHV_POSITION_BASE)
92#define VFS_POSITION_DM (VFS_POSITION_BASE+10)
93#define VFS_POSITION_QM (VFS_POSITION_BASE+20)
94#define VFS_POSITION_IO (VFS_POSITION_BASE+30)
95
96#define VFS_RDONLY 0x0001 /* read-only vfs */
97#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
98#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
99#define VFS_UMOUNT 0x0008 /* unmount in progress */
100#define VFS_END 0x0008 /* max flag */
101
102#define SYNC_ATTR 0x0001 /* sync attributes */
103#define SYNC_CLOSE 0x0002 /* close file system down */
104#define SYNC_DELWRI 0x0004 /* look at delayed writes */
105#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
106#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
107#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
108#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
109#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
110
111typedef int (*vfs_mount_t)(bhv_desc_t *,
112 struct xfs_mount_args *, struct cred *);
113typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *,
114 struct xfs_mount_args *, int);
115typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
116typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
117typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *,
118 struct xfs_mount_args *);
119typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **);
120typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
121typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
122typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
123typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
124typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
125typedef void (*vfs_init_vnode_t)(bhv_desc_t *,
126 struct vnode *, bhv_desc_t *, int);
127typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
128typedef void (*vfs_freeze_t)(bhv_desc_t *);
129
130typedef struct vfsops {
131 bhv_position_t vf_position; /* behavior chain position */
132 vfs_mount_t vfs_mount; /* mount file system */
133 vfs_parseargs_t vfs_parseargs; /* parse mount options */
134 vfs_showargs_t vfs_showargs; /* unparse mount options */
135 vfs_unmount_t vfs_unmount; /* unmount file system */
136 vfs_mntupdate_t vfs_mntupdate; /* update file system options */
137 vfs_root_t vfs_root; /* get root vnode */
138 vfs_statvfs_t vfs_statvfs; /* file system statistics */
139 vfs_sync_t vfs_sync; /* flush files */
140 vfs_vget_t vfs_vget; /* get vnode from fid */
141 vfs_dmapiops_t vfs_dmapiops; /* data migration */
142 vfs_quotactl_t vfs_quotactl; /* disk quota */
143 vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */
144 vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */
145 vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */
146} vfsops_t;
147
148/*
149 * VFS's. Operates on vfs structure pointers (starts at bhv head).
150 */
151#define VHEAD(v) ((v)->vfs_fbhv)
152#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr))
153#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
154#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m))
155#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr))
156#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args))
157#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp))
158#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp))
159#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr))
160#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp))
161#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p))
162#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p))
163#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) )
164#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) )
165#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) )
166
167/*
168 * PVFS's. Operates on behavior descriptor pointers.
169 */
170#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr))
171#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f))
172#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m))
173#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr))
174#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args))
175#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp))
176#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp))
177#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr))
178#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp))
179#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p))
180#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p))
181#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) )
182#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) )
183#define PVFS_FREEZE(b) ( vfs_freeze(b) )
184
185extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
186extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
187extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
188extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
189extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
190extern int vfs_root(bhv_desc_t *, struct vnode **);
191extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
192extern int vfs_sync(bhv_desc_t *, int, struct cred *);
193extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
194extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
195extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
196extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
197extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
198extern void vfs_freeze(bhv_desc_t *);
199
200typedef struct bhv_vfsops {
201 struct vfsops bhv_common;
202 void * bhv_custom;
203} bhv_vfsops_t;
204
205#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
206#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
207#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o))
208#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL )
209
210extern vfs_t *vfs_allocate(void);
211extern void vfs_deallocate(vfs_t *);
212extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
213extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
214
215extern void bhv_insert_all_vfsops(struct vfs *);
216extern void bhv_remove_all_vfsops(struct vfs *, int);
217extern void bhv_remove_vfsops(struct vfs *, int);
218
219#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen)
220#define fs_check_frozen(vfsp, level) \
221 vfs_check_frozen(vfsp->vfs_super, level);
222
223#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
new file mode 100644
index 000000000000..849c61c74f3c
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -0,0 +1,455 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35
36uint64_t vn_generation; /* vnode generation number */
37DEFINE_SPINLOCK(vnumber_lock);
38
39/*
40 * Dedicated vnode inactive/reclaim sync semaphores.
41 * Prime number of hash buckets since address is used as the key.
42 */
43#define NVSYNC 37
44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
45sv_t vsync[NVSYNC];
46
47/*
48 * Translate stat(2) file types to vnode types and vice versa.
49 * Aware of numeric order of S_IFMT and vnode type values.
50 */
51enum vtype iftovt_tab[] = {
52 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
53 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
54};
55
56u_short vttoif_tab[] = {
57 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
58};
59
60
61void
62vn_init(void)
63{
64 register sv_t *svp;
65 register int i;
66
67 for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
68 init_sv(svp, SV_DEFAULT, "vsy", i);
69}
70
71/*
72 * Clean a vnode of filesystem-specific data and prepare it for reuse.
73 */
74STATIC int
75vn_reclaim(
76 struct vnode *vp)
77{
78 int error;
79
80 XFS_STATS_INC(vn_reclaim);
81 vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
82
83 /*
84 * Only make the VOP_RECLAIM call if there are behaviors
85 * to call.
86 */
87 if (vp->v_fbhv) {
88 VOP_RECLAIM(vp, error);
89 if (error)
90 return -error;
91 }
92 ASSERT(vp->v_fbhv == NULL);
93
94 VN_LOCK(vp);
95 vp->v_flag &= (VRECLM|VWAIT);
96 VN_UNLOCK(vp, 0);
97
98 vp->v_type = VNON;
99 vp->v_fbhv = NULL;
100
101#ifdef XFS_VNODE_TRACE
102 ktrace_free(vp->v_trace);
103 vp->v_trace = NULL;
104#endif
105
106 return 0;
107}
108
109STATIC void
110vn_wakeup(
111 struct vnode *vp)
112{
113 VN_LOCK(vp);
114 if (vp->v_flag & VWAIT)
115 sv_broadcast(vptosync(vp));
116 vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
117 VN_UNLOCK(vp, 0);
118}
119
120int
121vn_wait(
122 struct vnode *vp)
123{
124 VN_LOCK(vp);
125 if (vp->v_flag & (VINACT | VRECLM)) {
126 vp->v_flag |= VWAIT;
127 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
128 return 1;
129 }
130 VN_UNLOCK(vp, 0);
131 return 0;
132}
133
134struct vnode *
135vn_initialize(
136 struct inode *inode)
137{
138 struct vnode *vp = LINVFS_GET_VP(inode);
139
140 XFS_STATS_INC(vn_active);
141 XFS_STATS_INC(vn_alloc);
142
143 vp->v_flag = VMODIFIED;
144 spinlock_init(&vp->v_lock, "v_lock");
145
146 spin_lock(&vnumber_lock);
147 if (!++vn_generation) /* v_number shouldn't be zero */
148 vn_generation++;
149 vp->v_number = vn_generation;
150 spin_unlock(&vnumber_lock);
151
152 ASSERT(VN_CACHED(vp) == 0);
153
154 /* Initialize the first behavior and the behavior chain head. */
155 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
156
157#ifdef XFS_VNODE_TRACE
158 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
159 printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace);
160#endif /* XFS_VNODE_TRACE */
161
162 vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
163 return vp;
164}
165
166/*
167 * Get a reference on a vnode.
168 */
169vnode_t *
170vn_get(
171 struct vnode *vp,
172 vmap_t *vmap)
173{
174 struct inode *inode;
175
176 XFS_STATS_INC(vn_get);
177 inode = LINVFS_GET_IP(vp);
178 if (inode->i_state & I_FREEING)
179 return NULL;
180
181 inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
182 if (!inode) /* Inode not present */
183 return NULL;
184
185 vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
186
187 return vp;
188}
189
190/*
191 * Revalidate the Linux inode from the vattr.
192 * Note: i_size _not_ updated; we must hold the inode
193 * semaphore when doing that - callers responsibility.
194 */
195void
196vn_revalidate_core(
197 struct vnode *vp,
198 vattr_t *vap)
199{
200 struct inode *inode = LINVFS_GET_IP(vp);
201
202 inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode;
203 inode->i_nlink = vap->va_nlink;
204 inode->i_uid = vap->va_uid;
205 inode->i_gid = vap->va_gid;
206 inode->i_blocks = vap->va_nblocks;
207 inode->i_mtime = vap->va_mtime;
208 inode->i_ctime = vap->va_ctime;
209 inode->i_atime = vap->va_atime;
210 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
211 inode->i_flags |= S_IMMUTABLE;
212 else
213 inode->i_flags &= ~S_IMMUTABLE;
214 if (vap->va_xflags & XFS_XFLAG_APPEND)
215 inode->i_flags |= S_APPEND;
216 else
217 inode->i_flags &= ~S_APPEND;
218 if (vap->va_xflags & XFS_XFLAG_SYNC)
219 inode->i_flags |= S_SYNC;
220 else
221 inode->i_flags &= ~S_SYNC;
222 if (vap->va_xflags & XFS_XFLAG_NOATIME)
223 inode->i_flags |= S_NOATIME;
224 else
225 inode->i_flags &= ~S_NOATIME;
226}
227
228/*
229 * Revalidate the Linux inode from the vnode.
230 */
231int
232vn_revalidate(
233 struct vnode *vp)
234{
235 vattr_t va;
236 int error;
237
238 vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
239 ASSERT(vp->v_fbhv != NULL);
240
241 va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
242 VOP_GETATTR(vp, &va, 0, NULL, error);
243 if (!error) {
244 vn_revalidate_core(vp, &va);
245 VUNMODIFY(vp);
246 }
247 return -error;
248}
249
250/*
251 * purge a vnode from the cache
252 * At this point the vnode is guaranteed to have no references (vn_count == 0)
253 * The caller has to make sure that there are no ways someone could
254 * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
255 */
256void
257vn_purge(
258 struct vnode *vp,
259 vmap_t *vmap)
260{
261 vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
262
263again:
264 /*
265 * Check whether vp has already been reclaimed since our caller
266 * sampled its version while holding a filesystem cache lock that
267 * its VOP_RECLAIM function acquires.
268 */
269 VN_LOCK(vp);
270 if (vp->v_number != vmap->v_number) {
271 VN_UNLOCK(vp, 0);
272 return;
273 }
274
275 /*
276 * If vp is being reclaimed or inactivated, wait until it is inert,
277 * then proceed. Can't assume that vnode is actually reclaimed
278 * just because the reclaimed flag is asserted -- a vn_alloc
279 * reclaim can fail.
280 */
281 if (vp->v_flag & (VINACT | VRECLM)) {
282 ASSERT(vn_count(vp) == 0);
283 vp->v_flag |= VWAIT;
284 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
285 goto again;
286 }
287
288 /*
289 * Another process could have raced in and gotten this vnode...
290 */
291 if (vn_count(vp) > 0) {
292 VN_UNLOCK(vp, 0);
293 return;
294 }
295
296 XFS_STATS_DEC(vn_active);
297 vp->v_flag |= VRECLM;
298 VN_UNLOCK(vp, 0);
299
300 /*
301 * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
302 * vp's filesystem to flush and invalidate all cached resources.
303 * When vn_reclaim returns, vp should have no private data,
304 * either in a system cache or attached to v_data.
305 */
306 if (vn_reclaim(vp) != 0)
307 panic("vn_purge: cannot reclaim");
308
309 /*
310 * Wakeup anyone waiting for vp to be reclaimed.
311 */
312 vn_wakeup(vp);
313}
314
315/*
316 * Add a reference to a referenced vnode.
317 */
318struct vnode *
319vn_hold(
320 struct vnode *vp)
321{
322 struct inode *inode;
323
324 XFS_STATS_INC(vn_hold);
325
326 VN_LOCK(vp);
327 inode = igrab(LINVFS_GET_IP(vp));
328 ASSERT(inode);
329 VN_UNLOCK(vp, 0);
330
331 return vp;
332}
333
334/*
335 * Call VOP_INACTIVE on last reference.
336 */
337void
338vn_rele(
339 struct vnode *vp)
340{
341 int vcnt;
342 int cache;
343
344 XFS_STATS_INC(vn_rele);
345
346 VN_LOCK(vp);
347
348 vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
349 vcnt = vn_count(vp);
350
351 /*
352 * Since we always get called from put_inode we know
353 * that i_count won't be decremented after we
354 * return.
355 */
356 if (!vcnt) {
357 /*
358 * As soon as we turn this on, noone can find us in vn_get
359 * until we turn off VINACT or VRECLM
360 */
361 vp->v_flag |= VINACT;
362 VN_UNLOCK(vp, 0);
363
364 /*
365 * Do not make the VOP_INACTIVE call if there
366 * are no behaviors attached to the vnode to call.
367 */
368 if (vp->v_fbhv)
369 VOP_INACTIVE(vp, NULL, cache);
370
371 VN_LOCK(vp);
372 if (vp->v_flag & VWAIT)
373 sv_broadcast(vptosync(vp));
374
375 vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
376 }
377
378 VN_UNLOCK(vp, 0);
379
380 vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
381}
382
383/*
384 * Finish the removal of a vnode.
385 */
386void
387vn_remove(
388 struct vnode *vp)
389{
390 vmap_t vmap;
391
392 /* Make sure we don't do this to the same vnode twice */
393 if (!(vp->v_fbhv))
394 return;
395
396 XFS_STATS_INC(vn_remove);
397 vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
398
399 /*
400 * After the following purge the vnode
401 * will no longer exist.
402 */
403 VMAP(vp, vmap);
404 vn_purge(vp, &vmap);
405}
406
407
408#ifdef XFS_VNODE_TRACE
409
410#define KTRACE_ENTER(vp, vk, s, line, ra) \
411 ktrace_enter( (vp)->v_trace, \
412/* 0 */ (void *)(__psint_t)(vk), \
413/* 1 */ (void *)(s), \
414/* 2 */ (void *)(__psint_t) line, \
415/* 3 */ (void *)(vn_count(vp)), \
416/* 4 */ (void *)(ra), \
417/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \
418/* 6 */ (void *)(__psint_t)current_cpu(), \
419/* 7 */ (void *)(__psint_t)current_pid(), \
420/* 8 */ (void *)__return_address, \
421/* 9 */ 0, 0, 0, 0, 0, 0, 0)
422
423/*
424 * Vnode tracing code.
425 */
426void
427vn_trace_entry(vnode_t *vp, char *func, inst_t *ra)
428{
429 KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
430}
431
432void
433vn_trace_exit(vnode_t *vp, char *func, inst_t *ra)
434{
435 KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
436}
437
438void
439vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra)
440{
441 KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
442}
443
444void
445vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra)
446{
447 KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
448}
449
450void
451vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra)
452{
453 KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
454}
455#endif /* XFS_VNODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
new file mode 100644
index 000000000000..da76c1f1e11c
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -0,0 +1,666 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 *
32 * Portions Copyright (c) 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 */
#ifndef __XFS_VNODE_H__
#define __XFS_VNODE_H__

/* Opaque forward declarations; this header only uses pointers to these. */
struct uio;
struct file;
struct vattr;
struct xfs_iomap;
struct attrlist_cursor_kern;

/*
 * Vnode types. VNON means no type.
 */
enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };

typedef xfs_ino_t vnumber_t;
typedef struct dentry	vname_t;	/* a Linux dentry doubles as the name handle */
typedef bhv_head_t vn_bhv_head_t;

/*
 * MP locking protocols:
 *	v_flag, v_vfsp				VN_LOCK/VN_UNLOCK
 *	v_type					read-only or fs-dependent
 */
typedef struct vnode {
	__u32		v_flag;			/* vnode flags (see below) */
	enum vtype	v_type;			/* vnode type */
	struct vfs	*v_vfsp;		/* ptr to containing VFS */
	vnumber_t	v_number;		/* in-core vnode number */
	vn_bhv_head_t	v_bh;			/* behavior head */
	spinlock_t	v_lock;			/* VN_LOCK/VN_UNLOCK */
	/* Linux inode embedded (not pointed to) so LINVFS_GET_VP can
	 * recover the vnode from an inode by address arithmetic. */
	struct inode	v_inode;		/* Linux inode */
#ifdef XFS_VNODE_TRACE
	struct ktrace	*v_trace;		/* trace header structure */
#endif
} vnode_t;
94
/* Shorthand accessors for the first behavior on the vnode's chain. */
#define v_fbhv			v_bh.bh_first	       /* first behavior */
#define v_fops			v_bh.bh_first->bd_ops  /* first behavior ops */

/* Behavior-chain insertion positions (bottom/top/sentinel). */
#define VNODE_POSITION_BASE	BHV_POSITION_BASE	/* chain bottom */
#define VNODE_POSITION_TOP	BHV_POSITION_TOP	/* chain top */
#define VNODE_POSITION_INVALID	BHV_POSITION_INVALID	/* invalid pos. num */

/* Identities of the modules that may stack on a vnode's behavior chain. */
typedef enum {
	VN_BHV_UNKNOWN,		/* not specified */
	VN_BHV_XFS,		/* xfs */
	VN_BHV_DM,		/* data migration */
	VN_BHV_QM,		/* quota manager */
	VN_BHV_IO,		/* IO path */
	VN_BHV_END		/* housekeeping end-of-range */
} vn_bhv_t;

/* Fixed chain positions per module; higher numbers sit nearer the top. */
#define VNODE_POSITION_XFS	(VNODE_POSITION_BASE)
#define VNODE_POSITION_DM	(VNODE_POSITION_BASE+10)
#define VNODE_POSITION_QM	(VNODE_POSITION_BASE+20)
#define VNODE_POSITION_IO	(VNODE_POSITION_BASE+30)

/*
 * Macros for dealing with the behavior descriptor inside of the vnode.
 */
#define BHV_TO_VNODE(bdp)	((vnode_t *)BHV_VOBJ(bdp))
#define BHV_TO_VNODE_NULL(bdp)	((vnode_t *)BHV_VOBJNULL(bdp))

#define VN_BHV_HEAD(vp)			((bhv_head_t *)(&((vp)->v_bh)))
#define vn_bhv_head_init(bhp,name)	bhv_head_init(bhp,name)
#define vn_bhv_remove(bhp,bdp)		bhv_remove(bhp,bdp)
#define vn_bhv_lookup(bhp,ops)		bhv_lookup(bhp,ops)
#define	vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)

/*
 * Vnode to Linux inode mapping.
 * GET_VP uses list_entry (container_of) on the embedded v_inode member;
 * GET_IP is just the address of that member.
 */
#define LINVFS_GET_VP(inode)	((vnode_t *)list_entry(inode, vnode_t, v_inode))
#define LINVFS_GET_IP(vp)	(&(vp)->v_inode)

/*
 * Convert between vnode types and inode formats (since POSIX.1
 * defines mode word of stat structure in terms of inode formats).
 */
extern enum vtype	iftovt_tab[];
extern u_short		vttoif_tab[];
#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
#define MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
143
144
/*
 * Vnode flags (v_flag).  Protected by VN_LOCK/VN_UNLOCK per the MP
 * locking protocol documented on struct vnode.
 */
#define VINACT		       0x1	/* vnode is being inactivated	*/
#define VRECLM		       0x2	/* vnode is being reclaimed	*/
#define VWAIT		       0x4	/* waiting for VINACT/VRECLM to end */
#define VMODIFIED	       0x8	/* XFS inode state possibly differs */
					/* to the Linux inode state.	*/

/*
 * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
 * The TRY variants are non-blocking attempts.
 */
typedef enum vrwlock {
	VRWLOCK_NONE,
	VRWLOCK_READ,
	VRWLOCK_WRITE,
	VRWLOCK_WRITE_DIRECT,
	VRWLOCK_TRY_READ,
	VRWLOCK_TRY_WRITE
} vrwlock_t;

/*
 * Return values for VOP_INACTIVE. A return value of
 * VN_INACTIVE_NOCACHE implies that the file system behavior
 * has disassociated its state and bhv_desc_t from the vnode.
 */
#define	VN_INACTIVE_CACHE	0
#define	VN_INACTIVE_NOCACHE	1

/*
 * Values for the cmd code given to VOP_VNODE_CHANGE.
 */
typedef enum vchange {
	VCHANGE_FLAGS_FRLOCKS		= 0,
	VCHANGE_FLAGS_ENF_LOCKING	= 1,
	VCHANGE_FLAGS_TRUNCATED		= 2,
	VCHANGE_FLAGS_PAGE_DIRTY	= 3,
	VCHANGE_FLAGS_IOEXCL_COUNT	= 4
} vchange_t;
184
185
/*
 * Function-pointer types for each vnode operation.  Every op takes the
 * behavior descriptor (bhv_desc_t *) as its first argument; the VOP_*
 * macros below supply (vp)->v_fbhv for it.
 */
typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, int, struct cred *);
typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, int, struct cred *);
typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
				loff_t *, int, size_t, read_actor_t,
				void *, struct cred *);
typedef int	(*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
				int, unsigned int, void __user *);
typedef int	(*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
				struct cred *);
typedef int	(*vop_setattr_t)(bhv_desc_t *, struct vattr *, int,
				struct cred *);
typedef int	(*vop_access_t)(bhv_desc_t *, int, struct cred *);
typedef int	(*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **,
				int, vnode_t *, struct cred *);
typedef int	(*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *,
				vnode_t **, struct cred *);
typedef int	(*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *);
typedef int	(*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *,
				struct cred *);
typedef int	(*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
				struct cred *);
typedef int	(*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *,
				vnode_t **, struct cred *);
typedef int	(*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *);
typedef int	(*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
				int *);
typedef int	(*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *,
				char *, vnode_t **, struct cred *);
typedef int	(*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
				struct cred *);
typedef int	(*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
				xfs_off_t, xfs_off_t);
typedef int	(*vop_inactive_t)(bhv_desc_t *, struct cred *);
typedef int	(*vop_fid2_t)(bhv_desc_t *, struct fid *);
typedef int	(*vop_release_t)(bhv_desc_t *);
typedef int	(*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
typedef void	(*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
				struct xfs_iomap *, int *);
typedef int	(*vop_reclaim_t)(bhv_desc_t *);
typedef int	(*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
				struct cred *);
typedef	int	(*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int,
				struct cred *);
typedef	int	(*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *);
typedef	int	(*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
				struct attrlist_cursor_kern *, struct cred *);
typedef void	(*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
typedef void	(*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t);
typedef void	(*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
typedef void	(*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
typedef int	(*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
				uint64_t, int);
typedef int	(*vop_iflush_t)(bhv_desc_t *, int);


/*
 * The vnode operations vector.  One instance per behavior module;
 * the _VOP_ macro below dispatches through the first behavior's ops.
 */
typedef struct vnodeops {
	bhv_position_t	vn_position;	/* position within behavior chain */
	vop_open_t		vop_open;
	vop_read_t		vop_read;
	vop_write_t		vop_write;
	vop_sendfile_t		vop_sendfile;
	vop_ioctl_t		vop_ioctl;
	vop_getattr_t		vop_getattr;
	vop_setattr_t		vop_setattr;
	vop_access_t		vop_access;
	vop_lookup_t		vop_lookup;
	vop_create_t		vop_create;
	vop_remove_t		vop_remove;
	vop_link_t		vop_link;
	vop_rename_t		vop_rename;
	vop_mkdir_t		vop_mkdir;
	vop_rmdir_t		vop_rmdir;
	vop_readdir_t		vop_readdir;
	vop_symlink_t		vop_symlink;
	vop_readlink_t		vop_readlink;
	vop_fsync_t		vop_fsync;
	vop_inactive_t		vop_inactive;
	vop_fid2_t		vop_fid2;
	vop_rwlock_t		vop_rwlock;
	vop_rwunlock_t		vop_rwunlock;
	vop_bmap_t		vop_bmap;
	vop_reclaim_t		vop_reclaim;
	vop_attr_get_t		vop_attr_get;
	vop_attr_set_t		vop_attr_set;
	vop_attr_remove_t	vop_attr_remove;
	vop_attr_list_t		vop_attr_list;
	vop_link_removed_t	vop_link_removed;
	vop_vnode_change_t	vop_vnode_change;
	vop_ptossvp_t		vop_tosspages;
	vop_pflushinvalvp_t	vop_flushinval_pages;
	vop_pflushvp_t		vop_flush_pages;
	vop_release_t		vop_release;
	vop_iflush_t		vop_iflush;
} vnodeops_t;
286
/*
 * VOP's.  Each macro fetches the vnodeops vector of the vnode's first
 * behavior and invokes the named op, passing the behavior descriptor
 * as the first argument.  Ops returning a value assign it to 'rv'.
 */
#define _VOP_(op, vp)	(*((vnodeops_t *)(vp)->v_fops)->op)

#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv)			\
	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv)		\
	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv)		\
	rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
#define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
#define VOP_OPEN(vp, cr, rv)						\
	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
#define VOP_GETATTR(vp, vap, f, cr, rv)					\
	rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
#define	VOP_SETATTR(vp, vap, f, cr, rv)					\
	rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
#define	VOP_ACCESS(vp, mode, cr, rv)					\
	rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
#define	VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv)				\
	rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
#define VOP_CREATE(dvp,d,vap,vpp,cr,rv)					\
	rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
#define VOP_REMOVE(dvp,d,cr,rv)						\
	rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
#define	VOP_LINK(tdvp,fvp,d,cr,rv)					\
	rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
#define	VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv)				\
	rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
#define	VOP_MKDIR(dp,d,vap,vpp,cr,rv)					\
	rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
#define	VOP_RMDIR(dp,d,cr,rv)						\
	rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
#define	VOP_READDIR(vp,uiop,cr,eofp,rv)					\
	rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
#define	VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv)				\
	rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
#define	VOP_READLINK(vp,uiop,fl,cr,rv)					\
	rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
#define	VOP_FSYNC(vp,f,cr,b,e,rv)					\
	rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
#define VOP_INACTIVE(vp, cr, rv)					\
	rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
#define VOP_RELEASE(vp, rv)						\
	rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
#define VOP_FID2(vp, fidp, rv)						\
	rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
#define VOP_RWLOCK(vp,i)						\
	(void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
#define VOP_RWLOCK_TRY(vp,i)						\
	_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
#define VOP_RWUNLOCK(vp,i)						\
	(void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
/*
 * NOTE(review): vnodeops_t declares no vop_frlock member, so any use of
 * VOP_FRLOCK cannot compile -- this looks like a dead macro left over
 * from IRIX; verify no caller exists and consider removing it.
 */
#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv)				\
	rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
#define VOP_RECLAIM(vp, rv)						\
	rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv)		\
	rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
#define	VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv)		\
	rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
#define	VOP_ATTR_REMOVE(vp, name, flags, cred, rv)			\
	rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
#define	VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv)		\
	rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
#define VOP_LINK_REMOVED(vp, dvp, linkzero)				\
	(void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
#define VOP_VNODE_CHANGE(vp, cmd, val)					\
	(void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
/*
 * These are page cache functions that now go thru VOPs.
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define VOP_TOSS_PAGES(vp, first, last, fiopt)				\
	_VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
/*
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt)			\
	_VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
/*
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv)		\
	rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv)			\
	rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg)
#define VOP_IFLUSH(vp, flags, rv)					\
	rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
378
/*
 * Flags for read/write calls - same values as IRIX
 */
#define IO_ISAIO	0x00001		/* don't wait for completion */
#define IO_ISDIRECT	0x00004		/* bypass page cache */
#define IO_INVIS	0x00020		/* don't update inode timestamps */

/*
 * Flags for VOP_IFLUSH call
 */
#define FLUSH_SYNC		1	/* wait for flush to complete	*/
#define FLUSH_INODE		2	/* flush the inode itself	*/
#define FLUSH_LOG		4	/* force the last log entry for
					 * this inode out to disk	*/

/*
 * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
 *	VOP_FLUSH_PAGES.
 */
#define FI_NONE			0	/* none */
#define FI_REMAPF		1	/* Do a remapf prior to the operation */
#define FI_REMAPF_LOCKED	2	/* Do a remapf prior to the operation.
					   Prevent VM access to the pages until
					   the operation completes. */

/*
 * Vnode attributes.  va_mask indicates those attributes the caller
 * wants to set (setattr) or extract (getattr); see the XFS_AT_*
 * bits below.
 */
typedef struct vattr {
	int		va_mask;	/* bit-mask of attributes present */
	enum vtype	va_type;	/* vnode type (for create) */
	mode_t		va_mode;	/* file access mode and type */
	nlink_t		va_nlink;	/* number of references to file */
	uid_t		va_uid;		/* owner user id */
	gid_t		va_gid;		/* owner group id */
	xfs_ino_t	va_nodeid;	/* file id */
	xfs_off_t	va_size;	/* file size in bytes */
	u_long		va_blocksize;	/* blocksize preferred for i/o */
	struct timespec	va_atime;	/* time of last access */
	struct timespec	va_mtime;	/* time of last modification */
	struct timespec	va_ctime;	/* time file changed */
	u_int		va_gen;		/* generation number of file */
	xfs_dev_t	va_rdev;	/* device the special file represents */
	__int64_t	va_nblocks;	/* number of blocks allocated */
	u_long		va_xflags;	/* random extended file flags */
	u_long		va_extsize;	/* file extent size */
	u_long		va_nextents;	/* number of extents in file */
	u_long		va_anextents;	/* number of attr extents in file */
	int		va_projid;	/* project id */
} vattr_t;
430
/*
 * setattr or getattr attributes: one bit per vattr field, OR'd into
 * va_mask to say which fields are valid/requested.
 */
#define	XFS_AT_TYPE		0x00000001
#define	XFS_AT_MODE		0x00000002
#define	XFS_AT_UID		0x00000004
#define	XFS_AT_GID		0x00000008
#define	XFS_AT_FSID		0x00000010
#define	XFS_AT_NODEID		0x00000020
#define	XFS_AT_NLINK		0x00000040
#define	XFS_AT_SIZE		0x00000080
#define	XFS_AT_ATIME		0x00000100
#define	XFS_AT_MTIME		0x00000200
#define	XFS_AT_CTIME		0x00000400
#define	XFS_AT_RDEV		0x00000800
#define XFS_AT_BLKSIZE		0x00001000
#define XFS_AT_NBLOCKS		0x00002000
#define XFS_AT_VCODE		0x00004000
#define XFS_AT_MAC		0x00008000
#define XFS_AT_UPDATIME		0x00010000
#define XFS_AT_UPDMTIME		0x00020000
#define XFS_AT_UPDCTIME		0x00040000
#define XFS_AT_ACL		0x00080000
#define XFS_AT_CAP		0x00100000
#define XFS_AT_INF		0x00200000
#define XFS_AT_XFLAGS		0x00400000
#define XFS_AT_EXTSIZE		0x00800000
#define XFS_AT_NEXTENTS		0x01000000
#define XFS_AT_ANEXTENTS	0x02000000
#define XFS_AT_PROJID		0x04000000
#define XFS_AT_SIZE_NOPERM	0x08000000
#define XFS_AT_GENCOUNT		0x10000000

/* Every attribute bit (excluding the UPD* and NOPERM pseudo-bits). */
#define XFS_AT_ALL	(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
		XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
		XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
		XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
		XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
		XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)

/* The attributes needed to satisfy a stat(2)-style request. */
#define XFS_AT_STAT	(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
		XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
		XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
		XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)

#define XFS_AT_TIMES	(XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)

#define XFS_AT_UPDTIMES	(XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)

/* Attributes that may never be set directly by a caller. */
#define XFS_AT_NOSET	(XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
		XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
		XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)

/*
 * Modes.
 */
#define VSUID	S_ISUID		/* set user id on execution */
#define VSGID	S_ISGID		/* set group id on execution */
#define VSVTX	S_ISVTX		/* save swapped text even after use */
#define VREAD	S_IRUSR		/* read, write, execute permissions */
#define VWRITE	S_IWUSR
#define VEXEC	S_IXUSR

#define MODEMASK S_IALLUGO	/* mode bits plus permission bits */

/*
 * Check whether mandatory file locking is enabled.
 * (setgid bit set but group-execute bit clear on a regular file.)
 */
#define MANDLOCK(vp, mode)	\
	((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
501
extern void	vn_init(void);			/* one-time subsystem init */
extern int	vn_wait(struct vnode *);	/* wait for VINACT/VRECLM to clear */
extern vnode_t	*vn_initialize(struct inode *);	/* set up the vnode in a new inode */

/*
 * Acquiring and invalidating vnodes:
 *
 *	if (vn_get(vp, version, 0))
 *		...;
 *	vn_purge(vp, version);
 *
 * vn_get and vn_purge must be called with vmap_t arguments, sampled
 * while a lock that the vnode's VOP_RECLAIM function acquires is
 * held, to ensure that the vnode sampled with the lock held isn't
 * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
 * and the subsequent vn_get or vn_purge.
 */
519
520/*
521 * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
522 */
523typedef struct vnode_map {
524 vfs_t *v_vfsp;
525 vnumber_t v_number; /* in-core vnode number */
526 xfs_ino_t v_ino; /* inode # */
527} vmap_t;
528
529#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \
530 (vmap).v_number = (vp)->v_number, \
531 (vmap).v_ino = (vp)->v_inode.i_ino; }
532
extern void	vn_purge(struct vnode *, vmap_t *);
extern vnode_t	*vn_get(struct vnode *, vmap_t *);
extern int	vn_revalidate(struct vnode *);
extern void	vn_revalidate_core(struct vnode *, vattr_t *);
extern void	vn_remove(struct vnode *);

/* Current reference count, read from the embedded Linux inode's i_count. */
static inline int vn_count(struct vnode *vp)
{
	return atomic_read(&LINVFS_GET_IP(vp)->i_count);
}

/*
 * Vnode reference counting functions (and macros for compatibility).
 * Note the asymmetry: VN_HOLD goes through vn_hold(), while VN_RELE
 * drops straight to iput() on the embedded Linux inode.
 */
extern vnode_t	*vn_hold(struct vnode *);
extern void	vn_rele(struct vnode *);

#if defined(XFS_VNODE_TRACE)
/* Tracing builds record file/line of every hold/release. */
#define VN_HOLD(vp)		\
	((void)vn_hold(vp),	\
	  vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
#define VN_RELE(vp)		\
	  (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
	   iput(LINVFS_GET_IP(vp)))
#else
#define VN_HOLD(vp)		((void)vn_hold(vp))
#define VN_RELE(vp)		(iput(LINVFS_GET_IP(vp)))
#endif
561
/*
 * Vname handling macros: a Linux dentry serves as the name argument.
 */
#define VNAME(dentry)		((char *) (dentry)->d_name.name)
#define VNAMELEN(dentry)	((dentry)->d_name.len)
#define VNAME_TO_VNODE(dentry)	(LINVFS_GET_VP((dentry)->d_inode))

/*
 * Vnode spinlock manipulation.
 */
#define VN_LOCK(vp)		mutex_spinlock(&(vp)->v_lock)
#define VN_UNLOCK(vp, s)	mutex_spinunlock(&(vp)->v_lock, s)
#define VN_FLAGSET(vp,b)	vn_flagset(vp,b)
#define VN_FLAGCLR(vp,b)	vn_flagclr(vp,b)

/*
 * Set flag bit(s) in v_flag under v_lock.
 * NOTE(review): these use plain spin_lock while VN_LOCK uses
 * mutex_spinlock on the same lock -- presumably equivalent wrappers;
 * confirm against the definitions in spin.h.
 */
static __inline__ void vn_flagset(struct vnode *vp, uint flag)
{
	spin_lock(&vp->v_lock);
	vp->v_flag |= flag;
	spin_unlock(&vp->v_lock);
}

/* Clear flag bit(s) in v_flag under v_lock. */
static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
{
	spin_lock(&vp->v_lock);
	vp->v_flag &= ~flag;
	spin_unlock(&vp->v_lock);
}
590
/*
 * Update modify/access/change times on the vnode; these write straight
 * into the embedded Linux inode's timestamp fields.
 */
#define VN_MTIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_mtime = *(tvp))
#define VN_ATIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_atime = *(tvp))
#define VN_CTIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_ctime = *(tvp))

/*
 * Dealing with bad inodes
 */
/* Mark the underlying Linux inode bad (all further ops will fail). */
static inline void vn_mark_bad(struct vnode *vp)
{
	make_bad_inode(LINVFS_GET_IP(vp));
}

/* Nonzero if the underlying Linux inode has been marked bad. */
static inline int VN_BAD(struct vnode *vp)
{
	return is_bad_inode(LINVFS_GET_IP(vp));
}

/*
 * Some useful predicates, all answered from the inode's address space.
 */
#define VN_MAPPED(vp)	mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
#define VN_CACHED(vp)	(LINVFS_GET_IP(vp)->i_mapping->nrpages)
#define VN_DIRTY(vp)	mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
					PAGECACHE_TAG_DIRTY)
#define VMODIFY(vp)	VN_FLAGSET(vp, VMODIFIED)
#define VUNMODIFY(vp)	VN_FLAGCLR(vp, VMODIFIED)

/*
 * Flags to VOP_SETATTR/VOP_GETATTR.
 */
#define	ATTR_UTIME	0x01	/* non-default utime(2) request */
#define	ATTR_DMI	0x08	/* invocation from a DMI function */
#define	ATTR_LAZY	0x80	/* set/get attributes lazily */
#define	ATTR_NONBLOCK	0x100	/* return EAGAIN if operation would block */

/*
 * Flags to VOP_FSYNC and VOP_RECLAIM.
 */
#define FSYNC_NOWAIT	0	/* asynchronous flush */
#define FSYNC_WAIT	0x1	/* synchronous fsync or forced reclaim */
#define FSYNC_INVAL	0x2	/* flush and invalidate cached data */
#define FSYNC_DATA	0x4	/* synchronous fsync of data only */
636
/*
 * Tracking vnode activity.  Implemented in xfs_vnode.c when
 * XFS_VNODE_TRACE is defined; otherwise all hooks compile away.
 */
#if defined(XFS_VNODE_TRACE)

#define	VNODE_TRACE_SIZE	16		/* number of trace entries */
#define	VNODE_KTRACE_ENTRY	1
#define	VNODE_KTRACE_EXIT	2
#define	VNODE_KTRACE_HOLD	3
#define	VNODE_KTRACE_REF	4
#define	VNODE_KTRACE_RELE	5

extern void vn_trace_entry(struct vnode *, char *, inst_t *);
extern void vn_trace_exit(struct vnode *, char *, inst_t *);
extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);

#define VN_TRACE(vp)		\
	vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
#else
/* Tracing disabled: hooks expand to nothing. */
#define vn_trace_entry(a,b,c)
#define vn_trace_exit(a,b,c)
#define vn_trace_hold(a,b,c,d)
#define vn_trace_ref(a,b,c,d)
#define vn_trace_rele(a,b,c,d)
#define VN_TRACE(vp)
#endif

#endif	/* __XFS_VNODE_H__ */