diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/fiemap.txt | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt new file mode 100644 index 000000000000..1e3defcfe50b --- /dev/null +++ b/Documentation/filesystems/fiemap.txt | |||
@@ -0,0 +1,228 @@ | |||
1 | ============ | ||
2 | Fiemap Ioctl | ||
3 | ============ | ||
4 | |||
5 | The fiemap ioctl is an efficient method for userspace to get file | ||
6 | extent mappings. Instead of block-by-block mapping (such as bmap), fiemap | ||
7 | returns a list of extents. | ||
8 | |||
9 | |||
10 | Request Basics | ||
11 | -------------- | ||
12 | |||
13 | A fiemap request is encoded within struct fiemap: | ||
14 | |||
15 | struct fiemap { | ||
16 | __u64 fm_start; /* logical offset (inclusive) at | ||
17 | * which to start mapping (in) */ | ||
18 | __u64 fm_length; /* logical length of mapping which | ||
19 | * userspace cares about (in) */ | ||
20 | __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ | ||
21 | __u32 fm_mapped_extents; /* number of extents that were | ||
22 | * mapped (out) */ | ||
23 | __u32 fm_extent_count; /* size of fm_extents array (in) */ | ||
24 | __u32 fm_reserved; | ||
25 | struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ | ||
26 | }; | ||
27 | |||
28 | |||
29 | fm_start, and fm_length specify the logical range within the file | ||
30 | which the process would like mappings for. Extents returned mirror | ||
31 | those on disk - that is, the logical offset of the 1st returned extent | ||
32 | may start before fm_start, and the range covered by the last returned | ||
33 | extent may end after fm_length. All offsets and lengths are in bytes. | ||
34 | |||
35 | Certain flags to modify the way in which mappings are looked up can be | ||
36 | set in fm_flags. If the kernel doesn't understand some particular | ||
37 | flags, it will return EBADR and the contents of fm_flags will contain | ||
38 | the set of flags which caused the error. If the kernel is compatible | ||
39 | with all flags passed, the contents of fm_flags will be unmodified. | ||
40 | It is up to userspace to determine whether rejection of a particular | ||
41 | flag is fatal to it's operation. This scheme is intended to allow the | ||
42 | fiemap interface to grow in the future but without losing | ||
43 | compatibility with old software. | ||
44 | |||
45 | fm_extent_count specifies the number of elements in the fm_extents[] array | ||
46 | that can be used to return extents. If fm_extent_count is zero, then the | ||
47 | fm_extents[] array is ignored (no extents will be returned), and the | ||
48 | fm_mapped_extents count will hold the number of extents needed in | ||
49 | fm_extents[] to hold the file's current mapping. Note that there is | ||
50 | nothing to prevent the file from changing between calls to FIEMAP. | ||
51 | |||
52 | The following flags can be set in fm_flags: | ||
53 | |||
54 | * FIEMAP_FLAG_SYNC | ||
55 | If this flag is set, the kernel will sync the file before mapping extents. | ||
56 | |||
57 | * FIEMAP_FLAG_XATTR | ||
58 | If this flag is set, the extents returned will describe the inodes | ||
59 | extended attribute lookup tree, instead of it's data tree. | ||
60 | |||
61 | |||
62 | Extent Mapping | ||
63 | -------------- | ||
64 | |||
65 | Extent information is returned within the embedded fm_extents array | ||
66 | which userspace must allocate along with the fiemap structure. The | ||
67 | number of elements in the fiemap_extents[] array should be passed via | ||
68 | fm_extent_count. The number of extents mapped by kernel will be | ||
69 | returned via fm_mapped_extents. If the number of fiemap_extents | ||
70 | allocated is less than would be required to map the requested range, | ||
71 | the maximum number of extents that can be mapped in the fm_extent[] | ||
72 | array will be returned and fm_mapped_extents will be equal to | ||
73 | fm_extent_count. In that case, the last extent in the array will not | ||
74 | complete the requested range and will not have the FIEMAP_EXTENT_LAST | ||
75 | flag set (see the next section on extent flags). | ||
76 | |||
77 | Each extent is described by a single fiemap_extent structure as | ||
78 | returned in fm_extents. | ||
79 | |||
80 | struct fiemap_extent { | ||
81 | __u64 fe_logical; /* logical offset in bytes for the start of | ||
82 | * the extent */ | ||
83 | __u64 fe_physical; /* physical offset in bytes for the start | ||
84 | * of the extent */ | ||
85 | __u64 fe_length; /* length in bytes for the extent */ | ||
86 | __u64 fe_reserved64[2]; | ||
87 | __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ | ||
88 | __u32 fe_reserved[3]; | ||
89 | }; | ||
90 | |||
91 | All offsets and lengths are in bytes and mirror those on disk. It is valid | ||
92 | for an extents logical offset to start before the request or it's logical | ||
93 | length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is | ||
94 | returned, fe_logical, fe_physical, and fe_length will be aligned to the | ||
95 | block size of the file system. With the exception of extents flagged as | ||
96 | FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. | ||
97 | |||
98 | The fe_flags field contains flags which describe the extent returned. | ||
99 | A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in | ||
100 | the file so that the process making fiemap calls can determine when no | ||
101 | more extents are available, without having to call the ioctl again. | ||
102 | |||
103 | Some flags are intentionally vague and will always be set in the | ||
104 | presence of other more specific flags. This way a program looking for | ||
105 | a general property does not have to know all existing and future flags | ||
106 | which imply that property. | ||
107 | |||
108 | For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL | ||
109 | are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking | ||
110 | for inline or tail-packed data can key on the specific flag. Software | ||
111 | which simply cares not to try operating on non-aligned extents | ||
112 | however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to | ||
113 | worry about all present and future flags which might imply unaligned | ||
114 | data. Note that the opposite is not true - it would be valid for | ||
115 | FIEMAP_EXTENT_NOT_ALIGNED to appear alone. | ||
116 | |||
117 | * FIEMAP_EXTENT_LAST | ||
118 | This is the last extent in the file. A mapping attempt past this | ||
119 | extent will return nothing. | ||
120 | |||
121 | * FIEMAP_EXTENT_UNKNOWN | ||
122 | The location of this extent is currently unknown. This may indicate | ||
123 | the data is stored on an inaccessible volume or that no storage has | ||
124 | been allocated for the file yet. | ||
125 | |||
126 | * FIEMAP_EXTENT_DELALLOC | ||
127 | - This will also set FIEMAP_EXTENT_UNKNOWN. | ||
128 | Delayed allocation - while there is data for this extent, it's | ||
129 | physical location has not been allocated yet. | ||
130 | |||
131 | * FIEMAP_EXTENT_ENCODED | ||
132 | This extent does not consist of plain filesystem blocks but is | ||
133 | encoded (e.g. encrypted or compressed). Reading the data in this | ||
134 | extent via I/O to the block device will have undefined results. | ||
135 | |||
136 | Note that it is *always* undefined to try to update the data | ||
137 | in-place by writing to the indicated location without the | ||
138 | assistance of the filesystem, or to access the data using the | ||
139 | information returned by the FIEMAP interface while the filesystem | ||
140 | is mounted. In other words, user applications may only read the | ||
141 | extent data via I/O to the block device while the filesystem is | ||
142 | unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is | ||
143 | clear; user applications must not try reading or writing to the | ||
144 | filesystem via the block device under any other circumstances. | ||
145 | |||
146 | * FIEMAP_EXTENT_DATA_ENCRYPTED | ||
147 | - This will also set FIEMAP_EXTENT_ENCODED | ||
148 | The data in this extent has been encrypted by the file system. | ||
149 | |||
150 | * FIEMAP_EXTENT_NOT_ALIGNED | ||
151 | Extent offsets and length are not guaranteed to be block aligned. | ||
152 | |||
153 | * FIEMAP_EXTENT_DATA_INLINE | ||
154 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
155 | Data is located within a meta data block. | ||
156 | |||
157 | * FIEMAP_EXTENT_DATA_TAIL | ||
158 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
159 | Data is packed into a block with data from other files. | ||
160 | |||
161 | * FIEMAP_EXTENT_UNWRITTEN | ||
162 | Unwritten extent - the extent is allocated but it's data has not been | ||
163 | initialized. This indicates the extent's data will be all zero if read | ||
164 | through the filesystem but the contents are undefined if read directly from | ||
165 | the device. | ||
166 | |||
167 | * FIEMAP_EXTENT_MERGED | ||
168 | This will be set when a file does not support extents, i.e., it uses a block | ||
169 | based addressing scheme. Since returning an extent for each block back to | ||
170 | userspace would be highly inefficient, the kernel will try to merge most | ||
171 | adjacent blocks into 'extents'. | ||
172 | |||
173 | |||
174 | VFS -> File System Implementation | ||
175 | --------------------------------- | ||
176 | |||
177 | File systems wishing to support fiemap must implement a ->fiemap callback on | ||
178 | their inode_operations structure. The fs ->fiemap call is responsible for | ||
179 | defining it's set of supported fiemap flags, and calling a helper function on | ||
180 | each discovered extent: | ||
181 | |||
182 | struct inode_operations { | ||
183 | ... | ||
184 | |||
185 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | ||
186 | u64 len); | ||
187 | |||
188 | ->fiemap is passed struct fiemap_extent_info which describes the | ||
189 | fiemap request: | ||
190 | |||
191 | struct fiemap_extent_info { | ||
192 | unsigned int fi_flags; /* Flags as passed from user */ | ||
193 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | ||
194 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | ||
195 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ | ||
196 | }; | ||
197 | |||
198 | It is intended that the file system should not need to access any of this | ||
199 | structure directly. | ||
200 | |||
201 | |||
202 | Flag checking should be done at the beginning of the ->fiemap callback via the | ||
203 | fiemap_check_flags() helper: | ||
204 | |||
205 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | ||
206 | |||
207 | The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The | ||
208 | set of fiemap flags which the fs understands should be passed via fs_flags. If | ||
209 | fiemap_check_flags finds invalid user flags, it will place the bad values in | ||
210 | fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from | ||
211 | fiemap_check_flags(), it should immediately exit, returning that error back to | ||
212 | ioctl_fiemap(). | ||
213 | |||
214 | |||
215 | For each extent in the request range, the file system should call | ||
216 | the helper function, fiemap_fill_next_extent(): | ||
217 | |||
218 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | ||
219 | u64 phys, u64 len, u32 flags, u32 dev); | ||
220 | |||
221 | fiemap_fill_next_extent() will use the passed values to populate the | ||
222 | next free extent in the fm_extents array. 'General' extent flags will | ||
223 | automatically be set from specific flags on behalf of the calling file | ||
224 | system so that the userspace API is not broken. | ||
225 | |||
226 | fiemap_fill_next_extent() returns 0 on success, and 1 when the | ||
227 | user-supplied fm_extents array is full. If an error is encountered | ||
228 | while copying the extent to user memory, -EFAULT will be returned. | ||