aboutsummaryrefslogtreecommitdiffstats
path: root/fs/logfs/logfs_abi.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/logfs/logfs_abi.h')
-rw-r--r--fs/logfs/logfs_abi.h627
1 files changed, 627 insertions, 0 deletions
diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h
new file mode 100644
index 000000000000..5d3782ddecc8
--- /dev/null
+++ b/fs/logfs/logfs_abi.h
@@ -0,0 +1,627 @@
/*
 * fs/logfs/logfs_abi.h
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Public header for logfs.
 */
#ifndef FS_LOGFS_LOGFS_ABI_H
#define FS_LOGFS_LOGFS_ABI_H

/*
 * For out-of-kernel compiles: provide a stand-alone BUILD_BUG_ON() so the
 * SIZE_CHECK()s in this header still verify the on-medium layout.  A true
 * condition produces a negative array size, which fails the build.
 */
#ifndef BUILD_BUG_ON
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
#endif

/*
 * SIZE_CHECK(type, size) - break the build unless sizeof(struct type) == size.
 *
 * Expands to a static inline function check_<type>() whose only statement is
 * the BUILD_BUG_ON().  The function is never meant to be called; it exists
 * only to give the compile-time check a place to live.
 */
#define SIZE_CHECK(type, size)					\
static inline void check_##type(void)				\
{								\
	BUILD_BUG_ON(sizeof(struct type) != (size));		\
}

/*
 * Throughout the logfs code, we're constantly dealing with blocks at
 * various positions or offsets.  To remove confusion, we strictly
 * distinguish between a "position" - the logical position within a
 * file and an "offset" - the physical location within the device.
 *
 * Any usage of the term offset for a logical location or position for
 * a physical one is a bug and should get fixed.
 */

/*
 * Blocks are allocated in one of several segments depending on their
 * level.  The following levels are used:
 *  0	- regular data block
 *  1	- i1 indirect blocks
 *  2	- i2 indirect blocks
 *  3	- i3 indirect blocks
 *  4	- i4 indirect blocks
 *  5	- i5 indirect blocks
 *  6	- ifile data blocks
 *  7	- ifile i1 indirect blocks
 *  8	- ifile i2 indirect blocks
 *  9	- ifile i3 indirect blocks
 * 10	- ifile i4 indirect blocks
 * 11	- ifile i5 indirect blocks
 * Potential levels to be used in the future:
 * 12	- gc recycled blocks, long-lived data
 * 13	- replacement blocks, short-lived data
 *
 * Levels 1-11 are necessary for robust gc operations and help separate
 * short-lived metadata from longer-lived file data.  In the future,
 * file data should get separated into several segments based on simple
 * heuristics.  Old data recycled during gc operation is expected to be
 * long-lived.  New data is of uncertain life expectancy.  New data
 * used to replace older blocks in existing files is expected to be
 * short-lived.
 */


/*
 * Magic numbers.  64bit for superblock, 32bit for statfs f_type.
 * LOGFS_MAGIC_U32 is the low 32 bits of LOGFS_MAGIC.
 */
#define LOGFS_MAGIC		0xb21f205ac97e8168ull
#define LOGFS_MAGIC_U32		0xc97e8168u

/*
 * Various blocksize related macros.  Blocksize is currently fixed at 4KiB.
 * Sooner or later that should become configurable and the macros replaced
 * by something superblock-dependent.  Pointers in indirect blocks are and
 * will remain 64bit.
 *
 * LOGFS_BLOCKSIZE	- self-explaining
 * LOGFS_BLOCK_FACTOR	- number of pointers per indirect block
 * LOGFS_BLOCK_BITS	- log2 of LOGFS_BLOCK_FACTOR, used for shifts
 *
 * __u64 (rather than the kernel-internal u64) is used so that the header
 * also works for out-of-kernel compiles, like the __u8/__be* types below.
 */
#define LOGFS_BLOCKSIZE		(4096ull)
#define LOGFS_BLOCK_FACTOR	(LOGFS_BLOCKSIZE / sizeof(__u64))
#define LOGFS_BLOCK_BITS	(9)

/*
 * Number of blocks at various levels of indirection.  There are 16 direct
 * block pointers plus a single indirect pointer.
 *
 * INDIRECT_INDEX is the slot of that indirect pointer, directly behind the
 * direct pointers; LOGFS_EMBEDDED_FIELDS is the total number of slots.
 */
#define I0_BLOCKS		(16)
#define I1_BLOCKS		LOGFS_BLOCK_FACTOR
#define I2_BLOCKS		(LOGFS_BLOCK_FACTOR * I1_BLOCKS)
#define I3_BLOCKS		(LOGFS_BLOCK_FACTOR * I2_BLOCKS)
#define I4_BLOCKS		(LOGFS_BLOCK_FACTOR * I3_BLOCKS)
#define I5_BLOCKS		(LOGFS_BLOCK_FACTOR * I4_BLOCKS)

#define INDIRECT_INDEX		I0_BLOCKS
#define LOGFS_EMBEDDED_FIELDS	(I0_BLOCKS + 1)

/*
 * Sizes at which files require another level of indirection.  Files smaller
 * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
 * similar to ext2 fast symlinks.
 *
 * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
 * direct pointers, else through the 1x indirect pointer and so forth.
 */
#define LOGFS_EMBEDDED_SIZE	(LOGFS_EMBEDDED_FIELDS * sizeof(__u64))
#define LOGFS_I0_SIZE		(I0_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I1_SIZE		(I1_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I2_SIZE		(I2_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I3_SIZE		(I3_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I4_SIZE		(I4_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I5_SIZE		(I5_BLOCKS * LOGFS_BLOCKSIZE)

/*
 * Each indirect block pointer must have this flag set, if all block pointers
 * behind it are set, i.e. there is no hole hidden in the shadow of this
 * indirect block pointer.
 *
 * pure_ofs() strips the flag again and yields the bare pointer value.  The
 * argument is parenthesized so that expressions such as pure_ofs(a | b) are
 * masked as a whole.
 */
#define LOGFS_FULLY_POPULATED	(1ULL << 63)
#define pure_ofs(ofs)		((ofs) & ~LOGFS_FULLY_POPULATED)

/*
 * LogFS needs to separate data into levels.  Each level is defined as the
 * maximal possible distance from the master inode (inode of the inode file).
 * Data blocks reside on level 0, 1x indirect block on level 1, etc.
 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
 * This effort is necessary to guarantee garbage collection to always make
 * progress.
 *
 * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks,
 * LOGFS_MAX_LEVELS is one more for the actual data level of a file.  It is
 * the maximal number of levels for one file.
 * LOGFS_NO_AREAS is twice that, as the inode file and regular files are
 * effectively stacked on top of each other.
 */
#define LOGFS_MAX_INDIRECT	(5)
#define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)
#define LOGFS_NO_AREAS		(2 * LOGFS_MAX_LEVELS)

/* Maximum size of filenames */
#define LOGFS_MAX_NAMELEN	(255)

/* Number of segments in the primary journal. */
#define LOGFS_JOURNAL_SEGS	(16)

/* Maximum number of free/erased/etc. segments in journal entries */
#define MAX_CACHED_SEGS		(64)


/*
 * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store,
 * LOGFS_SEGMENT_HEADERSIZE is the size of the header at the start of each
 * segment,
 * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
 * its header,
 * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
 * its segment header and the padded space at the end when no further objects
 * fit.
 */
#define LOGFS_OBJECT_HEADERSIZE	(0x1c)
#define LOGFS_SEGMENT_HEADERSIZE (0x18)
#define LOGFS_MAX_OBJECTSIZE	(LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
#define LOGFS_SEGMENT_RESERVE \
	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)

/*
 * Segment types, stored in the type field of the segment header:
 * SEG_SUPER	- segment holding a superblock
 * SEG_JOURNAL	- journal segment
 * SEG_OSTORE	- object store segment
 */
enum {
	SEG_SUPER	= 0x01,
	SEG_JOURNAL	= 0x02,
	SEG_OSTORE	= 0x03,
};

173/**
174 * struct logfs_segment_header - per-segment header in the ostore
175 *
176 * @crc: crc32 of header (there is no data)
177 * @pad: unused, must be 0
178 * @type: segment type, see above
179 * @level: GC level for all objects in this segment
180 * @segno: segment number
181 * @ec: erase count for this segment
182 * @gec: global erase count at time of writing
183 */
184struct logfs_segment_header {
185 __be32 crc;
186 __be16 pad;
187 __u8 type;
188 __u8 level;
189 __be32 segno;
190 __be32 ec;
191 __be64 gec;
192};
193
194SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
195
196/**
197 * struct logfs_disk_super - on-medium superblock
198 *
199 * @ds_magic: magic number, must equal LOGFS_MAGIC
200 * @ds_crc: crc32 of structure starting with the next field
201 * @ds_ifile_levels: maximum number of levels for ifile
202 * @ds_iblock_levels: maximum number of levels for regular files
203 * @ds_data_levels: number of seperate levels for data
204 * @pad0: reserved, must be 0
205 * @ds_feature_incompat: incompatible filesystem features
206 * @ds_feature_ro_compat: read-only compatible filesystem features
207 * @ds_feature_compat: compatible filesystem features
208 * @ds_flags: flags
209 * @ds_segment_shift: log2 of segment size
210 * @ds_block_shift: log2 of block size
211 * @ds_write_shift: log2 of write size
212 * @pad1: reserved, must be 0
213 * @ds_journal_seg: segments used by primary journal
214 * @ds_root_reserve: bytes reserved for the superuser
215 * @ds_speed_reserve: bytes reserved to speed up GC
216 * @ds_bad_seg_reserve: number of segments reserved to handle bad blocks
217 * @pad2: reserved, must be 0
218 * @pad3: reserved, must be 0
219 *
220 * Contains only read-only fields. Read-write fields like the amount of used
221 * space is tracked in the dynamic superblock, which is stored in the journal.
222 */
223struct logfs_disk_super {
224 struct logfs_segment_header ds_sh;
225 __be64 ds_magic;
226
227 __be32 ds_crc;
228 __u8 ds_ifile_levels;
229 __u8 ds_iblock_levels;
230 __u8 ds_data_levels;
231 __u8 ds_segment_shift;
232 __u8 ds_block_shift;
233 __u8 ds_write_shift;
234 __u8 pad0[6];
235
236 __be64 ds_filesystem_size;
237 __be32 ds_segment_size;
238 __be32 ds_bad_seg_reserve;
239
240 __be64 ds_feature_incompat;
241 __be64 ds_feature_ro_compat;
242
243 __be64 ds_feature_compat;
244 __be64 ds_feature_flags;
245
246 __be64 ds_root_reserve;
247 __be64 ds_speed_reserve;
248
249 __be32 ds_journal_seg[LOGFS_JOURNAL_SEGS];
250
251 __be64 ds_super_ofs[2];
252 __be64 pad3[8];
253};
254
255SIZE_CHECK(logfs_disk_super, 256);
256
/*
 * Object types, stored in the type field of the object header:
 * OBJ_BLOCK	- Data or indirect block
 * OBJ_INODE	- Inode
 * OBJ_DENTRY	- Dentry
 */
enum {
	OBJ_BLOCK	= 0x04,
	OBJ_INODE	= 0x05,
	OBJ_DENTRY	= 0x06,
};

269/**
270 * struct logfs_object_header - per-object header in the ostore
271 *
272 * @crc: crc32 of header, excluding data_crc
273 * @len: length of data
274 * @type: object type, see above
275 * @compr: compression type
276 * @ino: inode number
277 * @bix: block index
278 * @data_crc: crc32 of payload
279 */
280struct logfs_object_header {
281 __be32 crc;
282 __be16 len;
283 __u8 type;
284 __u8 compr;
285 __be64 ino;
286 __be64 bix;
287 __be32 data_crc;
288} __attribute__((packed));
289
290SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);
291
/*
 * Reserved inode numbers:
 * LOGFS_INO_MAPPING	- NOTE(review): not described in this header;
 *			  purpose to be confirmed
 * LOGFS_INO_MASTER	- master inode (for inode file)
 * LOGFS_INO_ROOT	- root directory
 * LOGFS_INO_SEGFILE	- per-segment used bytes and erase count
 * LOGFS_RESERVED_INOS	- presumably the number of reserved inode numbers,
 *			  i.e. the first one available for regular use -
 *			  TODO confirm
 */
enum {
	LOGFS_INO_MAPPING	= 0x00,
	LOGFS_INO_MASTER	= 0x01,
	LOGFS_INO_ROOT		= 0x02,
	LOGFS_INO_SEGFILE	= 0x03,
	LOGFS_RESERVED_INOS	= 0x10,
};

/*
 * Inode flags.  High bits should never be written to the medium.  They are
 * reserved for in-memory usage.
 * Low bits should either remain in sync with the corresponding FS_*_FL or
 * reuse slots that obviously don't make sense for logfs.
 *
 * LOGFS_IF_COMPRESSED	Same bit as FS_COMPR_FL
 * LOGFS_IF_DIRTY	Inode must be written back
 * LOGFS_IF_ZOMBIE	Inode has been deleted
 * LOGFS_IF_STILLBORN	-ENOSPC happened when creating inode
 */
#define LOGFS_IF_COMPRESSED	0x00000004 /* == FS_COMPR_FL */
#define LOGFS_IF_DIRTY		0x20000000
#define LOGFS_IF_ZOMBIE		0x40000000
#define LOGFS_IF_STILLBORN	0x80000000

/* Flags available to chattr */
#define LOGFS_FL_USER_VISIBLE	(LOGFS_IF_COMPRESSED)
#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
/* Flags inherited from parent directory on file/directory creation */
#define LOGFS_FL_INHERITED	(LOGFS_IF_COMPRESSED)

327/**
328 * struct logfs_disk_inode - on-medium inode
329 *
330 * @di_mode: file mode
331 * @di_pad: reserved, must be 0
332 * @di_flags: inode flags, see above
333 * @di_uid: user id
334 * @di_gid: group id
335 * @di_ctime: change time
336 * @di_mtime: modify time
337 * @di_refcount: reference count (aka nlink or link count)
338 * @di_generation: inode generation, for nfs
339 * @di_used_bytes: number of bytes used
340 * @di_size: file size
341 * @di_data: data pointers
342 */
343struct logfs_disk_inode {
344 __be16 di_mode;
345 __u8 di_height;
346 __u8 di_pad;
347 __be32 di_flags;
348 __be32 di_uid;
349 __be32 di_gid;
350
351 __be64 di_ctime;
352 __be64 di_mtime;
353
354 __be64 di_atime;
355 __be32 di_refcount;
356 __be32 di_generation;
357
358 __be64 di_used_bytes;
359 __be64 di_size;
360
361 __be64 di_data[LOGFS_EMBEDDED_FIELDS];
362};
363
364SIZE_CHECK(logfs_disk_inode, 200);
365
366#define INODE_POINTER_OFS \
367 (offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))
368#define INODE_USED_OFS \
369 (offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))
370#define INODE_SIZE_OFS \
371 (offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))
372#define INODE_HEIGHT_OFS (0)
373
374/**
375 * struct logfs_disk_dentry - on-medium dentry structure
376 *
377 * @ino: inode number
378 * @namelen: length of file name
379 * @type: file type, identical to bits 12..15 of mode
380 * @name: file name
381 */
382/* FIXME: add 6 bytes of padding to remove the __packed */
383struct logfs_disk_dentry {
384 __be64 ino;
385 __be16 namelen;
386 __u8 type;
387 __u8 name[LOGFS_MAX_NAMELEN];
388} __attribute__((packed));
389
390SIZE_CHECK(logfs_disk_dentry, 266);
391
392#define RESERVED 0xffffffff
393#define BADSEG 0xffffffff
394/**
395 * struct logfs_segment_entry - segment file entry
396 *
397 * @ec_level: erase count and level
398 * @valid: number of valid bytes
399 *
400 * Segment file contains one entry for every segment. ec_level contains the
401 * erasecount in the upper 28 bits and the level in the lower 4 bits. An
402 * ec_level of BADSEG (-1) identifies bad segments. valid contains the number
403 * of valid bytes or RESERVED (-1 again) if the segment is used for either the
404 * superblock or the journal, or when the segment is bad.
405 */
406struct logfs_segment_entry {
407 __be32 ec_level;
408 __be32 valid;
409};
410
411SIZE_CHECK(logfs_segment_entry, 8);
412
413/**
414 * struct logfs_journal_header - header for journal entries (JEs)
415 *
416 * @h_crc: crc32 of journal entry
417 * @h_len: length of compressed journal entry,
418 * not including header
419 * @h_datalen: length of uncompressed data
420 * @h_type: JE type
421 * @h_version: unnormalized version of journal entry
422 * @h_compr: compression type
423 * @h_pad: reserved
424 */
425struct logfs_journal_header {
426 __be32 h_crc;
427 __be16 h_len;
428 __be16 h_datalen;
429 __be16 h_type;
430 __be16 h_version;
431 __u8 h_compr;
432 __u8 h_pad[3];
433};
434
435SIZE_CHECK(logfs_journal_header, 16);
436
/*
 * Life expectancy of data.
 * VIM_DEFAULT	- default vim
 * VIM_SEGFILE	- for segment file only - very short-living
 *
 * A VIM_GC value (GC'd data - likely long-living) used to be listed here but
 * is not defined by this enum.
 */
enum logfs_vim {
	VIM_DEFAULT	= 0,
	VIM_SEGFILE	= 1,
};

448/**
449 * struct logfs_je_area - wbuf header
450 *
451 * @segno: segment number of area
452 * @used_bytes: number of bytes already used
453 * @gc_level: GC level
454 * @vim: life expectancy of data
455 *
456 * "Areas" are segments currently being used for writing. There is at least
457 * one area per GC level. Several may be used to seperate long-living from
458 * short-living data. If an area with unknown vim is encountered, it can
459 * simply be closed.
460 * The write buffer immediately follow this header.
461 */
462struct logfs_je_area {
463 __be32 segno;
464 __be32 used_bytes;
465 __u8 gc_level;
466 __u8 vim;
467} __attribute__((packed));
468
469SIZE_CHECK(logfs_je_area, 10);
470
471#define MAX_JOURNAL_HEADER \
472 (sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area))
473
474/**
475 * struct logfs_je_dynsb - dynamic superblock
476 *
477 * @ds_gec: global erase count
478 * @ds_sweeper: current position of GC "sweeper"
479 * @ds_rename_dir: source directory ino (see dir.c documentation)
480 * @ds_rename_pos: position of source dd (see dir.c documentation)
481 * @ds_victim_ino: victims of incomplete dir operation (see dir.c)
482 * @ds_victim_ino: parent inode of victim (see dir.c)
483 * @ds_used_bytes: number of used bytes
484 */
485struct logfs_je_dynsb {
486 __be64 ds_gec;
487 __be64 ds_sweeper;
488
489 __be64 ds_rename_dir;
490 __be64 ds_rename_pos;
491
492 __be64 ds_victim_ino;
493 __be64 ds_victim_parent; /* XXX */
494
495 __be64 ds_used_bytes;
496 __be32 ds_generation;
497 __be32 pad;
498};
499
500SIZE_CHECK(logfs_je_dynsb, 64);
501
502/**
503 * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
504 *
505 * @da_size: size of inode file
506 * @da_last_ino: last created inode
507 * @da_used_bytes: number of bytes used
508 * @da_data: data pointers
509 */
510struct logfs_je_anchor {
511 __be64 da_size;
512 __be64 da_last_ino;
513
514 __be64 da_used_bytes;
515 u8 da_height;
516 u8 pad[7];
517
518 __be64 da_data[LOGFS_EMBEDDED_FIELDS];
519};
520
521SIZE_CHECK(logfs_je_anchor, 168);
522
523/**
524 * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
525 *
526 * @so_segment: segments used for 2nd journal
527 *
528 * Length of the array is given by h_len field in the header.
529 */
530struct logfs_je_spillout {
531 __be64 so_segment[0];
532};
533
534SIZE_CHECK(logfs_je_spillout, 0);
535
536/**
537 * struct logfs_je_journal_ec - erase counts for all journal segments
538 *
539 * @ec: erase count
540 *
541 * Length of the array is given by h_len field in the header.
542 */
543struct logfs_je_journal_ec {
544 __be32 ec[0];
545};
546
547SIZE_CHECK(logfs_je_journal_ec, 0);
548
549/**
550 * struct logfs_je_free_segments - list of free segmetns with erase count
551 */
552struct logfs_je_free_segments {
553 __be32 segno;
554 __be32 ec;
555};
556
557SIZE_CHECK(logfs_je_free_segments, 8);
558
559/**
560 * struct logfs_seg_alias - list of segment aliases
561 */
562struct logfs_seg_alias {
563 __be32 old_segno;
564 __be32 new_segno;
565};
566
567SIZE_CHECK(logfs_seg_alias, 8);
568
569/**
570 * struct logfs_obj_alias - list of object aliases
571 */
572struct logfs_obj_alias {
573 __be64 ino;
574 __be64 bix;
575 __be64 val;
576 u8 level;
577 u8 pad[5];
578 __be16 child_no;
579};
580
581SIZE_CHECK(logfs_obj_alias, 32);
582
/*
 * Compression types.
 *
 * COMPR_NONE	- uncompressed
 * COMPR_ZLIB	- compressed with zlib
 */
enum {
	COMPR_NONE	= 0,
	COMPR_ZLIB	= 1,
};

/*
 * Journal entries come in groups of 16.  First group contains unique
 * entries, next groups contain one entry per level
 *
 * JE_FIRST	- smallest possible journal entry number
 *
 * JEG_BASE	- base group, containing unique entries
 * JE_COMMIT	- commit entry, validates all previous entries
 * JE_DYNSB	- dynamic superblock, anything that ought to be in the
 *		  superblock but cannot because it is read-write data
 * JE_ANCHOR	- anchor aka master inode aka inode file's inode
 * JE_ERASECOUNT  erasecounts for all journal segments
 * JE_SPILLOUT	- unused
 * JE_OBJ_ALIAS	- object aliases
 * JE_AREA	- area description
 *
 * JE_LAST	- largest possible journal entry number
 *
 * NOTE(review): JE_FIRST is 0x01 although the smallest entry type defined
 * here is JE_COMMIT (0x02).  The values are on-medium ABI and are left
 * untouched.
 */
enum {
	JE_FIRST	= 0x01,

	JEG_BASE	= 0x00,
	JE_COMMIT	= 0x02,
	JE_DYNSB	= 0x03,
	JE_ANCHOR	= 0x04,
	JE_ERASECOUNT	= 0x05,
	JE_SPILLOUT	= 0x06,
	JE_OBJ_ALIAS	= 0x0d,
	JE_AREA		= 0x0e,

	JE_LAST		= 0x0e,
};

#endif