aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_filestream.h
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2007-07-10 21:09:12 -0400
committer Tim Shimmin <tes@chook.melbourne.sgi.com> 2007-07-14 01:40:53 -0400
commit 2a82b8be8a8dacb48cb7371449a7a9daa558b4a8 (patch)
tree 44e6a81dd0e7d7dc634e04b9230b5262a254c5ee /fs/xfs/xfs_filestream.h
parent 0892ccd6fe13e08ad9e57007afbb78fe02d66005 (diff)
[XFS] Concurrent Multi-File Data Streams
In media spaces, video is often stored in a frame-per-file format. When dealing with uncompressed realtime HD video streams in this format, it is crucial that files do not get fragmented and that multiple files are placed contiguously on disk. When multiple streams are being ingested and played out at the same time, it is critical that the filesystem does not cross the streams and interleave them together, as this creates seek and readahead cache miss latency and prevents both ingest and playout from meeting frame rate targets. This patch set introduces a "stream of files" concept into the allocator to place all the data from a single stream contiguously on disk so that RAID array readahead can be used effectively. Each additional stream gets placed in different allocation groups within the filesystem, thereby ensuring that we don't cross any streams. When an AG fills up, we select a new AG for the stream that is not in use. The core of the functionality is the stream tracking - each inode that we create in a directory needs to be associated with the directory's stream. Hence every time we create a file, we look up the directory's stream object and associate the new file with that object. Once we have a stream object for a file, we use the AG that the stream object points to for allocations. If we can't allocate in that AG (e.g. it is full) we move the entire stream to another AG. Other inodes in the same stream are moved to the new AG on their next allocation (i.e. lazy update). Stream objects are kept in a cache and hold a reference on the inode. Hence the inode cannot be reclaimed while there is an outstanding stream reference. This means that on unlink we need to remove the stream association and we also need to flush all the associations on certain events that want to reclaim all unreferenced inodes (e.g. filesystem freeze).
SGI-PV: 964469
SGI-Modid: xfs-linux-melb:xfs-kern:29096a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Vlad Apostolov <vapo@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_filestream.h')
-rw-r--r-- fs/xfs/xfs_filestream.h 136
1 files changed, 136 insertions, 0 deletions
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
new file mode 100644
index 000000000000..f655f7dc334c
--- /dev/null
+++ b/fs/xfs/xfs_filestream.h
@@ -0,0 +1,136 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_FILESTREAM_H__
19#define __XFS_FILESTREAM_H__
20
21#ifdef __KERNEL__
22
23struct xfs_mount;
24struct xfs_inode;
25struct xfs_perag;
26struct xfs_bmalloca;
27
#ifdef XFS_FILESTREAMS_TRACE

/*
 * Numeric identifiers for the individual filestream trace record types.
 * Only available when XFS_FILESTREAMS_TRACE is defined.
 */
#define XFS_FSTRM_KTRACE_INFO		1
#define XFS_FSTRM_KTRACE_AGSCAN		2
#define XFS_FSTRM_KTRACE_AGPICK1	3
#define XFS_FSTRM_KTRACE_AGPICK2	4
#define XFS_FSTRM_KTRACE_UPDATE		5
#define XFS_FSTRM_KTRACE_FREE		6
#define XFS_FSTRM_KTRACE_ITEM_LOOKUP	7
#define XFS_FSTRM_KTRACE_ASSOCIATE	8
#define XFS_FSTRM_KTRACE_MOVEAG		9
#define XFS_FSTRM_KTRACE_ORPHAN		10

/*
 * NOTE(review): presumably the capacity of the trace buffer declared
 * below -- confirm against its allocation in xfs_filestream.c.
 */
#define XFS_FSTRM_KTRACE_SIZE		16384
extern ktrace_t	*xfs_filestreams_trace_buf;

#endif /* XFS_FILESTREAMS_TRACE */
44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82STATIC_INLINE int
83xfs_filestream_peek_ag(
84 xfs_mount_t *mp,
85 xfs_agnumber_t agno)
86{
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms);
88}
89
90STATIC_INLINE int
91xfs_filestream_get_ag(
92 xfs_mount_t *mp,
93 xfs_agnumber_t agno)
94{
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
96}
97
98STATIC_INLINE int
99xfs_filestream_put_ag(
100 xfs_mount_t *mp,
101 xfs_agnumber_t agno)
102{
103 return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
104}
105
/*
 * Allocation selection flags.  Each value occupies its own bit, so the
 * flags can be OR-ed together.
 */
typedef enum xfs_fstrm_alloc {
	XFS_PICK_USERDATA	= 1 << 0,
	XFS_PICK_LOWSPACE	= 1 << 1,
} xfs_fstrm_alloc_t;
111
112/* prototypes for filestream.c */
113int xfs_filestream_init(void);
114void xfs_filestream_uninit(void);
115int xfs_filestream_mount(struct xfs_mount *mp);
116void xfs_filestream_unmount(struct xfs_mount *mp);
117void xfs_filestream_flush(struct xfs_mount *mp);
118xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
119int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
120void xfs_filestream_deassociate(struct xfs_inode *ip);
121int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
122
123
124/* filestreams for the inode? */
125STATIC_INLINE int
126xfs_inode_is_filestream(
127 struct xfs_inode *ip)
128{
129 return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
130 xfs_iflags_test(ip, XFS_IFILESTREAM) ||
131 (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
132}
133
134#endif /* __KERNEL__ */
135
136#endif /* __XFS_FILESTREAM_H__ */