aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_filestream.c80
-rw-r--r--fs/xfs/xfs_filestream.h82
2 files changed, 77 insertions, 85 deletions
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index d34b9e8d2d37..9b715dce5699 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -123,6 +123,82 @@ typedef struct fstrm_item
123 xfs_inode_t *pip; /* Parent directory inode pointer. */ 123 xfs_inode_t *pip; /* Parent directory inode pointer. */
124} fstrm_item_t; 124} fstrm_item_t;
125 125
126/*
127 * Allocation group filestream associations are tracked with per-ag atomic
128 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
129 * particular AG already has active filestreams associated with it. The mount
130 * point's m_peraglock is used to protect these counters from per-ag array
131 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
132 * about to reallocate the array, it calls xfs_filestream_flush() with the
133 * m_peraglock held in write mode.
134 *
135 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
136 * the cache elements have finished executing before it returns, it's safe for
137 * the free functions to use the atomic counters without m_peraglock protection.
138 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
139 * whether it was called with the m_peraglock held in read mode, write mode or
140 * not held at all. The race condition this addresses is the following:
141 *
142 * - The work queue scheduler fires and pulls a filestream directory cache
143 * element off the LRU end of the cache for deletion, then gets pre-empted.
144 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
145 * remaining items from the cache and reallocates the mount point's per-ag
146 * array, resetting all the counters to zero.
147 * - The work queue thread resumes and calls the free function for the element
148 * it started cleaning up earlier. In the process it decrements the
149 * filestreams counter for an AG that now has no references.
150 *
151 * With a shrinkfs feature, the above scenario could panic the system.
152 *
153 * All other uses of the following macros should be protected by either the
154 * m_peraglock held in read mode, or the cache's internal locking exposed by the
155 * interval between a call to xfs_mru_cache_lookup() and a call to
156 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
157 * when new elements are added to the cache.
158 *
159 * Combined, these locking rules ensure that no associations will ever exist in
160 * the cache that reference per-ag array elements that have since been
161 * reallocated.
162 */
163static int
164xfs_filestream_peek_ag(
165 xfs_mount_t *mp,
166 xfs_agnumber_t agno)
167{
168 struct xfs_perag *pag;
169 int ret;
170
171 pag = xfs_perag_get(mp, agno);
172 ret = atomic_read(&pag->pagf_fstrms);
173 xfs_perag_put(pag);
174 return ret;
175}
176
177static int
178xfs_filestream_get_ag(
179 xfs_mount_t *mp,
180 xfs_agnumber_t agno)
181{
182 struct xfs_perag *pag;
183 int ret;
184
185 pag = xfs_perag_get(mp, agno);
186 ret = atomic_inc_return(&pag->pagf_fstrms);
187 xfs_perag_put(pag);
188 return ret;
189}
190
191static void
192xfs_filestream_put_ag(
193 xfs_mount_t *mp,
194 xfs_agnumber_t agno)
195{
196 struct xfs_perag *pag;
197
198 pag = xfs_perag_get(mp, agno);
199 atomic_dec(&pag->pagf_fstrms);
200 xfs_perag_put(pag);
201}
126 202
127/* 203/*
128 * Scan the AGs starting at startag looking for an AG that isn't in use and has 204 * Scan the AGs starting at startag looking for an AG that isn't in use and has
@@ -351,16 +427,14 @@ xfs_fstrm_free_func(
351{ 427{
352 fstrm_item_t *item = (fstrm_item_t *)data; 428 fstrm_item_t *item = (fstrm_item_t *)data;
353 xfs_inode_t *ip = item->ip; 429 xfs_inode_t *ip = item->ip;
354 int ref;
355 430
356 ASSERT(ip->i_ino == ino); 431 ASSERT(ip->i_ino == ino);
357 432
358 xfs_iflags_clear(ip, XFS_IFILESTREAM); 433 xfs_iflags_clear(ip, XFS_IFILESTREAM);
359 434
360 /* Drop the reference taken on the AG when the item was added. */ 435 /* Drop the reference taken on the AG when the item was added. */
361 ref = xfs_filestream_put_ag(ip->i_mount, item->ag); 436 xfs_filestream_put_ag(ip->i_mount, item->ag);
362 437
363 ASSERT(ref >= 0);
364 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, 438 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
365 xfs_filestream_peek_ag(ip->i_mount, item->ag)); 439 xfs_filestream_peek_ag(ip->i_mount, item->ag));
366 440
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 260f757bbc5d..09dd9af45434 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -42,88 +42,6 @@ extern ktrace_t *xfs_filestreams_trace_buf;
42 42
43#endif 43#endif
44 44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
85static inline int
86xfs_filestream_peek_ag(
87 xfs_mount_t *mp,
88 xfs_agnumber_t agno)
89{
90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
97}
98
99static inline int
100xfs_filestream_get_ag(
101 xfs_mount_t *mp,
102 xfs_agnumber_t agno)
103{
104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
111}
112
113static inline int
114xfs_filestream_put_ag(
115 xfs_mount_t *mp,
116 xfs_agnumber_t agno)
117{
118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
125}
126
127/* allocation selection flags */ 45/* allocation selection flags */
128typedef enum xfs_fstrm_alloc { 46typedef enum xfs_fstrm_alloc {
129 XFS_PICK_USERDATA = 1, 47 XFS_PICK_USERDATA = 1,