author    David S. Miller <davem@davemloft.net>  2018-01-29 12:02:55 -0500
committer David S. Miller <davem@davemloft.net>  2018-01-29 12:02:55 -0500
commit    bfbe5bab66e1aa68033786599cc495e6728e55e8 (patch)
tree      68e92d489b96c65ef2fe4a662d615d9400af3635
parent    7ece54a60ee2ba7a386308cae73c790bd580589c (diff)
parent    491847f3b29cef0417a03142b96e2a6dea81cca0 (diff)
Merge branch 'ptr_ring-fixes'
Michael S. Tsirkin says:

====================
ptr_ring fixes

This fixes a bunch of issues around ptr_ring use in net core.

One of these: "tap: fix use-after-free" is also needed on net,
but can't be backported cleanly. I will post a net patch separately.

Lightly tested - Jason, could you pls confirm this addresses the
security issue you saw with ptr_ring? Testing reports would be
appreciated too.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Jason Wang <jasowang@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
-rw-r--r--  drivers/net/tap.c                 |  3
-rw-r--r--  include/linux/ptr_ring.h          | 86
-rw-r--r--  include/linux/skb_array.h         |  2
-rw-r--r--  tools/virtio/linux/kernel.h       |  2
-rw-r--r--  tools/virtio/linux/thread_info.h  |  1
-rw-r--r--  tools/virtio/ringtest/main.h      | 59
-rw-r--r--  tools/virtio/ringtest/ptr_ring.c  |  2

7 files changed, 110 insertions(+), 45 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 7c38659b2a76..77872699c45d 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -330,9 +330,6 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
         if (!q)
                 return RX_HANDLER_PASS;
 
-        if (__ptr_ring_full(&q->ring))
-                goto drop;
-
         skb_push(skb, ETH_HLEN);
 
         /* Apply the forward feature mask so that we perform segmentation
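
The check removed above called __ptr_ring_full() without holding producer_lock, which the ptr_ring API does not allow; the drop decision instead rests with the locked produce call later in tap_handle_frame() (not part of this hunk). A minimal sketch of that pattern, assuming the ptr_ring_produce() return value is what signals a full ring:

        /* producer_lock is taken inside ptr_ring_produce(); a non-zero
         * return value means the ring was full, so drop the skb rather
         * than queueing it.
         */
        if (ptr_ring_produce(&q->ring, skb))
                goto drop;
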
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 9ca1726ff963..1883d6137e9b 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -45,9 +45,10 @@ struct ptr_ring {
 };
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). If ring is ever resized, callers must hold
- * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold
- * producer_lock, the next call to __ptr_ring_produce may fail.
+ * for example cpu_relax().
+ *
+ * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
+ * see e.g. ptr_ring_full.
  */
 static inline bool __ptr_ring_full(struct ptr_ring *r)
 {
@@ -113,7 +114,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
         /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
         smp_wmb();
 
-        r->queue[r->producer++] = ptr;
+        WRITE_ONCE(r->queue[r->producer++], ptr);
         if (unlikely(r->producer >= r->size))
                 r->producer = 0;
         return 0;
@@ -169,32 +170,36 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
         return ret;
 }
 
-/* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). Callers must take consumer_lock
- * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
- * If ring is never resized, and if the pointer is merely
- * tested, there's no need to take the lock - see e.g. __ptr_ring_empty.
- * However, if called outside the lock, and if some other CPU
- * consumes ring entries at the same time, the value returned
- * is not guaranteed to be correct.
- * In this case - to avoid incorrectly detecting the ring
- * as empty - the CPU consuming the ring entries is responsible
- * for either consuming all ring entries until the ring is empty,
- * or synchronizing with some other CPU and causing it to
- * execute __ptr_ring_peek and/or consume the ring enteries
- * after the synchronization point.
- */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
         if (likely(r->size))
-                return r->queue[r->consumer_head];
+                return READ_ONCE(r->queue[r->consumer_head]);
         return NULL;
 }
 
-/* See __ptr_ring_peek above for locking rules. */
+/*
+ * Test ring empty status without taking any locks.
+ *
+ * NB: This is only safe to call if ring is never resized.
+ *
+ * However, if some other CPU consumes ring entries at the same time, the value
+ * returned is not guaranteed to be correct.
+ *
+ * In this case - to avoid incorrectly detecting the ring
+ * as empty - the CPU consuming the ring entries is responsible
+ * for either consuming all ring entries until the ring is empty,
+ * or synchronizing with some other CPU and causing it to
+ * re-test __ptr_ring_empty and/or consume the ring enteries
+ * after the synchronization point.
+ *
+ * Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax().
+ */
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
-        return !__ptr_ring_peek(r);
+        if (likely(r->size))
+                return !r->queue[READ_ONCE(r->consumer_head)];
+        return true;
 }
 
 static inline bool ptr_ring_empty(struct ptr_ring *r)
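
As the new comment spells out, __ptr_ring_empty() may be polled without consumer_lock as long as the ring is never resized. A minimal sketch of such a poll loop, using a hypothetical wait_for_work() helper that is not part of this patch; cpu_relax() provides the compiler barrier the comment asks for:

        /* Hypothetical lockless wait: spin until a producer queues an entry.
         * Safe only because the ring is never resized and consumer_head is
         * always published in-bounds via WRITE_ONCE() on the consumer side.
         */
        static void wait_for_work(struct ptr_ring *r)
        {
                while (__ptr_ring_empty(r))
                        cpu_relax();    /* compiler barrier between re-reads */
        }
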
@@ -248,22 +253,28 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
         /* Fundamentally, what we want to do is update consumer
          * index and zero out the entry so producer can reuse it.
          * Doing it naively at each consume would be as simple as:
-         *       r->queue[r->consumer++] = NULL;
-         *       if (unlikely(r->consumer >= r->size))
-         *               r->consumer = 0;
+         *       consumer = r->consumer;
+         *       r->queue[consumer++] = NULL;
+         *       if (unlikely(consumer >= r->size))
+         *               consumer = 0;
+         *       r->consumer = consumer;
          * but that is suboptimal when the ring is full as producer is writing
          * out new entries in the same cache line. Defer these updates until a
          * batch of entries has been consumed.
          */
-        int head = r->consumer_head++;
+        /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
+         * to work correctly.
+         */
+        int consumer_head = r->consumer_head;
+        int head = consumer_head++;
 
         /* Once we have processed enough entries invalidate them in
          * the ring all at once so producer can reuse their space in the ring.
          * We also do this when we reach end of the ring - not mandatory
          * but helps keep the implementation simple.
          */
-        if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
-                     r->consumer_head >= r->size)) {
+        if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
+                     consumer_head >= r->size)) {
                 /* Zero out entries in the reverse order: this way we touch the
                  * cache line that producer might currently be reading the last;
                  * producer won't make progress and touch other cache lines
@@ -271,12 +282,14 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
                  */
                 while (likely(head >= r->consumer_tail))
                         r->queue[head--] = NULL;
-                r->consumer_tail = r->consumer_head;
+                r->consumer_tail = consumer_head;
         }
-        if (unlikely(r->consumer_head >= r->size)) {
-                r->consumer_head = 0;
+        if (unlikely(consumer_head >= r->size)) {
+                consumer_head = 0;
                 r->consumer_tail = 0;
         }
+        /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+        WRITE_ONCE(r->consumer_head, consumer_head);
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
@@ -453,12 +466,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
 
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
-        /* Allocate an extra dummy element at end of ring to avoid consumer head
-         * or produce head access past the end of the array. Possible when
-         * producer/consumer operations and __ptr_ring_peek operations run in
-         * parallel.
-         */
-        return kcalloc(size + 1, sizeof(void *), gfp);
+        return kcalloc(size, sizeof(void *), gfp);
 }
 
 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
@@ -532,7 +540,9 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
                         goto done;
                 }
                 r->queue[head] = batch[--n];
-                r->consumer_tail = r->consumer_head = head;
+                r->consumer_tail = head;
+                /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+                WRITE_ONCE(r->consumer_head, head);
         }
 
 done:
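
The consumer-side hunks above keep consumer_head in-bounds at all times and publish it with a single WRITE_ONCE(), pairing with the READ_ONCE() in __ptr_ring_empty(); that invariant is also what lets __ptr_ring_init_queue_alloc() drop the extra dummy element. Condensed, the pairing looks like this (both statements are taken from the hunks above and shown side by side purely for illustration):

        /* consumer side, under consumer_lock: only ever publish an
         * in-bounds index, and do so with a single store
         */
        WRITE_ONCE(r->consumer_head, consumer_head);

        /* lockless reader in __ptr_ring_empty(): pairs with the store above */
        if (!r->queue[READ_ONCE(r->consumer_head)])
                ;       /* ring currently looks empty */
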
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index c7addf37d119..a6b6e8bb3d7b 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -69,7 +69,7 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb
  */
 static inline bool __skb_array_empty(struct skb_array *a)
 {
-        return !__ptr_ring_peek(&a->ring);
+        return __ptr_ring_empty(&a->ring);
 }
 
 static inline struct sk_buff *__skb_array_peek(struct skb_array *a)
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 395521a7a8d8..fca8381bbe04 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
-#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
 
 #define min(x, y) ({ \
         typeof(x) _min1 = (x); \
diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h
new file mode 100644
index 000000000000..e0f610d08006
--- /dev/null
+++ b/tools/virtio/linux/thread_info.h
@@ -0,0 +1 @@
+#define check_copy_size(A, B, C) (1)
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 5706e075adf2..301d59bfcd0a 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -111,7 +111,7 @@ static inline void busy_wait(void)
 }
 
 #if defined(__x86_64__) || defined(__i386__)
-#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -134,4 +134,61 @@ static inline void busy_wait(void)
         barrier(); \
 } while (0)
 
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+        switch (size) { \
+        case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \
+        case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \
+        case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \
+        case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \
+        default: \
+                barrier(); \
+                __builtin_memcpy((void *)res, (const void *)p, size); \
+                barrier(); \
+        } \
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+        switch (size) {
+        case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+        case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+        case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+        case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+        default:
+                barrier();
+                __builtin_memcpy((void *)p, (const void *)res, size);
+                barrier();
+        }
+}
+
+#define READ_ONCE(x) \
+({ \
+        union { typeof(x) __val; char __c[1]; } __u; \
+        __read_once_size(&(x), __u.__c, sizeof(x)); \
+        smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+        __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+        union { typeof(x) __val; char __c[1]; } __u = \
+                { .__val = (typeof(x)) (val) }; \
+        __write_once_size(&(x), __u.__c, sizeof(x)); \
+        __u.__val; \
+})
+
 #endif
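
The READ_ONCE()/WRITE_ONCE() shims added here track the kernel's definitions closely enough for the ringtest to build the updated ptr_ring.h unchanged. A minimal sketch of the guarantee they provide, using a hypothetical shared word that is not part of this patch:

        static int flag;        /* hypothetical word shared between threads */

        static void producer_signal(void)
        {
                WRITE_ONCE(flag, 1);    /* single store the compiler must emit as written */
        }

        static int consumer_poll(void)
        {
                return READ_ONCE(flag); /* reloaded from memory on every call */
        }
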
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index e6e81305ef46..477899c12c51 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -187,7 +187,7 @@ bool enable_kick()
 
 bool avail_empty()
 {
-        return !__ptr_ring_peek(&array);
+        return __ptr_ring_empty(&array);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)