aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <peterz@infradead.org> 2016-10-11 16:54:33 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-10-11 18:06:32 -0400
commit 26b5679e437ef4f83db66437981c7c0d569973b1 (patch)
tree feeb347be17c53374f073abdaf48b3aa11671b64
parent ab47deb6bb03ddf1c1e253bfbbdbf5f867975ca5 (diff)
relay: Use irq_work instead of plain timer for deferred wakeup
Relay avoids calling wake_up_interruptible() for doing the wakeup of readers/consumers, waiting for the generation of new data, from the context of a process which produced the data. This is apparently done to prevent the possibility of a deadlock in case the scheduler itself is generating data for the relay, after acquiring rq->lock. The following patch used a timer (to be scheduled at next jiffy), for delegating the wakeup to another context. commit 7c9cb38302e78d24e37f7d8a2ea7eed4ae5f2fa7 Author: Tom Zanussi <zanussi@comcast.net> Date: Wed May 9 02:34:01 2007 -0700 relay: use plain timer instead of delayed work relay doesn't need to use schedule_delayed_work() for waking readers when a simple timer will do. Scheduling a plain timer, at next jiffies boundary, to do the wakeup causes a significant wakeup latency for the userspace client, which makes relay less suitable for the high-frequency low-payload use cases where the data gets generated at a very high rate, like multiple sub buffers getting filled within a millisecond. Moreover the timer is re-scheduled on every newly produced sub buffer so the timer keeps getting pushed out if sub buffers are filled in a very quick succession (less than a jiffy gap between filling of 2 sub buffers). As a result relay runs out of sub buffers to store the new data. By using irq_work it is ensured that wakeup of userspace client, blocked in the poll call, is done at the earliest (through self IPI or next timer tick) enabling it to always consume the data in time. Also this makes relay consistent with printk & ring buffers (trace), as they too use irq_work for deferred wake up of readers. 
[arnd@arndb.de: select CONFIG_IRQ_WORK] Link: http://lkml.kernel.org/r/20160912154035.3222156-1-arnd@arndb.de [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1472906487-1559-1-git-send-email-akash.goel@intel.com Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Akash Goel <akash.goel@intel.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/relay.h 3
-rw-r--r-- init/Kconfig 1
-rw-r--r-- kernel/relay.c 24
3 files changed, 17 insertions, 11 deletions
diff --git a/include/linux/relay.h b/include/linux/relay.h
index ecbb34a382b8..68c1448e56bb 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -15,6 +15,7 @@
15#include <linux/timer.h> 15#include <linux/timer.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/irq_work.h>
18#include <linux/bug.h> 19#include <linux/bug.h>
19#include <linux/fs.h> 20#include <linux/fs.h>
20#include <linux/poll.h> 21#include <linux/poll.h>
@@ -38,7 +39,7 @@ struct rchan_buf
38 size_t subbufs_consumed; /* count of sub-buffers consumed */ 39 size_t subbufs_consumed; /* count of sub-buffers consumed */
39 struct rchan *chan; /* associated channel */ 40 struct rchan *chan; /* associated channel */
40 wait_queue_head_t read_wait; /* reader wait queue */ 41 wait_queue_head_t read_wait; /* reader wait queue */
41 struct timer_list timer; /* reader wake-up timer */ 42 struct irq_work wakeup_work; /* reader wakeup */
42 struct dentry *dentry; /* channel file dentry */ 43 struct dentry *dentry; /* channel file dentry */
43 struct kref kref; /* channel buffer refcount */ 44 struct kref kref; /* channel buffer refcount */
44 struct page **page_array; /* array of current buffer pages */ 45 struct page **page_array; /* array of current buffer pages */
diff --git a/init/Kconfig b/init/Kconfig
index d7fc22639665..34407f15e6d3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1288,6 +1288,7 @@ config SYSFS_DEPRECATED_V2
1288 1288
1289config RELAY 1289config RELAY
1290 bool "Kernel->user space relay support (formerly relayfs)" 1290 bool "Kernel->user space relay support (formerly relayfs)"
1291 select IRQ_WORK
1291 help 1292 help
1292 This option enables support for relay interface support in 1293 This option enables support for relay interface support in
1293 certain file systems (such as debugfs). 1294 certain file systems (such as debugfs).
diff --git a/kernel/relay.c b/kernel/relay.c
index 9988f5cc2d46..da79a109dbeb 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -328,13 +328,15 @@ static struct rchan_callbacks default_channel_callbacks = {
328 328
329/** 329/**
330 * wakeup_readers - wake up readers waiting on a channel 330 * wakeup_readers - wake up readers waiting on a channel
331 * @data: contains the channel buffer 331 * @work: contains the channel buffer
332 * 332 *
333 * This is the timer function used to defer reader waking. 333 * This is the function used to defer reader waking
334 */ 334 */
335static void wakeup_readers(unsigned long data) 335static void wakeup_readers(struct irq_work *work)
336{ 336{
337 struct rchan_buf *buf = (struct rchan_buf *)data; 337 struct rchan_buf *buf;
338
339 buf = container_of(work, struct rchan_buf, wakeup_work);
338 wake_up_interruptible(&buf->read_wait); 340 wake_up_interruptible(&buf->read_wait);
339} 341}
340 342
@@ -352,9 +354,10 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
352 if (init) { 354 if (init) {
353 init_waitqueue_head(&buf->read_wait); 355 init_waitqueue_head(&buf->read_wait);
354 kref_init(&buf->kref); 356 kref_init(&buf->kref);
355 setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); 357 init_irq_work(&buf->wakeup_work, wakeup_readers);
356 } else 358 } else {
357 del_timer_sync(&buf->timer); 359 irq_work_sync(&buf->wakeup_work);
360 }
358 361
359 buf->subbufs_produced = 0; 362 buf->subbufs_produced = 0;
360 buf->subbufs_consumed = 0; 363 buf->subbufs_consumed = 0;
@@ -487,7 +490,7 @@ free_buf:
487static void relay_close_buf(struct rchan_buf *buf) 490static void relay_close_buf(struct rchan_buf *buf)
488{ 491{
489 buf->finalized = 1; 492 buf->finalized = 1;
490 del_timer_sync(&buf->timer); 493 irq_work_sync(&buf->wakeup_work);
491 buf->chan->cb->remove_buf_file(buf->dentry); 494 buf->chan->cb->remove_buf_file(buf->dentry);
492 kref_put(&buf->kref, relay_remove_buf); 495 kref_put(&buf->kref, relay_remove_buf);
493} 496}
@@ -754,14 +757,15 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
754 buf->early_bytes += buf->chan->subbuf_size - 757 buf->early_bytes += buf->chan->subbuf_size -
755 buf->padding[old_subbuf]; 758 buf->padding[old_subbuf];
756 smp_mb(); 759 smp_mb();
757 if (waitqueue_active(&buf->read_wait)) 760 if (waitqueue_active(&buf->read_wait)) {
758 /* 761 /*
759 * Calling wake_up_interruptible() from here 762 * Calling wake_up_interruptible() from here
760 * will deadlock if we happen to be logging 763 * will deadlock if we happen to be logging
761 * from the scheduler (trying to re-grab 764 * from the scheduler (trying to re-grab
762 * rq->lock), so defer it. 765 * rq->lock), so defer it.
763 */ 766 */
764 mod_timer(&buf->timer, jiffies + 1); 767 irq_work_queue(&buf->wakeup_work);
768 }
765 } 769 }
766 770
767 old = buf->data; 771 old = buf->data;