author		Mark Brown <broonie@kernel.org>	2016-02-19 11:20:02 -0500
committer	Mark Brown <broonie@kernel.org>	2016-02-19 11:20:02 -0500
commit		88a69d7b07747af08f737b160b27503b1400d3b6 (patch)
tree		3205fa71ba94e1b50ed74c80cd13ac6aecd066c4 /tools
parent		8faa268ab780a7379cfe54b882c6f6e0083233f2 (diff)
parent		f11aec0d7c835c3b83e74e3fd60d2e48a850c857 (diff)
Merge tag 'asoc-fix-v4.5-rc4' into asoc-qcom
ASoC: Fixes for v4.5

A rather large batch of fixes here, almost all in the Intel driver. The changes that got merged in this merge window for Skylake were rather large and, as well as the issues you'd expect in a large block of new code, there were some problems created for older processors that needed fixing up. Things are largely settling down now, hopefully.
Diffstat (limited to 'tools')
-rw-r--r--  tools/perf/Makefile.perf                        |  25
-rw-r--r--  tools/perf/arch/x86/tests/intel-cqm.c           |   2
-rw-r--r--  tools/perf/config/Makefile                      |   4
-rw-r--r--  tools/perf/tests/make                           |  55
-rw-r--r--  tools/perf/ui/browsers/annotate.c               |   4
-rw-r--r--  tools/perf/util/hist.c                          |   2
-rw-r--r--  tools/perf/util/session.c                       |   2
-rw-r--r--  tools/perf/util/stat.c                          |   1
-rw-r--r--  tools/perf/util/symbol.c                        |   2
-rw-r--r--  tools/testing/nvdimm/test/iomap.c               |   2
-rw-r--r--  tools/testing/selftests/timers/valid-adjtimex.c | 139
-rw-r--r--  tools/virtio/asm/barrier.h                      |  22
-rw-r--r--  tools/virtio/linux/compiler.h                   |   9
-rw-r--r--  tools/virtio/linux/kernel.h                     |   1
-rw-r--r--  tools/virtio/ringtest/Makefile                  |  22
-rw-r--r--  tools/virtio/ringtest/README                    |   2
-rw-r--r--  tools/virtio/ringtest/main.c                    | 366
-rw-r--r--  tools/virtio/ringtest/main.h                    | 119
-rw-r--r--  tools/virtio/ringtest/ring.c                    | 272
-rwxr-xr-x  tools/virtio/ringtest/run-on-all.sh             |  24
-rw-r--r--  tools/virtio/ringtest/virtio_ring_0_9.c         | 316
-rw-r--r--  tools/virtio/ringtest/virtio_ring_poll.c        |   2
22 files changed, 1360 insertions(+), 33 deletions(-)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0a22407e1d7d..5d34815c7ccb 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -77,6 +77,9 @@ include config/utilities.mak
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
 include config/Makefile
 endif
 
+# The FEATURE_DUMP_EXPORT holds location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
 	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
@@ -611,6 +623,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
 	$(python-clean)
 
 #
+# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
+# file if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+	@cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+	@echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+	@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
+#
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:
 #
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 3e89ba825f6b..7f064eb37158 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -17,7 +17,7 @@ static pid_t spawn(void)
 	if (pid)
 		return pid;
 
-	while(1);
-	sleep(5);
+	while(1)
+		sleep(5);
 	return 0;
 }
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index e5959c136a19..511141b102e8 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index df38decc48c3..f918015512af 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
 	@echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile
-	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,26 @@ else
 endif
 else
 PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -156,11 +176,11 @@ test_make_doc := $(test_ok)
 test_make_help_O := $(test_ok)
 test_make_doc_O := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o := test -f $(PERF)/perf.o
-test_make_util_map_o := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o := test -f $(PERF_O)/perf.o
+test_make_util_map_o := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do \
@@ -227,7 +247,7 @@ test_make_perf_o_O := test -f $$TMP_O/perf.o
 test_make_util_map_o_O := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -247,12 +267,12 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
 
 $(run):
 	$(call clean)
 	@TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
+	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1; \
 	echo "  test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +283,7 @@ $(run_O):
 	$(call clean)
 	@TMP_O=$$(mktemp -d); \
 	TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
+	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1 && \
 	echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -276,17 +296,22 @@ tarpkg:
 	( eval $$cmd ) >> $@ 2>&1 && \
 	rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
-	@echo "- make -C <kernelsrc> tools/perf"
+	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
 	$(call clean); \
-	(make -C ../.. tools/perf) > $@ 2>&1 && \
-	test -x perf && rm -f $@ || (cat $@ ; false)
+	(make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-	@echo "- make -C <kernelsrc>/tools perf"
+	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
 	$(call clean); \
-	(make -C ../../tools perf) > $@ 2>&1 && \
-	test -x perf && rm -f $@ || (cat $@ ; false)
+	(make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
 	@echo OK
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d4d7cc27252f..718bd46d47fa 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		nd = browser->curr_hot;
 		break;
 	case K_UNTAB:
-		if (nd != NULL)
+		if (nd != NULL) {
 			nd = rb_next(nd);
-		if (nd == NULL)
-			nd = rb_first(&browser->entries);
-		else
+			if (nd == NULL)
+				nd = rb_first(&browser->entries);
+		} else
 			nd = browser->curr_hot;
 		break;
 	case K_F1:
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c226303e3da0..68a7612019dc 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 			symlen = unresolved_col_width + 4 + 2;
 			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
 					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen);
 		}
 
 		if (h->mem_info->iaddr.sym) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d5636ba94b20..40b7a0d0905b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
 	machine = machines__find(machines, pid);
 	if (!machine)
-		machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+		machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
 	return machine;
 }
 
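This fix changes the fallback to machines__findnew(), which creates the default guest machine on first use instead of returning NULL for samples from an unknown guest pid. A minimal sketch of the find-or-create contract it relies on; this is an illustration, not the actual rbtree-backed code in tools/perf/util/machine.c:

/* Sketch of the findnew contract (hypothetical helper name). */
struct machine *machines__findnew_sketch(struct machines *machines, pid_t pid)
{
	struct machine *machine = machines__find(machines, pid); /* NULL if absent */

	if (!machine) /* first sample for this pid: create and insert */
		machine = machines__add(machines, pid, "");
	return machine;
}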
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2f901d15e063..2b58edccd56f 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	int i, ret;
 
 	aggr->val = aggr->ena = aggr->run = 0;
-	init_stats(ps->res_stats);
 
 	if (counter->per_pkg)
 		zero_per_pkg(counter);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3b2de6eb3376..ab02209a7cf3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	 * Read the build id if possible. This is required for
 	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
 	 */
-	if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+	if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
 		dso__set_build_id(dso, build_id);
 
 	/*
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 7ec7df9e7fc7..0c1a7e65bb81 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
 	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
 
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index e86d937cc22c..60fe3c569bd9 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -45,7 +45,17 @@ static inline int ksft_exit_fail(void)
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@ out:
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano) {
+		tmx.modes |= ADJ_NANO;
+
+		tmx.time.tv_sec = offset / NSEC_PER_SEC;
+		tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += NSEC_PER_SEC;
+		}
+	} else {
+		tmx.time.tv_sec = offset / USEC_PER_SEC;
+		tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += USEC_PER_SEC;
+		}
+	}
+
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret < 0) {
+		printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano)
+		tmx.modes |= ADJ_NANO;
+
+	tmx.time.tv_sec = sec;
+	tmx.time.tv_usec = usec;
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret >= 0) {
+		printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int validate_set_offset(void)
+{
+	printf("Testing ADJ_SETOFFSET... ");
+
+	/* Test valid values */
+	if (set_offset(NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC + 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC + 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+		return -1;
+
+	/* Test invalid values */
+	if (set_bad_offset(0, -1, 1))
+		return -1;
+	if (set_bad_offset(0, -1, 0))
+		return -1;
+	if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, -USEC_PER_SEC, 0))
+		return -1;
+
+	printf("[OK]\n");
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	if (validate_freq())
 		return ksft_exit_fail();
 
+	if (validate_set_offset())
+		return ksft_exit_fail();
+
 	return ksft_exit_pass();
 }
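The negative-offset handling in set_offset() exists because struct timex wants a nonnegative fractional field: C division truncates toward zero, so a negative offset must borrow one second to bring the fraction back into [0, 1s). A standalone illustration of the same arithmetic, with values chosen for the example:

#include <stdio.h>

#define NSEC_PER_SEC 1000000000LL

int main(void)
{
	long long offset = -NSEC_PER_SEC - NSEC_PER_SEC / 2; /* -1.5s */
	long long sec = offset / NSEC_PER_SEC;  /* -1, truncated toward zero */
	long long nsec = offset % NSEC_PER_SEC; /* -500000000 */

	if (offset < 0 && nsec) { /* borrow a second, as set_offset() does */
		sec -= 1;             /* -2 */
		nsec += NSEC_PER_SEC; /* 500000000 */
	}
	/* -2s + 0.5s == -1.5s, with the fraction now in range */
	printf("sec=%lld nsec=%lld\n", sec, nsec);
	return 0;
}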
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926bda88..ba34f9e96efd 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb() mb()
-# define dma_rmb() barrier()
-# define dma_wmb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value) do { \
+	typeof(var) virt_store_mb_value = (value); \
+	__atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+			  __ATOMIC_SEQ_CST); \
+	barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
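virt_store_mb() packages the store-then-full-barrier idiom virtio needs when one side publishes an index and must then read a flag the other side may have just written. A hedged sketch of that usage; avail_idx and peer_event are illustrative names, not part of this patch:

/* Publish an index, then decide whether to notify the peer. */
static unsigned short avail_idx;
static unsigned short peer_event;

static int publish_and_check(unsigned short idx)
{
	virt_store_mb(avail_idx, idx); /* store followed by a full barrier */
	/* this load cannot be reordered before the store above */
	return peer_event == idx;
}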
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000000000000..845960e1cbf2
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+	(*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+#endif
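These wrappers matter for the ringtest code below, which spins on shared memory: without a volatile access the compiler may hoist the load out of the loop and spin forever on a stale value. A minimal sketch, with flag standing in for a shared ring field:

static int flag;

static void wait_for_flag(void)
{
	/* the volatile cast forces a fresh load of flag on every iteration */
	while (!READ_ONCE(flag))
		;
}

static void set_flag(void)
{
	WRITE_ONCE(flag, 1); /* forces a real store, not a deferred one */
}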
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d5691ba7..033849948215 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 000000000000..feaa64ac4630
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+	-rm main.o
+	-rm ring.o ring
+	-rm virtio_ring_0_9.o virtio_ring_0_9
+	-rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 000000000000..34e94c46104f
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 000000000000..3a5ff438bd62
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = write(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = read(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void kick(void)
+{
+	notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+	wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+	notify(callfd);
+}
+
+void wait_for_call(void)
+{
+	wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+	cpu_set_t cpuset;
+	int ret;
+	pthread_t self;
+	long int cpu;
+	char *endptr;
+
+	if (!arg)
+		return;
+
+	cpu = strtol(arg, &endptr, 0);
+	assert(!*endptr);
+
+	assert(cpu >= 0 && cpu < CPU_SETSIZE);
+
+	self = pthread_self();
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+	assert(!ret);
+}
+
+static void run_guest(void)
+{
+	int completed_before;
+	int completed = 0;
+	int started = 0;
+	int bufs = runcycles;
+	int spurious = 0;
+	int r;
+	unsigned len;
+	void *buf;
+	int tokick = batch;
+
+	for (;;) {
+		if (do_sleep)
+			disable_call();
+		completed_before = completed;
+		do {
+			if (started < bufs &&
+			    started - completed < max_outstanding) {
+				r = add_inbuf(0, NULL, "Hello, world!");
+				if (__builtin_expect(r == 0, true)) {
+					++started;
+					if (!--tokick) {
+						tokick = batch;
+						if (do_sleep)
+							kick_available();
+					}
+
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (get_buf(&len, &buf)) {
+				++completed;
+				if (__builtin_expect(completed == bufs, false))
+					return;
+				r = 0;
+			}
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (do_sleep) {
+			if (enable_call())
+				wait_for_call();
+		} else {
+			poll_used();
+		}
+	}
+}
+
+static void run_host(void)
+{
+	int completed_before;
+	int completed = 0;
+	int spurious = 0;
+	int bufs = runcycles;
+	unsigned len;
+	void *buf;
+
+	for (;;) {
+		if (do_sleep) {
+			if (enable_kick())
+				wait_for_kick();
+		} else {
+			poll_avail();
+		}
+		if (do_sleep)
+			disable_kick();
+		completed_before = completed;
+		while (__builtin_expect(use_buf(&len, &buf), true)) {
+			if (do_sleep)
+				call_used();
+			++completed;
+			if (__builtin_expect(completed == bufs, false))
+				return;
+		}
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		if (completed == bufs)
+			break;
+	}
+}
+
+void *start_guest(void *arg)
+{
+	set_affinity(arg);
+	run_guest();
+	pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+	set_affinity(arg);
+	run_host();
+	pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = 'h',
+	},
+	{
+		.name = "host-affinity",
+		.has_arg = required_argument,
+		.val = 'H',
+	},
+	{
+		.name = "guest-affinity",
+		.has_arg = required_argument,
+		.val = 'G',
+	},
+	{
+		.name = "ring-size",
+		.has_arg = required_argument,
+		.val = 'R',
+	},
+	{
+		.name = "run-cycles",
+		.has_arg = required_argument,
+		.val = 'C',
+	},
+	{
+		.name = "outstanding",
+		.has_arg = required_argument,
+		.val = 'o',
+	},
+	{
+		.name = "batch",
+		.has_arg = required_argument,
+		.val = 'b',
+	},
+	{
+		.name = "sleep",
+		.has_arg = no_argument,
+		.val = 's',
+	},
+	{
+		.name = "relax",
+		.has_arg = no_argument,
+		.val = 'x',
+	},
+	{
+		.name = "exit",
+		.has_arg = no_argument,
+		.val = 'e',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: <test> [--help]"
+		" [--host-affinity H]"
+		" [--guest-affinity G]"
+		" [--ring-size R (default: %d)]"
+		" [--run-cycles C (default: %d)]"
+		" [--batch b]"
+		" [--outstanding o]"
+		" [--sleep]"
+		" [--relax]"
+		" [--exit]"
+		"\n",
+		ring_size,
+		runcycles);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	pthread_t host, guest;
+	void *tret;
+	char *host_arg = NULL;
+	char *guest_arg = NULL;
+	char *endptr;
+	long int c;
+
+	kickfd = eventfd(0, 0);
+	assert(kickfd >= 0);
+	callfd = eventfd(0, 0);
+	assert(callfd >= 0);
+
+	for (;;) {
+		int o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'H':
+			host_arg = optarg;
+			break;
+		case 'G':
+			guest_arg = optarg;
+			break;
+		case 'R':
+			ring_size = strtol(optarg, &endptr, 0);
+			assert(ring_size && !(ring_size & (ring_size - 1)));
+			assert(!*endptr);
+			break;
+		case 'C':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			runcycles = c;
+			break;
+		case 'o':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			max_outstanding = c;
+			break;
+		case 'b':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			batch = c;
+			break;
+		case 's':
+			do_sleep = true;
+			break;
+		case 'x':
+			do_relax = true;
+			break;
+		case 'e':
+			do_exit = true;
+			break;
+		default:
+			help();
+			exit(4);
+			break;
+		}
+	}
+
+	/* does nothing here, used to make sure all smp APIs compile */
+	smp_acquire();
+	smp_release();
+	smp_mb();
+done:
+
+	if (batch > max_outstanding)
+		batch = max_outstanding;
+
+	if (optind < argc) {
+		help();
+		exit(4);
+	}
+	alloc_ring();
+
+	ret = pthread_create(&host, NULL, start_host, host_arg);
+	assert(!ret);
+	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+	assert(!ret);
+
+	ret = pthread_join(guest, &tret);
+	assert(!ret);
+	ret = pthread_join(host, &tret);
+	assert(!ret);
+	return 0;
+}
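The kick/call plumbing above is a plain eventfd handshake: notify() writes a 64-bit count and wait_for_notify() blocks in read() until one arrives. The same mechanism in isolation, as a self-contained sketch:

#include <assert.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
	int efd = eventfd(0, 0);
	uint64_t v = 1;

	assert(efd >= 0);
	/* producer side: notify() boils down to this write */
	assert(write(efd, &v, sizeof(v)) == sizeof(v));
	/* consumer side: wait_for_notify() blocks here until a write lands */
	assert(read(efd, &v, sizeof(v)) == sizeof(v));
	assert(v == 1); /* the read returns and resets the accumulated count */
	return 0;
}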
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 000000000000..16917acb0ade
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+	unsigned long long t;
+
+	t = __rdtsc();
+	while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+	_Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+	if (!do_exit)
+		return;
+
+	wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+	if (!do_exit)
+		return;
+
+	wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+	if (do_relax)
+		cpu_relax();
+	else
+		/* prevent compiler from removing busy loops */
+		barrier();
+}
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+	barrier(); \
+	__atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+	__atomic_thread_fence(__ATOMIC_ACQUIRE); \
+	barrier(); \
+} while (0)
+
+#endif
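Each benchmark binary is main.o plus one file implementing the hooks above. A hypothetical skeleton backend (every name here is illustrative; it compiles against main.h but deadlocks by design, since it never produces a buffer) shows the minimum a new ring layout has to provide:

/* null.c - hypothetical stub showing the hook surface of main.h */
#include "main.h"
#include <stddef.h>

void alloc_ring(void) { /* allocate shared rings here */ }

/* guest side */
int add_inbuf(unsigned len, void *buf, void *data) { return -1; /* full */ }
void *get_buf(unsigned *lenp, void **bufp) { return NULL; /* none done */ }
void disable_call() { }
bool enable_call() { return true; /* true: nothing completed, caller sleeps */ }
void kick_available() { kick(); }
void poll_used() { busy_wait(); }

/* host side */
void disable_kick() { }
bool enable_kick() { return true; /* true: nothing available, caller sleeps */ }
bool use_buf(unsigned *lenp, void **bufp) { return false; }
void call_used() { call(); }
void poll_avail() { busy_wait(); }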
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 000000000000..c25c8d248b6b
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+			      unsigned short next,
+			      unsigned short prev)
+{
+	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+	unsigned short flags;
+	unsigned short index;
+	unsigned len;
+	unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+	unsigned short kick_index;
+	unsigned char reserved0[HOST_GUEST_PADDING - 2];
+	unsigned short call_index;
+	unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+	void *buf; /* descriptor is writeable, we can't get buf from there */
+	void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+	unsigned avail_idx;
+	unsigned last_used_idx;
+	unsigned num_free;
+	unsigned kicked_avail_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned used_idx;
+	unsigned called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+
+	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	event = malloc(sizeof *event);
+	if (!event) {
+		perror("Unable to allocate event buffer.\n");
+		exit(3);
+	}
+	memset(event, 0, sizeof *event);
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	for (i = 0; i < ring_size; ++i) {
+		struct desc desc = {
+			.index = i,
+		};
+		ring[i] = desc;
+	}
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, index;
+
+	if (!guest.num_free)
+		return -1;
+
+	guest.num_free--;
+	head = (ring_size - 1) & (guest.avail_idx++);
+
+	/* Start with a write. On MESI architectures this helps
+	 * avoid a shared state with consumer that is polling this descriptor.
+	 */
+	ring[head].addr = (unsigned long)(void*)buf;
+	ring[head].len = len;
+	/* read below might bypass write above. That is OK because it's just an
+	 * optimization. If this happens, we will get the cache line in a
+	 * shared state which is unfortunate, but probably not worth it to
+	 * add an explicit full barrier to avoid this.
+	 */
+	barrier();
+	index = ring[head].index;
+	data[index].buf = buf;
+	data[index].data = datap;
+	/* Barrier A (for pairing) */
+	smp_release();
+	ring[head].flags = DESC_HW;
+
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+	unsigned index;
+	void *datap;
+
+	if (ring[head].flags & DESC_HW)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	*lenp = ring[head].len;
+	index = ring[head].index & (ring_size - 1);
+	datap = data[index].data;
+	*bufp = data[index].buf;
+	data[index].buf = NULL;
+	data[index].data = NULL;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	while (ring[head].flags & DESC_HW)
+		busy_wait();
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	event->call_index = guest.last_used_idx;
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!need_event(event->kick_index,
+			guest.avail_idx,
+			guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	event->kick_index = host.used_idx;
+	/* Barrier C (for pairing) */
+	smp_mb();
+	return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	while (!(ring[head].flags & DESC_HW))
+		busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	if (!(ring[head].flags & DESC_HW))
+		return false;
+
+	/* make sure length read below is not speculated */
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	/* simple in-order completion: we don't need
+	 * to touch index at all. This also means we
+	 * can just modify the descriptor in-place.
+	 */
+	ring[head].len--;
+	/* Make sure len is valid before flags.
+	 * Note: alternative is to write len and flags in one access -
+	 * possible on 64 bit architectures but wmb is free on Intel anyway
+	 * so I have no way to test whether it's a gain.
+	 */
+	/* Barrier B (for pairing) */
+	smp_release();
+	ring[head].flags = 0;
+	host.used_idx++;
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!need_event(event->call_index,
+			host.used_idx,
+			host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
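need_event() is the virtio event-index test in modulo-2^16 arithmetic: notify only if the index the peer asked about falls inside the window of entries written since the last notification. A worked instance, using the function exactly as defined above:

#include <assert.h>
#include <stdbool.h>

static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}

int main(void)
{
	/* peer wants a call after entry 1; entries 0..2 written since the
	 * last kick (prev = 0, next = 3): 3-1-1 = 1 < 3-0 = 3, so notify */
	assert(need_event(1, 3, 0));
	/* peer wants entry 5, only 0..2 written: 3-5-1 wraps to 65533 */
	assert(!need_event(5, 3, 0));
	/* wraparound works the same across the 16-bit boundary */
	assert(need_event(65535, 2, 65534)); /* 2 < 4 in unsigned short */
	return 0;
}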
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 000000000000..52b0f71ffa8d
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+	#Don't run guest and host on same CPU
+	#It actually works ok if using signalling
+	if
+		(echo "$@" | grep -e "--sleep" > /dev/null) || \
+			test $HOST_AFFINITY '!=' $cpu
+	then
+		echo "GUEST AFFINITY $cpu"
+		"$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+	fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 000000000000..47c9a1a18d36
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+	void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+	unsigned short avail_idx;
+	unsigned short last_used_idx;
+	unsigned short num_free;
+	unsigned short kicked_avail_idx;
+	unsigned short free_head;
+	unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned short used_idx;
+	unsigned short called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+	void *p;
+
+	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	memset(p, 0, vring_size(ring_size, 0x1000));
+	vring_init(&ring, ring_size, p, 0x1000);
+
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	/* Put everything in free lists. */
+	guest.free_head = 0;
+	for (i = 0; i < ring_size - 1; i++)
+		ring.desc[i].next = i + 1;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, avail;
+	struct vring_desc *desc;
+
+	if (!guest.num_free)
+		return -1;
+
+	head = guest.free_head;
+	guest.num_free--;
+
+	desc = ring.desc;
+	desc[head].flags = VRING_DESC_F_NEXT;
+	desc[head].addr = (unsigned long)(void *)buf;
+	desc[head].len = len;
+	/* We do it like this to simulate the way
+	 * we'd have to flip it if we had multiple
+	 * descriptors.
+	 */
+	desc[head].flags &= ~VRING_DESC_F_NEXT;
+	guest.free_head = desc[head].next;
+
+	data[head].data = datap;
+
+#ifdef RING_POLL
+	/* Barrier A (for pairing) */
+	smp_release();
+	avail = guest.avail_idx++;
+	ring.avail->ring[avail & (ring_size - 1)] =
+		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+	avail = (ring_size - 1) & (guest.avail_idx++);
+	ring.avail->ring[avail] = head;
+	/* Barrier A (for pairing) */
+	smp_release();
+#endif
+	ring.avail->idx = guest.avail_idx;
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head;
+	unsigned index;
+	void *datap;
+
+#ifdef RING_POLL
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	index &= ring_size - 1;
+#else
+	if (ring.used->idx == guest.last_used_idx)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+#endif
+	*lenp = ring.used->ring[head].len;
+	datap = data[index].data;
+	*bufp = (void*)(unsigned long)ring.desc[index].addr;
+	data[index].data = NULL;
+	ring.desc[index].next = guest.free_head;
+	guest.free_head = index;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	for (;;) {
+		unsigned index = ring.used->ring[head].id;
+
+		if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	unsigned head = guest.last_used_idx;
+
+	while (ring.used->idx == head)
+		busy_wait();
+#endif
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned short last_used_idx;
+
+	vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned short head = last_used_idx & (ring_size - 1);
+		unsigned index = ring.used->ring[head].id;
+
+		return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_avail_event(&ring),
+			      guest.avail_idx,
+			      guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = host.used_idx;
+
+	vring_avail_event(&ring) = head;
+	/* Barrier C (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+		return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+	unsigned head = host.used_idx;
+#ifdef RING_POLL
+	for (;;) {
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+		if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	while (ring.avail->idx == head)
+		busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned used_idx = host.used_idx;
+	struct vring_desc *desc;
+	unsigned head;
+
+#ifdef RING_POLL
+	head = ring.avail->ring[used_idx & (ring_size - 1)];
+	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+		return false;
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	desc = &ring.desc[head & (ring_size - 1)];
+#else
+	if (used_idx == ring.avail->idx)
+		return false;
+
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	head = ring.avail->ring[used_idx];
+	desc = &ring.desc[head];
+#endif
+
+	*lenp = desc->len;
+	*bufp = (void *)(unsigned long)desc->addr;
+
+	/* now update used ring */
+	ring.used->ring[used_idx].id = head;
+	ring.used->ring[used_idx].len = desc->len - 1;
+	/* Barrier B (for pairing) */
+	smp_release();
+	host.used_idx++;
+	ring.used->idx = host.used_idx;
+
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_used_event(&ring),
+			      host.used_idx,
+			      host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
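The RING_POLL variant encodes a generation stamp into each avail slot so the consumer never has to read the published idx: the descriptor head sits in the low bits and the counter's high bits, XORed with 0x8000, fill the rest. A slot is fresh only when its stamp matches the consumer's own counter. A worked instance, assuming ring_size = 256:

#include <assert.h>

int main(void)
{
	unsigned ring_size = 256;
	unsigned short avail = 0x0102, head = 5;

	/* producer: head in low bits, counter high bits ^ 0x8000 on top */
	unsigned short entry = (head | (avail & ~(ring_size - 1))) ^ 0x8000;
	assert(entry == 0x8105);

	/* consumer at the same counter value sees a matching stamp */
	assert(((entry ^ 0x0102 ^ 0x8000) & ~(ring_size - 1)) == 0);

	/* a full lap earlier (counter 0x0002) the stamp mismatches: stale */
	assert(((entry ^ 0x0002 ^ 0x8000) & ~(ring_size - 1)) != 0);
	return 0;
}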
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 000000000000..84fc2c557aaa
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"