925 lines
19 KiB
C
925 lines
19 KiB
C
|
/*
|
||
|
* Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions
|
||
|
* are met:
|
||
|
* 1. Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in the
|
||
|
* documentation and/or other materials provided with the distribution.
|
||
|
*
|
||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||
|
* SUCH DAMAGE.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* $Id$
|
||
|
*
|
||
|
* Test program to study various ops and concurrency issues.
|
||
|
* Create multiple threads, possibly bind to cpus, and run a workload.
|
||
|
*
|
||
|
* cc -O2 -Werror -Wall testlock.c -o testlock -lpthread
|
||
|
* you might need -lrt
|
||
|
*/
|
||
|
|
||
|
#include <inttypes.h>
|
||
|
#include <sys/types.h>
|
||
|
#include <pthread.h> /* pthread_* */
|
||
|
|
||
|
#if defined(__APPLE__)
|
||
|
|
||
|
#include <net/if_var.h>
|
||
|
#include <libkern/OSAtomic.h>
|
||
|
#define atomic_add_int(p, n) OSAtomicAdd32(n, (int *)p)
|
||
|
#define atomic_cmpset_32(p, o, n) OSAtomicCompareAndSwap32(o, n, (int *)p)
|
||
|
|
||
|
#elif defined(linux)
|
||
|
|
||
|
int atomic_cmpset_32(volatile uint32_t *p, uint32_t old, uint32_t new)
|
||
|
{
|
||
|
int ret = *p == old;
|
||
|
*p = new;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
#if defined(HAVE_GCC_ATOMICS)
|
||
|
int atomic_add_int(volatile int *p, int v)
|
||
|
{
|
||
|
return __sync_fetch_and_add(p, v);
|
||
|
}
|
||
|
#else
|
||
|
inline
|
||
|
uint32_t atomic_add_int(uint32_t *p, int v)
|
||
|
{
|
||
|
__asm __volatile (
|
||
|
" lock xaddl %0, %1 ; "
|
||
|
: "+r" (v), /* 0 (result) */
|
||
|
"=m" (*p) /* 1 */
|
||
|
: "m" (*p)); /* 2 */
|
||
|
return (v);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#else /* FreeBSD */
|
||
|
#include <sys/param.h>
|
||
|
#include <machine/atomic.h>
|
||
|
#include <pthread_np.h> /* pthread w/ affinity */
|
||
|
|
||
|
#if __FreeBSD_version > 500000
|
||
|
#include <sys/cpuset.h> /* cpu_set */
|
||
|
#if __FreeBSD_version > 800000
|
||
|
#define HAVE_AFFINITY
|
||
|
#endif
|
||
|
|
||
|
|
||
|
#else /* FreeBSD 4.x */
|
||
|
int atomic_cmpset_32(volatile uint32_t *p, uint32_t old, uint32_t new)
|
||
|
{
|
||
|
int ret = *p == old;
|
||
|
*p = new;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
#define PRIu64 "llu"
|
||
|
#endif /* FreeBSD 4.x */
|
||
|
|
||
|
#endif /* FreeBSD */
|
||
|
|
||
|
#include <signal.h> /* signal */
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <poll.h>
|
||
|
#include <inttypes.h> /* PRI* macros */
|
||
|
#include <string.h> /* strcmp */
|
||
|
#include <fcntl.h> /* open */
|
||
|
#include <unistd.h> /* getopt */
|
||
|
|
||
|
|
||
|
#include <sys/sysctl.h> /* sysctl */
|
||
|
#include <sys/time.h> /* timersub */
|
||
|
|
||
|
#define ONE_MILLION 1000000
|
||
|
/* debug support */
|
||
|
#define ND(format, ...)
|
||
|
#define D(format, ...) \
|
||
|
fprintf(stderr, "%s [%d] " format "\n", \
|
||
|
__FUNCTION__, __LINE__, ##__VA_ARGS__)
|
||
|
|
||
|
int verbose = 0;
|
||
|
|
||
|
#if 1//def MY_RDTSC
|
||
|
/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */
|
||
|
#define my_rdtsc(t) \
|
||
|
do { \
|
||
|
u_int __regs[4]; \
|
||
|
\
|
||
|
do_cpuid(0, __regs); \
|
||
|
(t) = rdtsc(); \
|
||
|
} while (0)
|
||
|
|
||
|
static __inline void
|
||
|
do_cpuid(u_int ax, u_int *p)
|
||
|
{
|
||
|
__asm __volatile("cpuid"
|
||
|
: "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
|
||
|
: "0" (ax) );
|
||
|
}
|
||
|
|
||
|
static __inline uint64_t
|
||
|
rdtsc(void)
|
||
|
{
|
||
|
uint64_t rv;
|
||
|
|
||
|
// XXX does not work on linux-64 bit
|
||
|
__asm __volatile("rdtscp" : "=A" (rv) : : "%rax");
|
||
|
return (rv);
|
||
|
}
|
||
|
#endif /* 1 */
|
||
|
|
||
|
struct targ;
|
||
|
|
||
|
/*** global arguments for all threads ***/
|
||
|
struct glob_arg {
|
||
|
struct {
|
||
|
uint32_t ctr[1024];
|
||
|
} v __attribute__ ((aligned(256) ));
|
||
|
int64_t m_cycles; /* total cycles */
|
||
|
int nthreads;
|
||
|
int cpus;
|
||
|
int privs; // 1 if has IO privileges
|
||
|
int arg; // microseconds in usleep
|
||
|
int nullfd; // open(/dev/null)
|
||
|
char *test_name;
|
||
|
void (*fn)(struct targ *);
|
||
|
uint64_t scale; // scaling factor
|
||
|
char *scale_name; // scaling factor
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* Arguments for a new thread.
|
||
|
*/
|
||
|
struct targ {
|
||
|
struct glob_arg *g;
|
||
|
int completed;
|
||
|
u_int *glob_ctr;
|
||
|
uint64_t volatile count;
|
||
|
struct timeval tic, toc;
|
||
|
int me;
|
||
|
pthread_t thread;
|
||
|
int affinity;
|
||
|
};
|
||
|
|
||
|
|
||
|
static struct targ *ta;
|
||
|
static int global_nthreads;
|
||
|
|
||
|
/* control-C handler */
|
||
|
static void
|
||
|
sigint_h(int sig)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
(void)sig; /* UNUSED */
|
||
|
for (i = 0; i < global_nthreads; i++) {
|
||
|
/* cancel active threads. */
|
||
|
if (ta[i].completed)
|
||
|
continue;
|
||
|
D("Cancelling thread #%d\n", i);
|
||
|
pthread_cancel(ta[i].thread);
|
||
|
ta[i].completed = 0;
|
||
|
}
|
||
|
signal(SIGINT, SIG_DFL);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* sysctl wrapper to return the number of active CPUs */
|
||
|
static int
|
||
|
system_ncpus(void)
|
||
|
{
|
||
|
#ifdef linux
|
||
|
return 1;
|
||
|
#else
|
||
|
int mib[2] = { CTL_HW, HW_NCPU}, ncpus;
|
||
|
size_t len = sizeof(mib);
|
||
|
sysctl(mib, len / sizeof(mib[0]), &ncpus, &len, NULL, 0);
|
||
|
D("system had %d cpus", ncpus);
|
||
|
|
||
|
return (ncpus);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* try to get I/O privileges so we can execute cli/sti etc.
|
||
|
*/
|
||
|
int
|
||
|
getprivs(void)
|
||
|
{
|
||
|
int fd = open("/dev/io", O_RDWR);
|
||
|
if (fd < 0) {
|
||
|
D("cannot open /dev/io, fd %d", fd);
|
||
|
return 0;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* set the thread affinity. */
|
||
|
/* ARGSUSED */
|
||
|
#ifdef HAVE_AFFINITY
|
||
|
static int
|
||
|
setaffinity(pthread_t me, int i)
|
||
|
{
|
||
|
cpuset_t cpumask;
|
||
|
|
||
|
if (i == -1)
|
||
|
return 0;
|
||
|
|
||
|
/* Set thread affinity affinity.*/
|
||
|
CPU_ZERO(&cpumask);
|
||
|
CPU_SET(i, &cpumask);
|
||
|
|
||
|
if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
|
||
|
D("Unable to set affinity");
|
||
|
return 1;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
|
||
|
static void *
|
||
|
td_body(void *data)
|
||
|
{
|
||
|
struct targ *t = (struct targ *) data;
|
||
|
|
||
|
#ifdef HAVE_AFFINITY
|
||
|
if (0 == setaffinity(t->thread, t->affinity))
|
||
|
#endif
|
||
|
{
|
||
|
/* main loop.*/
|
||
|
D("testing %"PRIu64" cycles arg %d",
|
||
|
t->g->m_cycles, t->g->arg);
|
||
|
gettimeofday(&t->tic, NULL);
|
||
|
t->g->fn(t);
|
||
|
gettimeofday(&t->toc, NULL);
|
||
|
}
|
||
|
t->completed = 1;
|
||
|
return (NULL);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* select and poll:
|
||
|
* arg fd timeout
|
||
|
* >0 block >0
|
||
|
* 0 block 0
|
||
|
* block NULL (not implemented)
|
||
|
* < -2 ready -arg
|
||
|
* -1 ready 0
|
||
|
* -2 ready NULL / <0 for poll
|
||
|
*
|
||
|
* arg = -1 -> NULL timeout (select)
|
||
|
*/
|
||
|
void
|
||
|
test_sel(struct targ *t)
|
||
|
{
|
||
|
int arg = t->g->arg;
|
||
|
// stdin is blocking on reads /dev/null or /dev/zero are not
|
||
|
int fd = (arg < 0) ? t->g->nullfd : 0;
|
||
|
fd_set r;
|
||
|
struct timeval t0 = { 0, arg};
|
||
|
struct timeval tcur, *tp = (arg == -2) ? NULL : &tcur;
|
||
|
int64_t m;
|
||
|
|
||
|
if (arg == -1)
|
||
|
t0.tv_usec = 0;
|
||
|
else if (arg < -2)
|
||
|
t0.tv_usec = -arg;
|
||
|
|
||
|
D("tp %p mode %s timeout %d", tp, arg < 0 ? "ready" : "block",
|
||
|
(int)t0.tv_usec);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
int ret;
|
||
|
tcur = t0;
|
||
|
FD_ZERO(&r);
|
||
|
FD_SET(fd, &r);
|
||
|
ret = select(fd+1, &r, NULL, NULL, tp);
|
||
|
(void)ret;
|
||
|
ND("ret %d r %d w %d", ret,
|
||
|
FD_ISSET(fd, &r),
|
||
|
FD_ISSET(fd, &w));
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_poll(struct targ *t)
|
||
|
{
|
||
|
int arg = t->g->arg;
|
||
|
// stdin is blocking on reads /dev/null is not
|
||
|
int fd = (arg < 0) ? t->g->nullfd : 0;
|
||
|
int64_t m;
|
||
|
int ms;
|
||
|
|
||
|
if (arg == -1)
|
||
|
ms = 0;
|
||
|
else if (arg == -2)
|
||
|
ms = -1; /* blocking */
|
||
|
else if (arg < 0)
|
||
|
ms = -arg/1000;
|
||
|
else
|
||
|
ms = arg/1000;
|
||
|
|
||
|
D("mode %s timeout %d", arg < 0 ? "ready" : "block", ms);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
struct pollfd x;
|
||
|
x.fd = fd;
|
||
|
x.events = POLLIN;
|
||
|
poll(&x, 1, ms);
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_usleep(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
usleep(t->g->arg);
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_cli(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
if (!t->g->privs) {
|
||
|
D("%s", "privileged instructions not available");
|
||
|
return;
|
||
|
}
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
__asm __volatile("cli;");
|
||
|
__asm __volatile("and %eax, %eax;");
|
||
|
__asm __volatile("sti;");
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_nop(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
__asm __volatile("nop;");
|
||
|
__asm __volatile("nop; nop; nop; nop; nop;");
|
||
|
//__asm __volatile("nop; nop; nop; nop; nop;");
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_rdtsc1(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
uint64_t v;
|
||
|
(void)v;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
my_rdtsc(v);
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_rdtsc(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
volatile uint64_t v;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
v = rdtsc();
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
(void)v;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_add(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
t->glob_ctr[0] ++;
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_atomic_add(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
atomic_add_int(t->glob_ctr, 1);
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_atomic_cmpset(struct targ *t)
|
||
|
{
|
||
|
int64_t m, i;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
for (i = 0; i < ONE_MILLION; i++) {
|
||
|
atomic_cmpset_32(t->glob_ctr, m, i);
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_time(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
#ifndef __APPLE__
|
||
|
struct timespec ts;
|
||
|
clock_gettime(t->g->arg, &ts);
|
||
|
#endif
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_gettimeofday(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
struct timeval ts;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
gettimeofday(&ts, NULL);
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* getppid is the simplest system call (getpid is cached by glibc
|
||
|
* so it would not be a good test)
|
||
|
*/
|
||
|
void
|
||
|
test_getpid(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
getppid();
|
||
|
t->count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
#define likely(x) __builtin_expect(!!(x), 1)
|
||
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||
|
|
||
|
static void
|
||
|
fast_bcopy(void *_src, void *_dst, int l)
|
||
|
{
|
||
|
uint64_t *src = _src;
|
||
|
uint64_t *dst = _dst;
|
||
|
if (unlikely(l >= 1024)) {
|
||
|
bcopy(src, dst, l);
|
||
|
return;
|
||
|
}
|
||
|
for (; likely(l > 0); l-=64) {
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
*dst++ = *src++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
asmcopy(void *dst, void *src, uint64_t l)
|
||
|
{
|
||
|
(void)dst;
|
||
|
(void)src;
|
||
|
asm(
|
||
|
"\n\t"
|
||
|
"movq %0, %%rcx\n\t"
|
||
|
"addq $7, %%rcx\n\t"
|
||
|
"shrq $03, %%rcx\n\t"
|
||
|
"cld\n\t"
|
||
|
"movq %1, %%rdi\n\t"
|
||
|
"movq %2, %%rsi\n\t"
|
||
|
"repe movsq\n\t"
|
||
|
/* "movq %0, %%rcx\n\t"
|
||
|
"andq $0x7, %%rcx\n\t"
|
||
|
"repe movsb\n\t"
|
||
|
*/
|
||
|
: /* out */
|
||
|
: "r" (l), "r" (dst), "r" (src) /* in */
|
||
|
: "%rcx", "%rsi", "%rdi" /* clobbered */
|
||
|
);
|
||
|
|
||
|
}
|
||
|
// XXX if you want to make sure there is no inlining...
|
||
|
// static void (*fp)(void *_src, void *_dst, int l) = fast_bcopy;
|
||
|
|
||
|
#define HU 0x3ffff
|
||
|
static struct glob_arg huge[HU+1];
|
||
|
|
||
|
void
|
||
|
test_fastcopy(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
int len = t->g->arg;
|
||
|
|
||
|
if (len > (int)sizeof(struct glob_arg))
|
||
|
len = sizeof(struct glob_arg);
|
||
|
D("fast copying %d bytes", len);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
fast_bcopy(t->g, (void *)&huge[m & HU], len);
|
||
|
t->count+=1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_asmcopy(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
int len = t->g->arg;
|
||
|
|
||
|
if (len > (int)sizeof(struct glob_arg))
|
||
|
len = sizeof(struct glob_arg);
|
||
|
D("fast copying %d bytes", len);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
asmcopy((void *)&huge[m & HU], t->g, len);
|
||
|
t->count+=1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_bcopy(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
int len = t->g->arg;
|
||
|
|
||
|
if (len > (int)sizeof(struct glob_arg))
|
||
|
len = sizeof(struct glob_arg);
|
||
|
D("bcopying %d bytes", len);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
bcopy(t->g, (void *)&huge[m & HU], len);
|
||
|
t->count+=1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_builtin_memcpy(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
int len = t->g->arg;
|
||
|
|
||
|
if (len > (int)sizeof(struct glob_arg))
|
||
|
len = sizeof(struct glob_arg);
|
||
|
D("bcopying %d bytes", len);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
__builtin_memcpy((void *)&huge[m & HU], t->g, len);
|
||
|
t->count+=1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
test_memcpy(struct targ *t)
|
||
|
{
|
||
|
int64_t m;
|
||
|
int len = t->g->arg;
|
||
|
|
||
|
if (len > (int)sizeof(struct glob_arg))
|
||
|
len = sizeof(struct glob_arg);
|
||
|
D("memcopying %d bytes", len);
|
||
|
for (m = 0; m < t->g->m_cycles; m++) {
|
||
|
memcpy((void *)&huge[m & HU], t->g, len);
|
||
|
t->count+=1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#include <sys/ioctl.h>
|
||
|
#include <sys/socket.h> // OSX
|
||
|
#include <net/if.h>
|
||
|
#include <net/netmap.h>
|
||
|
void
|
||
|
test_netmap(struct targ *t)
|
||
|
{
|
||
|
struct nmreq nmr;
|
||
|
int fd;
|
||
|
int64_t m, scale;
|
||
|
|
||
|
scale = t->g->m_cycles / 100;
|
||
|
fd = open("/dev/netmap", O_RDWR);
|
||
|
if (fd < 0) {
|
||
|
D("fail to open netmap, exit");
|
||
|
return;
|
||
|
}
|
||
|
bzero(&nmr, sizeof(nmr));
|
||
|
for (m = 0; m < t->g->m_cycles; m += scale) {
|
||
|
nmr.nr_version = 666;
|
||
|
nmr.nr_cmd = t->g->arg;
|
||
|
nmr.nr_offset = (uint32_t)scale;
|
||
|
ioctl(fd, NIOCGINFO, &nmr);
|
||
|
t->count += scale;
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
struct entry {
|
||
|
void (*fn)(struct targ *);
|
||
|
char *name;
|
||
|
uint64_t scale;
|
||
|
uint64_t m_cycles;
|
||
|
};
|
||
|
struct entry tests[] = {
|
||
|
{ test_sel, "select", 1, 1000 },
|
||
|
{ test_poll, "poll", 1, 1000 },
|
||
|
{ test_usleep, "usleep", 1, 1000 },
|
||
|
{ test_time, "time", 1, 1000 },
|
||
|
{ test_gettimeofday, "gettimeofday", 1, 1000000 },
|
||
|
{ test_getpid, "getpid", 1, 1000000 },
|
||
|
{ test_bcopy, "bcopy", 1000, 100000000 },
|
||
|
{ test_builtin_memcpy, "__builtin_memcpy", 1000, 100000000 },
|
||
|
{ test_memcpy, "memcpy", 1000, 100000000 },
|
||
|
{ test_fastcopy, "fastcopy", 1000, 100000000 },
|
||
|
{ test_asmcopy, "asmcopy", 1000, 100000000 },
|
||
|
{ test_add, "add", ONE_MILLION, 100000000 },
|
||
|
{ test_nop, "nop", ONE_MILLION, 100000000 },
|
||
|
{ test_atomic_add, "atomic-add", ONE_MILLION, 100000000 },
|
||
|
{ test_cli, "cli", ONE_MILLION, 100000000 },
|
||
|
{ test_rdtsc, "rdtsc", ONE_MILLION, 100000000 }, // unserialized
|
||
|
{ test_rdtsc1, "rdtsc1", ONE_MILLION, 100000000 }, // serialized
|
||
|
{ test_atomic_cmpset, "cmpset", ONE_MILLION, 100000000 },
|
||
|
{ test_netmap, "netmap", 1000, 100000000 },
|
||
|
{ NULL, NULL, 0, 0 }
|
||
|
};
|
||
|
|
||
|
static void
|
||
|
usage(void)
|
||
|
{
|
||
|
const char *cmd = "test";
|
||
|
int i;
|
||
|
|
||
|
fprintf(stderr,
|
||
|
"Usage:\n"
|
||
|
"%s arguments\n"
|
||
|
"\t-m name test name\n"
|
||
|
"\t-n cycles (millions) of cycles\n"
|
||
|
"\t-l arg bytes, usec, ... \n"
|
||
|
"\t-t threads total threads\n"
|
||
|
"\t-c cores cores to use\n"
|
||
|
"\t-a n force affinity every n cores\n"
|
||
|
"\t-A n cache contention every n bytes\n"
|
||
|
"\t-w report_ms milliseconds between reports\n"
|
||
|
"",
|
||
|
cmd);
|
||
|
fprintf(stderr, "Available tests:\n");
|
||
|
for (i = 0; tests[i].name; i++) {
|
||
|
fprintf(stderr, "%12s\n", tests[i].name);
|
||
|
}
|
||
|
|
||
|
exit(0);
|
||
|
}
|
||
|
|
||
|
static int64_t
|
||
|
getnum(const char *s)
|
||
|
{
|
||
|
int64_t n;
|
||
|
char *e;
|
||
|
|
||
|
n = strtol(s, &e, 0);
|
||
|
switch (e ? *e : '\0') {
|
||
|
case 'k':
|
||
|
case 'K':
|
||
|
return n*1000;
|
||
|
case 'm':
|
||
|
case 'M':
|
||
|
return n*1000*1000;
|
||
|
case 'g':
|
||
|
case 'G':
|
||
|
return n*1000*1000*1000;
|
||
|
case 't':
|
||
|
case 'T':
|
||
|
return n*1000*1000*1000*1000;
|
||
|
default:
|
||
|
return n;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
struct glob_arg g;
|
||
|
int
|
||
|
main(int argc, char **argv)
|
||
|
{
|
||
|
int i, ch, report_interval, affinity, align;
|
||
|
|
||
|
ND("g has size %d", (int)sizeof(g));
|
||
|
report_interval = 250; /* ms */
|
||
|
affinity = 0; /* no affinity */
|
||
|
align = 0; /* global variable */
|
||
|
|
||
|
bzero(&g, sizeof(g));
|
||
|
|
||
|
g.privs = getprivs();
|
||
|
g.nthreads = 1;
|
||
|
g.cpus = 1;
|
||
|
g.m_cycles = 0;
|
||
|
g.nullfd = open("/dev/zero", O_RDWR);
|
||
|
D("nullfd is %d", g.nullfd);
|
||
|
|
||
|
while ( (ch = getopt(argc, argv, "A:a:m:n:w:c:t:vl:")) != -1) {
|
||
|
switch(ch) {
|
||
|
default:
|
||
|
D("bad option %c %s", ch, optarg);
|
||
|
usage();
|
||
|
break;
|
||
|
case 'A': /* align */
|
||
|
align = atoi(optarg);
|
||
|
break;
|
||
|
case 'a': /* force affinity */
|
||
|
affinity = atoi(optarg);
|
||
|
break;
|
||
|
case 'n': /* cycles */
|
||
|
g.m_cycles = getnum(optarg);
|
||
|
break;
|
||
|
case 'w': /* report interval */
|
||
|
report_interval = atoi(optarg);
|
||
|
break;
|
||
|
case 'c':
|
||
|
g.cpus = atoi(optarg);
|
||
|
break;
|
||
|
case 't':
|
||
|
g.nthreads = atoi(optarg);
|
||
|
break;
|
||
|
case 'm':
|
||
|
g.test_name = optarg;
|
||
|
break;
|
||
|
case 'l':
|
||
|
g.arg = getnum(optarg);
|
||
|
break;
|
||
|
|
||
|
case 'v':
|
||
|
verbose++;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
argc -= optind;
|
||
|
argv += optind;
|
||
|
if (!g.test_name && argc > 0)
|
||
|
g.test_name = argv[0];
|
||
|
|
||
|
if (g.test_name) {
|
||
|
for (i = 0; tests[i].name; i++) {
|
||
|
if (!strcmp(g.test_name, tests[i].name)) {
|
||
|
g.fn = tests[i].fn;
|
||
|
g.scale = tests[i].scale;
|
||
|
if (g.m_cycles == 0)
|
||
|
g.m_cycles = tests[i].m_cycles;
|
||
|
if (g.scale == ONE_MILLION)
|
||
|
g.scale_name = "M";
|
||
|
else if (g.scale == 1000)
|
||
|
g.scale_name = "K";
|
||
|
else {
|
||
|
g.scale = 1;
|
||
|
g.scale_name = "";
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (!g.fn) {
|
||
|
D("%s", "missing/unknown test name");
|
||
|
usage();
|
||
|
}
|
||
|
i = system_ncpus();
|
||
|
if (g.cpus < 0 || g.cpus > i) {
|
||
|
D("%d cpus is too high, have only %d cpus", g.cpus, i);
|
||
|
usage();
|
||
|
}
|
||
|
if (g.cpus == 0)
|
||
|
g.cpus = i;
|
||
|
if (g.nthreads < 1) {
|
||
|
D("bad nthreads %d, using 1", g.nthreads);
|
||
|
g.nthreads = 1;
|
||
|
}
|
||
|
i = sizeof(g.v.ctr) / g.nthreads*sizeof(g.v.ctr[0]);
|
||
|
if (align < 0 || align > i) {
|
||
|
D("bad align %d, max is %d", align, i);
|
||
|
align = i;
|
||
|
}
|
||
|
|
||
|
/* Install ^C handler. */
|
||
|
global_nthreads = g.nthreads;
|
||
|
signal(SIGINT, sigint_h);
|
||
|
|
||
|
ta = calloc(g.nthreads, sizeof(*ta));
|
||
|
/*
|
||
|
* Now create the desired number of threads, each one
|
||
|
* using a single descriptor.
|
||
|
*/
|
||
|
D("start %d threads on %d cores", g.nthreads, g.cpus);
|
||
|
for (i = 0; i < g.nthreads; i++) {
|
||
|
struct targ *t = &ta[i];
|
||
|
bzero(t, sizeof(*t));
|
||
|
t->g = &g;
|
||
|
t->me = i;
|
||
|
t->glob_ctr = &g.v.ctr[(i*align)/sizeof(g.v.ctr[0])];
|
||
|
D("thread %d ptr %p", i, t->glob_ctr);
|
||
|
t->affinity = affinity ? (affinity*i) % g.cpus : -1;
|
||
|
if (pthread_create(&t->thread, NULL, td_body, t) == -1) {
|
||
|
D("Unable to create thread %d", i);
|
||
|
t->completed = 1;
|
||
|
}
|
||
|
}
|
||
|
/* the main loop */
|
||
|
|
||
|
{
|
||
|
uint64_t my_count = 0, prev = 0;
|
||
|
uint64_t count = 0;
|
||
|
double delta_t;
|
||
|
struct timeval tic, toc;
|
||
|
|
||
|
gettimeofday(&toc, NULL);
|
||
|
for (;;) {
|
||
|
struct timeval now, delta;
|
||
|
uint64_t pps;
|
||
|
int done = 0;
|
||
|
|
||
|
delta.tv_sec = report_interval/1000;
|
||
|
delta.tv_usec = (report_interval%1000)*1000;
|
||
|
select(0, NULL, NULL, NULL, &delta);
|
||
|
gettimeofday(&now, NULL);
|
||
|
timersub(&now, &toc, &toc);
|
||
|
my_count = 0;
|
||
|
for (i = 0; i < g.nthreads; i++) {
|
||
|
my_count += ta[i].count;
|
||
|
if (ta[i].completed)
|
||
|
done++;
|
||
|
}
|
||
|
pps = toc.tv_sec* ONE_MILLION + toc.tv_usec;
|
||
|
if (pps < 10000)
|
||
|
continue;
|
||
|
pps = (my_count - prev)*ONE_MILLION / pps;
|
||
|
D("%" PRIu64 " %scycles/s scale %" PRIu64 " in %dus", pps/g.scale,
|
||
|
g.scale_name, g.scale, (int)(toc.tv_sec* ONE_MILLION + toc.tv_usec));
|
||
|
prev = my_count;
|
||
|
toc = now;
|
||
|
if (done == g.nthreads)
|
||
|
break;
|
||
|
}
|
||
|
D("total %" PRIu64 " cycles", prev);
|
||
|
|
||
|
timerclear(&tic);
|
||
|
timerclear(&toc);
|
||
|
for (i = 0; i < g.nthreads; i++) {
|
||
|
pthread_join(ta[i].thread, NULL);
|
||
|
|
||
|
if (ta[i].completed == 0)
|
||
|
continue;
|
||
|
|
||
|
/*
|
||
|
* Collect threads o1utput and extract information about
|
||
|
* how log it took to send all the packets.
|
||
|
*/
|
||
|
count += ta[i].count;
|
||
|
if (!timerisset(&tic) || timercmp(&ta[i].tic, &tic, <))
|
||
|
tic = ta[i].tic;
|
||
|
if (!timerisset(&toc) || timercmp(&ta[i].toc, &toc, >))
|
||
|
toc = ta[i].toc;
|
||
|
}
|
||
|
|
||
|
/* print output. */
|
||
|
timersub(&toc, &tic, &toc);
|
||
|
delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
|
||
|
D("total %8.6f seconds", delta_t);
|
||
|
}
|
||
|
|
||
|
return (0);
|
||
|
}
|
||
|
/* end of file */
|