From bcf61f229e360e96bb936ad08d64b6a43b181bb2 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sun, 27 Jul 2025 08:48:48 -0700 Subject: [PATCH v12 3/3] pg_test_timing: Add --fast flag to test fast timing, report time source In passing also reduce the per-loop overhead caused by repeated divisions in INSTR_TIME_GET_NANOSEC when the ticks variable has become very large, instead diff first and then turn it into nanosecs. --- src/bin/pg_test_timing/pg_test_timing.c | 59 +++++++++++++++++++------ src/include/portability/instr_time.h | 30 ++++++++----- 2 files changed, 65 insertions(+), 24 deletions(-) diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index a5621251afc..b77ef2063b6 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -16,6 +16,7 @@ static const char *progname; static unsigned int test_duration = 3; static double max_rprct = 99.99; +static bool fast_timing = false; /* record duration in powers of 2 nanoseconds */ static long long int histogram[32]; @@ -56,6 +57,7 @@ handle_args(int argc, char *argv[]) static struct option long_options[] = { {"duration", required_argument, NULL, 'd'}, {"cutoff", required_argument, NULL, 'c'}, + {"fast", no_argument, NULL, 'f'}, {NULL, 0, NULL, 0} }; @@ -68,7 +70,7 @@ handle_args(int argc, char *argv[]) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { - printf(_("Usage: %s [-d DURATION] [-c CUTOFF]\n"), progname); + printf(_("Usage: %s [-d DURATION] [-c CUTOFF] [--fast]\n"), progname); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) @@ -78,7 +80,7 @@ handle_args(int argc, char *argv[]) } } - while ((option = getopt_long(argc, argv, "d:c:", + while ((option = getopt_long(argc, argv, "d:c:f:", long_options, &optindex)) != -1) { switch (option) @@ -125,6 +127,10 @@ handle_args(int argc, char *argv[]) } break; + case 'f': + fast_timing = true; + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); @@ -155,11 +161,31 @@ test_timing(unsigned int duration) uint64 total_time; int64 time_elapsed = 0; uint64 loop_count = 0; - uint64 prev, - cur; instr_time start_time, end_time, - temp; + prev, + cur; + char *time_source = NULL; + bool fast_timing_used = false; + + INSTR_TIME_INITIALIZE(); + +#if !defined(WIN32) && defined(__x86_64__) && defined(__linux__) + if (fast_timing && has_rdtsc) + { + time_source = "RDTSC"; + fast_timing_used = true; + } + else if (has_rdtscp) + time_source = "RDTSCP"; + else + time_source = PG_INSTR_CLOCK_NAME; +#else + time_source = PG_INSTR_CLOCK_NAME; +#endif + if (fast_timing && !fast_timing_used) + printf(_("Warning: Fast timing requested, but not available - regular timing source will be used\n")); + printf(_("Time source: %s\n"), time_source); /* * Pre-zero the statistics data structures. They're already zero by @@ -173,8 +199,11 @@ test_timing(unsigned int duration) total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0; - INSTR_TIME_SET_CURRENT(start_time); - cur = INSTR_TIME_GET_NANOSEC(start_time); + if (fast_timing) + INSTR_TIME_SET_CURRENT_FAST(start_time); + else + INSTR_TIME_SET_CURRENT(start_time); + cur = start_time; while (time_elapsed < total_time) { @@ -182,9 +211,11 @@ test_timing(unsigned int duration) bits; prev = cur; - INSTR_TIME_SET_CURRENT(temp); - cur = INSTR_TIME_GET_NANOSEC(temp); - diff = cur - prev; + if (fast_timing) + INSTR_TIME_SET_CURRENT_FAST(cur); + else + INSTR_TIME_SET_CURRENT(cur); + diff = INSTR_TIME_DIFF_NANOSEC(cur, prev); /* Did time go backwards? */ if (unlikely(diff < 0)) @@ -217,11 +248,13 @@ test_timing(unsigned int duration) largest_diff_count++; loop_count++; - INSTR_TIME_SUBTRACT(temp, start_time); - time_elapsed = INSTR_TIME_GET_NANOSEC(temp); + time_elapsed = INSTR_TIME_DIFF_NANOSEC(cur, start_time); } - INSTR_TIME_SET_CURRENT(end_time); + if (fast_timing) + INSTR_TIME_SET_CURRENT_FAST(end_time); + else + INSTR_TIME_SET_CURRENT(end_time); INSTR_TIME_SUBTRACT(end_time, start_time); diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index e2e339a0c4f..f02296f1026 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -112,10 +112,13 @@ extern int64 max_ticks_no_overflow; */ #if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW) #define PG_INSTR_CLOCK CLOCK_MONOTONIC_RAW +#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC_RAW)" #elif defined(CLOCK_MONOTONIC) #define PG_INSTR_CLOCK CLOCK_MONOTONIC +#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC)" #else #define PG_INSTR_CLOCK CLOCK_REALTIME +#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_REALTIME)" #endif #if defined(__x86_64__) && defined(__linux__) @@ -174,7 +177,7 @@ pg_get_ticks(void) } static inline int64_t -pg_ticks_to_ns(instr_time t) +pg_ticks_to_ns(int64 ticks) { /* * Would multiplication overflow? If so perform computation in two parts. @@ -183,7 +186,7 @@ pg_ticks_to_ns(instr_time t) */ int64 ns = 0; - if (unlikely(t.ticks > max_ticks_no_overflow)) + if (unlikely(ticks > max_ticks_no_overflow)) { /* * Compute how often the maximum number of ticks fits completely into @@ -192,7 +195,7 @@ pg_ticks_to_ns(instr_time t) * value. In a 2nd step we adjust the number of elapsed ticks and * convert the remaining ticks. */ - int64 count = t.ticks / max_ticks_no_overflow; + int64 count = ticks / max_ticks_no_overflow; int64 max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; ns = max_ns * count; @@ -201,11 +204,11 @@ pg_ticks_to_ns(instr_time t) * Subtract the ticks that we now already accounted for, so that they * don't get counted twice. */ - t.ticks -= count * max_ticks_no_overflow; - Assert(t.ticks >= 0); + ticks -= count * max_ticks_no_overflow; + Assert(ticks >= 0); } - ns += t.ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; + ns += ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; return ns; } @@ -226,14 +229,14 @@ pg_initialize_get_ticks() #define INSTR_TIME_SET_CURRENT(t) \ ((t) = pg_get_ticks()) -#define INSTR_TIME_GET_NANOSEC(t) \ - pg_ticks_to_ns(t) - +#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \ + (pg_ticks_to_ns(ticks)) #else /* WIN32 */ /* Use QueryPerformanceCounter() */ +#define PG_INSTR_CLOCK_NAME "QueryPerformanceCounter" /* helper for INSTR_TIME_SET_CURRENT / INSTR_TIME_SET_CURRENT_FAST */ static inline instr_time @@ -265,8 +268,8 @@ GetTimerFrequency(void) #define INSTR_TIME_SET_CURRENT(t) \ ((t) = pg_query_performance_counter()) -#define INSTR_TIME_GET_NANOSEC(t) \ - ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency()))) +#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \ + ((int64) ((ticks) * ((double) NS_PER_S / GetTimerFrequency()))) #endif /* WIN32 */ @@ -285,9 +288,14 @@ GetTimerFrequency(void) #define INSTR_TIME_SUBTRACT(x,y) \ ((x).ticks -= (y).ticks) +#define INSTR_TIME_DIFF_NANOSEC(x,y) \ + (INSTR_TIME_TICKS_TO_NANOSEC((x).ticks - (y).ticks)) + #define INSTR_TIME_ACCUM_DIFF(x,y,z) \ ((x).ticks += (y).ticks - (z).ticks) +#define INSTR_TIME_GET_NANOSEC(t) \ + (INSTR_TIME_TICKS_TO_NANOSEC((t).ticks)) #define INSTR_TIME_GET_DOUBLE(t) \ ((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S) -- 2.47.3