Thread: BUG #17095: ./configure fails thread_test.c when run under TSAN
The following bug has been logged on the website: Bug reference: 17095 Logged by: Vitali Lovich Email address: vlovich+github@gmail.com PostgreSQL version: 14beta2 Operating system: Arch Linux Description: When I run configure with -fsanitize=address (using clang 13 at revision d3676d4b666ead794fc58bbc7e07aa406dcf487a), it errors out because `thread_test.c` is violating thread safety. Specifically it complains about the access of errno2_set, errno1_set, thread1_done, & thread2_done being read/modified from multiple threads without any kind of synchronization. AFAICT these TSAN errors are correct.
I've confirmed the following patch (relative to 45b88269a353ad93744772791feb6d01bc7e1e42) resolves the issue. It's predicated on the assumption that gcc or clang (or a compiler with the relevant builtins) are being used to build this.
diff --git a/src/test/thread/thread_test.c b/src/test/thread/thread_test.c
index 5392439146..cef8eb6c82 100644
--- a/src/test/thread/thread_test.c
+++ b/src/test/thread/thread_test.c
@@ -80,6 +80,14 @@ static volatile int thread2_done = 0;
static volatile int errno1_set = 0;
static volatile int errno2_set = 0;
+static void atomic_write(volatile int* ptr, int value) {
+ __atomic_store_n(ptr, value, __ATOMIC_RELAXED);
+}
+
+static int atomic_read(volatile int* ptr) {
+ return __atomic_load_n(ptr, __ATOMIC_RELAXED);
+}
+
#ifndef HAVE_STRERROR_R
static char *strerror_p1;
static char *strerror_p2;
@@ -163,7 +171,7 @@ main(int argc, char *argv[])
exit(1);
}
- while (thread1_done == 0 || thread2_done == 0)
+ while (atomic_read(&thread1_done) == 0 || atomic_read(&thread2_done) == 0)
sched_yield(); /* if this is a portability problem, remove it */
/* Test things while we have thread-local storage */
@@ -312,8 +320,8 @@ func_call_1(void)
* Wait for other thread to set errno. We can't use thread-specific
* locking here because it might affect errno.
*/
- errno1_set = 1;
- while (errno2_set == 0)
+ atomic_write(&errno1_set, 1);
+ while (atomic_read(&errno2_set) == 0)
sched_yield();
#ifdef WIN32
@@ -368,7 +376,7 @@ func_call_1(void)
}
#endif
- thread1_done = 1;
+ atomic_write(&thread1_done, 1);
pthread_mutex_lock(&init_mutex); /* wait for parent to test */
pthread_mutex_unlock(&init_mutex);
}
@@ -403,8 +411,8 @@ func_call_2(void)
* Wait for other thread to set errno. We can't use thread-specific
* locking here because it might affect errno.
*/
- errno2_set = 1;
- while (errno1_set == 0)
+ atomic_write(&errno2_set, 1);
+ while (atomic_read(&errno1_set) == 0)
sched_yield();
#ifdef WIN32
@@ -451,7 +459,7 @@ func_call_2(void)
}
#endif
- thread2_done = 1;
+ atomic_write(&thread2_done, 1);
pthread_mutex_lock(&init_mutex); /* wait for parent to test */
pthread_mutex_unlock(&init_mutex);
}
diff --git a/src/test/thread/thread_test.c b/src/test/thread/thread_test.c
index 5392439146..cef8eb6c82 100644
--- a/src/test/thread/thread_test.c
+++ b/src/test/thread/thread_test.c
@@ -80,6 +80,14 @@ static volatile int thread2_done = 0;
static volatile int errno1_set = 0;
static volatile int errno2_set = 0;
+static void atomic_write(volatile int* ptr, int value) {
+ __atomic_store_n(ptr, value, __ATOMIC_RELAXED);
+}
+
+static int atomic_read(volatile int* ptr) {
+ return __atomic_load_n(ptr, __ATOMIC_RELAXED);
+}
+
#ifndef HAVE_STRERROR_R
static char *strerror_p1;
static char *strerror_p2;
@@ -163,7 +171,7 @@ main(int argc, char *argv[])
exit(1);
}
- while (thread1_done == 0 || thread2_done == 0)
+ while (atomic_read(&thread1_done) == 0 || atomic_read(&thread2_done) == 0)
sched_yield(); /* if this is a portability problem, remove it */
/* Test things while we have thread-local storage */
@@ -312,8 +320,8 @@ func_call_1(void)
* Wait for other thread to set errno. We can't use thread-specific
* locking here because it might affect errno.
*/
- errno1_set = 1;
- while (errno2_set == 0)
+ atomic_write(&errno1_set, 1);
+ while (atomic_read(&errno2_set) == 0)
sched_yield();
#ifdef WIN32
@@ -368,7 +376,7 @@ func_call_1(void)
}
#endif
- thread1_done = 1;
+ atomic_write(&thread1_done, 1);
pthread_mutex_lock(&init_mutex); /* wait for parent to test */
pthread_mutex_unlock(&init_mutex);
}
@@ -403,8 +411,8 @@ func_call_2(void)
* Wait for other thread to set errno. We can't use thread-specific
* locking here because it might affect errno.
*/
- errno2_set = 1;
- while (errno1_set == 0)
+ atomic_write(&errno2_set, 1);
+ while (atomic_read(&errno1_set) == 0)
sched_yield();
#ifdef WIN32
@@ -451,7 +459,7 @@ func_call_2(void)
}
#endif
- thread2_done = 1;
+ atomic_write(&thread2_done, 1);
pthread_mutex_lock(&init_mutex); /* wait for parent to test */
pthread_mutex_unlock(&init_mutex);
}
I didn't predicate using the atomic on TSAN actually being used to build this but that could also be done with something like:
#if defined(__has_feature)
# if __has_feature(thread_sanitizer)
# define ATOMIC_VOLATILES 1
# endif
#endif
#if defined(__has_feature)
# if __has_feature(thread_sanitizer)
# define ATOMIC_VOLATILES 1
# endif
#endif
#if __SANITIZE_THREAD__
#define ATOMIC_VOLATILES 1
#endif
& fixing up the body of atomic_write/atomic_read to change behavior based on #if ATOMIC_VOLATILES.
On Thu, Jul 8, 2021 at 1:02 PM PG Bug reporting form <noreply@postgresql.org> wrote:
The following bug has been logged on the website:
Bug reference: 17095
Logged by: Vitali Lovich
Email address: vlovich+github@gmail.com
PostgreSQL version: 14beta2
Operating system: Arch Linux
Description:
When I run configure with -fsanitize=address (using clang 13 at revision
d3676d4b666ead794fc58bbc7e07aa406dcf487a), it errors out because
`thread_test.c` is violating thread safety.
Specifically it complains about the access of errno2_set, errno1_set,
thread1_done, & thread2_done being read/modified from multiple threads
without any kind of synchronization. AFAICT these TSAN errors are correct.
Vitali Lovich <vlovich@gmail.com> writes: > I've confirmed the following patch (relative to > 45b88269a353ad93744772791feb6d01bc7e1e42) resolves the issue. It's > predicated on the assumption that gcc or clang (or a compiler with the > relevant builtins) are being used to build this. You realize of course that that assumption is *completely* untenable. I'm inclined to say that if -fsanitize=address breaks configure, then don't include -fsanitize=address in the CFLAGS you give to configure. It's hardly the first such restriction --- -Werror is another one you can't inject that way, and I wouldn't be surprised if there are more. FWIW, I don't particularly agree with TSAN about the code being incorrect. On what platform is a fetch or a store of an int not atomic? (If there is one, Postgres will malfunction on it anyway.) regards, tom lane